geany  1.38
About: Geany is a text editor (using GTK2) with basic features of an integrated development environment (syntax highlighting, code folding, symbol name auto-completion, ...). F: office T: editor programming GTK+ IDE
  Fossies Dox: geany-1.38.tar.bz2  ("unofficial" and yet experimental doxygen-generated source code documentation)  

Document.cxx
Go to the documentation of this file.
1// Scintilla source code edit control
2/** @file Document.cxx
3 ** Text document that handles notifications, DBCS, styling, words and end of line.
4 **/
5// Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6// The License.txt file describes the conditions under which this software may be distributed.
7
8#include <cstddef>
9#include <cstdlib>
10#include <cassert>
11#include <cstring>
12#include <cstdio>
13#include <cmath>
14
15#include <stdexcept>
16#include <string>
17#include <vector>
18#include <forward_list>
19#include <algorithm>
20#include <memory>
21#include <chrono>
22
23#ifndef NO_CXX11_REGEX
24#include <regex>
25#endif
26
27#include "Platform.h"
28
29#include "ILoader.h"
30#include "ILexer.h"
31#include "Scintilla.h"
32
33#include "CharacterSet.h"
34#include "CharacterCategory.h"
35#include "Position.h"
36#include "SplitVector.h"
37#include "Partitioning.h"
38#include "RunStyles.h"
39#include "CellBuffer.h"
40#include "PerLine.h"
41#include "CharClassify.h"
42#include "Decoration.h"
43#include "CaseFolder.h"
44#include "Document.h"
45#include "RESearch.h"
46#include "UniConversion.h"
47#include "ElapsedPeriod.h"
48
49using namespace Scintilla;
50
51void LexInterface::Colourise(Sci::Position start, Sci::Position end) {
52 if (pdoc && instance && !performingStyle) {
53 // Protect against reentrance, which may occur, for example, when
54 // fold points are discovered while performing styling and the folding
55 // code looks for child lines which may trigger styling.
56 performingStyle = true;
57
58 const Sci::Position lengthDoc = pdoc->Length();
59 if (end == -1)
60 end = lengthDoc;
61 const Sci::Position len = end - start;
62
63 PLATFORM_ASSERT(len >= 0);
64 PLATFORM_ASSERT(start + len <= lengthDoc);
65
66 int styleStart = 0;
67 if (start > 0)
68 styleStart = pdoc->StyleAt(start - 1);
69
70 if (len > 0) {
71 instance->Lex(start, len, styleStart, pdoc);
72 instance->Fold(start, len, styleStart, pdoc);
73 }
74
75 performingStyle = false;
76 }
77}
78
// NOTE(review): the signature line of this definition is missing from the listing
// (numbering jumps from 78 to 80); presumably LexInterface::LineEndTypesSupported — confirm.
// Returns the attached lexer's supported line end types when a lexer instance exists
// and its interface version is at least lvSubStyles; otherwise returns 0.
80 if (instance) {
81 const int interfaceVersion = instance->Version();
82 if (interfaceVersion >= lvSubStyles) {
// The lexer is treated as an ILexerWithSubStyles once the version check passes.
83 ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
84 return ssinstance->LineEndTypesSupported();
85 }
86 }
87 return 0;
88}
89
90ActionDuration::ActionDuration(double duration_, double minDuration_, double maxDuration_) noexcept :
91 duration(duration_), minDuration(minDuration_), maxDuration(maxDuration_) {
92}
93
94void ActionDuration::AddSample(size_t numberActions, double durationOfActions) noexcept {
95 // Only adjust for multiple actions to avoid instability
96 if (numberActions < 8)
97 return;
98
99 // Alpha value for exponential smoothing.
100 // Most recent value contributes 25% to smoothed value.
101 constexpr double alpha = 0.25;
102
103 const double durationOne = durationOfActions / numberActions;
104 duration = Sci::clamp(alpha * durationOne + (1.0 - alpha) * duration,
105 minDuration, maxDuration);
106}
107
108double ActionDuration::Duration() const noexcept {
109 return duration;
110}
111
// Construct a document from creation options.
// The cell buffer receives two flags derived from options: whether
// SC_DOCUMENTOPTION_STYLES_NONE is clear and whether SC_DOCUMENTOPTION_TEXT_LARGE is set.
// NOTE(review): several listing lines are absent from this extract (the numbering
// skips 117, 119, 121-122, 125, 127, 131, 145 and 148), so the bodies of the
// #ifdef/#else branches and some member initialisations are not visible here.
112Document::Document(int options) :
113 cb((options & SC_DOCUMENTOPTION_STYLES_NONE) == 0, (options & SC_DOCUMENTOPTION_TEXT_LARGE) != 0),
114 durationStyleOneLine(0.00001, 0.000001, 0.0001) {
115 refCount = 0;
116#ifdef _WIN32
118#else
120#endif
123 endStyled = 0;
124 styleClock = 0;
126 enteredStyling = 0;
128 insertionSet = false;
129 tabInChars = 8;
130 indentInChars = 0;
132 useTabs = true;
133 tabIndents = true;
134 backspaceUnindents = false;
135
136 matchesValid = false;
137
// One per-line data store per slot; margin, annotation and EOL annotation
// slots all use LineAnnotation.
138 perLineData[ldMarkers] = Sci::make_unique<LineMarkers>();
139 perLineData[ldLevels] = Sci::make_unique<LineLevels>();
140 perLineData[ldState] = Sci::make_unique<LineState>();
141 perLineData[ldMargin] = Sci::make_unique<LineAnnotation>();
142 perLineData[ldAnnotation] = Sci::make_unique<LineAnnotation>();
143 perLineData[ldEOLAnnotation] = Sci::make_unique<LineAnnotation>();
144
146
// Register this document with the cell buffer as its per-line handler.
147 cb.SetPerLine(this);
149}
150
// NOTE(review): signature line missing from the listing; presumably the
// Document destructor — confirm.
// Tells every registered watcher that this document is being deleted.
152 for (const WatcherWithUserData &watcher : watchers) {
153 watcher.watcher->NotifyDeleted(this, watcher.userData);
154 }
155}
156
157// Increase reference count and return its previous value.
// NOTE(review): signature line (listing line 158) is missing from this extract.
// Post-increment: the value returned is the count before the increment.
159 return refCount++;
160}
161
162// Decrease reference count and return its new (already decremented) value.
163// Delete the document if reference count reaches zero.
// NOTE(review): signature line (listing line 164) is missing from this extract.
165 const int curRefCount = --refCount;
166 if (curRefCount == 0)
167 delete this;
// curRefCount is a local copy, so it is still valid after "delete this".
168 return curRefCount;
169}
170
// NOTE(review): signature line missing from the listing; presumably Document::Init — confirm.
// Calls Init() on every non-null per-line data store.
172 for (const std::unique_ptr<PerLine> &pl : perLineData) {
173 if (pl)
174 pl->Init();
175 }
176}
177
// NOTE(review): signature line missing from the listing; presumably Document::InsertLine — confirm.
// Forwards the insertion of one line at "line" to every non-null per-line data store.
179 for (const std::unique_ptr<PerLine> &pl : perLineData) {
180 if (pl)
181 pl->InsertLine(line);
182 }
183}
184
// NOTE(review): signature line missing from the listing; presumably Document::InsertLines — confirm.
// Forwards the insertion of "lines" lines at "line" to every non-null per-line data store.
186 for (const auto &pl : perLineData) {
187 if (pl)
188 pl->InsertLines(line, lines);
189 }
190}
191
// NOTE(review): signature line missing from the listing; presumably Document::RemoveLine — confirm.
// Forwards the removal of "line" to every non-null per-line data store.
193 for (const std::unique_ptr<PerLine> &pl : perLineData) {
194 if (pl)
195 pl->RemoveLine(line);
196 }
197}
198
// NOTE(review): signature line missing from the listing; presumably Document::Markers — confirm.
// Returns the ldMarkers per-line store viewed as LineMarkers (null if the cast fails).
200 return dynamic_cast<LineMarkers *>(perLineData[ldMarkers].get());
201}
202
203LineLevels *Document::Levels() const noexcept {
204 return dynamic_cast<LineLevels *>(perLineData[ldLevels].get());
205}
206
207LineState *Document::States() const noexcept {
208 return dynamic_cast<LineState *>(perLineData[ldState].get());
209}
210
// NOTE(review): signature line missing from the listing; presumably Document::Margins — confirm.
// Returns the ldMargin per-line store viewed as LineAnnotation (null if the cast fails).
212 return dynamic_cast<LineAnnotation *>(perLineData[ldMargin].get());
213}
214
// NOTE(review): signature line missing from the listing; presumably Document::Annotations — confirm.
// Returns the ldAnnotation per-line store viewed as LineAnnotation (null if the cast fails).
216 return dynamic_cast<LineAnnotation *>(perLineData[ldAnnotation].get());
217}
218
// NOTE(review): signature line missing from the listing; presumably Document::EOLAnnotations — confirm.
// Returns the ldEOLAnnotation per-line store viewed as LineAnnotation (null if the cast fails).
220 return dynamic_cast<LineAnnotation *>(perLineData[ldEOLAnnotation].get());
221}
222
// NOTE(review): signature line missing from the listing; presumably Document::LineEndTypesSupported — confirm.
// Line end types are taken from the lexer interface (pli) only when the document
// is UTF-8 and a lexer interface is present; otherwise 0 is returned.
224 if ((SC_CP_UTF8 == dbcsCodePage) && pli)
225 return pli->LineEndTypesSupported();
226 else
227 return 0;
228}
229
// Change the document's code page.
// Returns true when the code page actually changed, false when dbcsCodePage_
// equals the current value.
// NOTE(review): listing lines 234-235 are absent from this extract, so any work
// done between clearing the case folder and ModifiedAt(0) is not visible here.
230bool Document::SetDBCSCodePage(int dbcsCodePage_) {
231 if (dbcsCodePage != dbcsCodePage_) {
232 dbcsCodePage = dbcsCodePage_;
// Drop the current case folder by installing nullptr.
233 SetCaseFolder(nullptr);
236 ModifiedAt(0); // Need to restyle whole document
237 return true;
238 } else {
239 return false;
240 }
241}
242
243bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
244 if (lineEndBitSet != lineEndBitSet_) {
245 lineEndBitSet = lineEndBitSet_;
246 const int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
247 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
248 ModifiedAt(0);
249 cb.SetLineEndTypes(lineEndBitSetActive);
250 return true;
251 } else {
252 return false;
253 }
254 } else {
255 return false;
256 }
257}
258
// NOTE(review): signature line and listing line 260 are missing from this extract;
// presumably Document::SetSavePoint — confirm.
// Notifies that the document is now at a save point.
261 NotifySavePoint(true);
262}
263
// NOTE(review): the signature line and a number of interior listing lines
// (e.g. 267, 269, 279-280, 282, 286-287, 289, 319, 321) are missing from this
// extract; presumably Document::TentativeUndo — confirm against the original file.
// Visible behaviour: returns at once when no tentative sequence is active; otherwise,
// when not already inside a modification and the buffer is writable, walks each
// tentative step, sends a modification notification per step and finally a save
// point notification if the save point state changed.
265 if (!TentativeActive())
266 return;
268 if (enteredModification == 0) {
270 if (!cb.IsReadOnly()) {
271 const bool startSavePoint = cb.IsSavePoint();
272 bool multiLine = false;
273 const int steps = cb.TentativeSteps();
274 //Platform::DebugPrintf("Steps=%d\n", steps);
275 for (int step = 0; step < steps; step++) {
276 const Sci::Line prevLinesTotal = LinesTotal();
277 const Action &action = cb.GetUndoStep();
278 if (action.at == removeAction) {
281 } else if (action.at == containerAction) {
283 dm.token = action.position;
284 NotifyModified(dm);
285 } else {
288 }
// Container actions do not change text, so styling is only invalidated for the others.
290 if (action.at != containerAction) {
291 ModifiedAt(action.position);
292 }
293
294 int modFlags = SC_PERFORMED_UNDO;
295 // With undo, an insertion action becomes a deletion notification
296 if (action.at == removeAction) {
297 modFlags |= SC_MOD_INSERTTEXT;
298 } else if (action.at == insertAction) {
299 modFlags |= SC_MOD_DELETETEXT;
300 }
301 if (steps > 1)
302 modFlags |= SC_MULTISTEPUNDOREDO;
303 const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
304 if (linesAdded != 0)
305 multiLine = true;
// The last step carries the end-of-sequence flag and, if any step changed
// the line count, the multi-line flag.
306 if (step == steps - 1) {
307 modFlags |= SC_LASTSTEPINUNDOREDO;
308 if (multiLine)
309 modFlags |= SC_MULTILINEUNDOREDO;
310 }
311 NotifyModified(DocModification(modFlags, action.position, action.lenData,
312 linesAdded, action.data.get()));
313 }
314
315 const bool endSavePoint = cb.IsSavePoint();
316 if (startSavePoint != endSavePoint)
317 NotifySavePoint(endSavePoint);
318
320 }
322 }
323}
324
325int Document::GetMark(Sci::Line line) const noexcept {
326 return Markers()->MarkValue(line);
327}
328
329Sci::Line Document::MarkerNext(Sci::Line lineStart, int mask) const noexcept {
330 return Markers()->MarkerNext(lineStart, mask);
331}
332
333int Document::AddMark(Sci::Line line, int markerNum) {
334 if (line >= 0 && line <= LinesTotal()) {
335 const int prev = Markers()->AddMark(line, markerNum, LinesTotal());
336 const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line);
337 NotifyModified(mh);
338 return prev;
339 } else {
340 return -1;
341 }
342}
343
// NOTE(review): signature line missing from the listing; presumably Document::AddMarkSet — confirm.
// Adds to "line" every marker whose bit is set in valueSet, then sends a single
// SC_MOD_CHANGEMARKER notification. Lines outside 0..LinesTotal() are ignored.
345 if (line < 0 || line > LinesTotal()) {
346 return;
347 }
348 unsigned int m = valueSet;
// Bit i of valueSet selects marker number i; the loop stops once no bits remain.
349 for (int i = 0; m; i++, m >>= 1) {
350 if (m & 1)
351 Markers()->AddMark(line, i, LinesTotal());
352 }
353 const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line);
354 NotifyModified(mh);
355}
356
357void Document::DeleteMark(Sci::Line line, int markerNum) {
358 Markers()->DeleteMark(line, markerNum, false);
359 const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line);
360 NotifyModified(mh);
361}
362
// Delete the marker identified by markerHandle and notify watchers.
// NOTE(review): listing line 365, which presumably declares the DocModification
// "mh" used below, is missing from this extract.
363void Document::DeleteMarkFromHandle(int markerHandle) {
364 Markers()->DeleteMarkFromHandle(markerHandle);
// The notification carries line -1 rather than a specific line.
366 mh.line = -1;
367 NotifyModified(mh);
368}
369
// Delete marker markerNum from every line (third store argument true) and send
// one notification if anything was removed.
// NOTE(review): listing line 377, which presumably declares the DocModification
// "mh" used below, is missing from this extract.
370void Document::DeleteAllMarks(int markerNum) {
371 bool someChanges = false;
372 for (Sci::Line line = 0; line < LinesTotal(); line++) {
373 if (Markers()->DeleteMark(line, markerNum, true))
374 someChanges = true;
375 }
376 if (someChanges) {
// The notification carries line -1 rather than a specific line.
378 mh.line = -1;
379 NotifyModified(mh);
380 }
381}
382
383Sci::Line Document::LineFromHandle(int markerHandle) const noexcept {
384 return Markers()->LineFromHandle(markerHandle);
385}
386
387int Document::MarkerNumberFromLine(Sci::Line line, int which) const noexcept {
388 return Markers()->NumberFromLine(line, which);
389}
390
391int Document::MarkerHandleFromLine(Sci::Line line, int which) const noexcept {
392 return Markers()->HandleFromLine(line, which);
393}
394
// NOTE(review): signature line missing from the listing; presumably Document::LineStart — confirm.
// Delegates to the cell buffer for the start position of "line".
396 return cb.LineStart(line);
397}
398
401}
402
// NOTE(review): the signature line and several interior listing lines
// (406-408, 410-412, 415) are missing from this extract, including the
// declaration of "position" and the contents of "bytes"; presumably
// Document::LineEnd — confirm against the original file.
// Visible behaviour: for the last line, the result is the start of the following
// line; otherwise the position is moved back over the line terminator.
404 if (line >= LinesTotal() - 1) {
405 return LineStart(line + 1);
406 } else {
409 const unsigned char bytes[] = {
413 };
414 if (UTF8IsSeparator(bytes)) {
416 }
417 if (UTF8IsNEL(bytes+1)) {
418 return position - UTF8NELLength;
419 }
420 }
421 position--; // Back over CR or LF
422 // When line terminator is CR+LF, may need to go back one more
423 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
424 position--;
425 }
426 return position;
427 }
428}
429
// NOTE(review): signature line missing from the listing; presumably Document::SetErrorStatus — confirm.
// Passes "status" to NotifyErrorOccurred on every registered watcher.
431 // Tell the watchers an error has occurred.
432 for (const WatcherWithUserData &watcher : watchers) {
433 watcher.watcher->NotifyErrorOccurred(this, watcher.userData, status);
434 }
435}
436
// NOTE(review): signature line missing from the listing; presumably one of the
// Document line-from-position accessors — confirm.
// Delegates to the cell buffer for the line containing "pos".
438 return cb.LineFromPosition(pos);
439}
440
// NOTE(review): signature line missing from the listing; presumably one of the
// Document line-from-position accessors — confirm.
// Delegates to the cell buffer for the line containing "pos".
442 // Avoids casting in callers for this very common function
443 return cb.LineFromPosition(pos);
444}
445
448}
449
452}
453
456}
457
// NOTE(review): the signature line and listing line 459 (presumably defining
// "line" from "position") are missing; presumably Document::VCHomePosition — confirm.
// Toggles between the line start and the first character after leading spaces/tabs:
// when "position" is already at the first non-blank character the line start is
// returned, otherwise the first non-blank position is returned.
460 const Sci::Position startPosition = LineStart(line);
461 const Sci::Position endLine = LineEnd(line);
462 Sci::Position startText = startPosition;
// Skip leading spaces and tabs, never moving past the end of the line.
463 while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
464 startText++;
465 if (position == startText)
466 return startPosition;
467 else
468 return startText;
469}
470
471Sci::Position Document::IndexLineStart(Sci::Line line, int lineCharacterIndex) const noexcept {
472 return cb.IndexLineStart(line, lineCharacterIndex);
473}
474
475Sci::Line Document::LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const noexcept {
476 return cb.LineFromPositionIndex(pos, lineCharacterIndex);
477}
478
// NOTE(review): the signature line and listing line 482 (the start of the
// DocModification "mh" declaration continued on line 483) are missing;
// presumably Document::SetLevel — confirm.
// Stores a new fold level for "line" and, when it differs from the previous
// level, notifies watchers with both the new and the previous level.
// Returns the previous level.
480 const int prev = Levels()->SetLevel(line, level, LinesTotal());
481 if (prev != level) {
483 LineStart(line), 0, 0, nullptr, line);
484 mh.foldLevelNow = level;
485 mh.foldLevelPrev = prev;
486 NotifyModified(mh);
487 }
488 return prev;
489}
490
// NOTE(review): signature line missing from the listing; presumably Document::GetLevel — confirm.
// Returns the fold level stored for "line" in the level store.
492 return Levels()->GetLevel(line);
493}
494
// NOTE(review): signature line missing from the listing; presumably Document::ClearLevels — confirm.
// Clears all stored fold levels.
496 Levels()->ClearLevels();
497}
498
499static bool IsSubordinate(int levelStart, int levelTry) noexcept {
500 if (levelTry & SC_FOLDLEVELWHITEFLAG)
501 return true;
502 else
503 return LevelNumber(levelStart) < LevelNumber(levelTry);
504}
505
506Sci::Line Document::GetLastChild(Sci::Line lineParent, int level, Sci::Line lastLine) {
507 if (level == -1)
508 level = LevelNumber(GetLevel(lineParent));
509 const Sci::Line maxLine = LinesTotal();
510 const Sci::Line lookLastLine = (lastLine != -1) ? std::min(LinesTotal() - 1, lastLine) : -1;
511 Sci::Line lineMaxSubord = lineParent;
512 while (lineMaxSubord < maxLine - 1) {
513 EnsureStyledTo(LineStart(lineMaxSubord + 2));
514 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
515 break;
516 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
517 break;
518 lineMaxSubord++;
519 }
520 if (lineMaxSubord > lineParent) {
521 if (level > LevelNumber(GetLevel(lineMaxSubord + 1))) {
522 // Have chewed up some whitespace that belongs to a parent so seek back
523 if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
524 lineMaxSubord--;
525 }
526 }
527 }
528 return lineMaxSubord;
529}
530
// NOTE(review): signature line missing from the listing; presumably Document::GetFoldParent — confirm.
// Searches backwards from the line before "line" for a fold header whose level
// number is lower than that of "line". Returns that header line, or -1 when the
// line reached is not such a header.
532 const int level = LevelNumber(GetLevel(line));
533 Sci::Line lineLook = line - 1;
// Keep stepping back while the candidate is not a header or is not at a lower level.
534 while ((lineLook > 0) && (
535 (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
536 (LevelNumber(GetLevel(lineLook)) >= level))
537 ) {
538 lineLook--;
539 }
540 if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
541 (LevelNumber(GetLevel(lineLook)) < level)) {
542 return lineLook;
543 } else {
544 return -1;
545 }
546}
547
// NOTE(review): signature line missing from the listing; presumably
// Document::GetHighlightDelimiters — confirm. The body uses "highlightDelimiter",
// "line" and "lastLine", which must come from that signature.
// Works out the fold block around "line" and stores four values into
// highlightDelimiter: beginFoldBlock, endFoldBlock, firstChangeableLineBefore and
// firstChangeableLineAfter. When no enclosing fold header is found,
// highlightDelimiter is cleared instead.
549 const int level = GetLevel(line);
550 const Sci::Line lookLastLine = std::max(line, lastLine) + 1;
551
552 Sci::Line lookLine = line;
553 int lookLineLevel = level;
554 int lookLineLevelNum = LevelNumber(lookLineLevel);
// Step back over whitespace-flagged lines and over headers whose level number is
// not lower than that of the line following them.
555 while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
556 ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= LevelNumber(GetLevel(lookLine + 1)))))) {
557 lookLineLevel = GetLevel(--lookLine);
558 lookLineLevelNum = LevelNumber(lookLineLevel);
559 }
560
// The block starts at the line reached if it is a header, else at its fold parent.
561 Sci::Line beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
562 if (beginFoldBlock == -1) {
563 highlightDelimiter.Clear();
564 return;
565 }
566
567 Sci::Line endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
568 Sci::Line firstChangeableLineBefore = -1;
// The block found ends before "line": look further back for a header whose last
// child is exactly "line".
569 if (endFoldBlock < line) {
570 lookLine = beginFoldBlock - 1;
571 lookLineLevel = GetLevel(lookLine);
572 lookLineLevelNum = LevelNumber(lookLineLevel);
573 while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
574 if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
575 if (GetLastChild(lookLine, -1, lookLastLine) == line) {
576 beginFoldBlock = lookLine;
577 endFoldBlock = line;
578 firstChangeableLineBefore = line - 1;
579 }
580 }
581 if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && (LevelNumber(GetLevel(lookLine - 1)) > lookLineLevelNum))
582 break;
583 lookLineLevel = GetLevel(--lookLine);
584 lookLineLevelNum = LevelNumber(lookLineLevel);
585 }
586 }
// Scan upwards from the line above "line" to the block start for the first line
// that is whitespace-flagged or nested deeper than "line".
587 if (firstChangeableLineBefore == -1) {
588 for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = LevelNumber(lookLineLevel);
589 lookLine >= beginFoldBlock;
590 lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = LevelNumber(lookLineLevel)) {
591 if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > LevelNumber(level))) {
592 firstChangeableLineBefore = lookLine;
593 break;
594 }
595 }
596 }
597 if (firstChangeableLineBefore == -1)
598 firstChangeableLineBefore = beginFoldBlock - 1;
599
// Scan downwards from the line below "line" to the block end for the first header
// whose level number is lower than that of the line following it.
600 Sci::Line firstChangeableLineAfter = -1;
601 for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = LevelNumber(lookLineLevel);
602 lookLine <= endFoldBlock;
603 lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = LevelNumber(lookLineLevel)) {
604 if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < LevelNumber(GetLevel(lookLine + 1)))) {
605 firstChangeableLineAfter = lookLine;
606 break;
607 }
608 }
609 if (firstChangeableLineAfter == -1)
610 firstChangeableLineAfter = endFoldBlock + 1;
611
612 highlightDelimiter.beginFoldBlock = beginFoldBlock;
613 highlightDelimiter.endFoldBlock = endFoldBlock;
614 highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
615 highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
616}
617
// NOTE(review): signature line missing from the listing; presumably
// Document::ClampPositionIntoDocument — confirm.
// Limits "pos" to the range 0..Length().
619 return Sci::clamp(pos, static_cast<Sci::Position>(0), static_cast<Sci::Position>(Length()));
620}
621
622bool Document::IsCrLf(Sci::Position pos) const noexcept {
623 if (pos < 0)
624 return false;
625 if (pos >= (LengthNoExcept() - 1))
626 return false;
627 return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
628}
629
// NOTE(review): signature line missing from the listing; presumably Document::LenChar — confirm.
// Returns the number of bytes occupied by the character starting at "pos":
//  - 1 for positions outside the document,
//  - 2 for a CR+LF pair,
//  - 1 for ASCII bytes or when no code page is set,
//  - for UTF-8, the width reported by UTF8Classify (1 if the sequence is invalid),
//  - for other code pages, 2 for a lead byte that has a following byte, else 1.
631 if (pos < 0 || pos >= LengthNoExcept()) {
632 // Returning 1 instead of 0 to defend against hanging with a loop that goes (or starts) out of bounds.
633 return 1;
634 } else if (IsCrLf(pos)) {
635 return 2;
636 }
637
638 const unsigned char leadByte = cb.UCharAt(pos);
639 if (!dbcsCodePage || UTF8IsAscii(leadByte)) {
640 // Common case: ASCII character
641 return 1;
642 }
643 if (SC_CP_UTF8 == dbcsCodePage) {
644 const int widthCharBytes = UTF8BytesOfLead[leadByte];
645 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
// Gather the bytes the lead byte announces before classifying the sequence.
646 for (int b = 1; b < widthCharBytes; b++) {
647 charBytes[b] = cb.UCharAt(pos + b);
648 }
649 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
650 if (utf8status & UTF8MaskInvalid) {
651 // Treat as invalid and use up just one byte
652 return 1;
653 } else {
654 return utf8status & UTF8MaskWidth;
655 }
656 } else {
657 if (IsDBCSLeadByteNoExcept(leadByte) && ((pos + 1) < LengthNoExcept())) {
658 return 2;
659 } else {
660 return 1;
661 }
662 }
663}
664
// NOTE(review): signature line missing from the listing; presumably
// Document::InGoodUTF8 — confirm. "start" and "end" are assigned below, so they
// appear to be output parameters declared in that signature.
// Checks whether "pos" lies inside a valid multi-byte UTF-8 character.
// On success returns true with start/end set to the character's bounds;
// otherwise returns false ("start" may still have been modified).
666 Sci::Position trail = pos;
// Walk back over trail bytes, at most UTF8MaxBytes positions.
667 while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(cb.UCharAt(trail-1)))
668 trail--;
669 start = (trail > 0) ? trail-1 : trail;
670
671 const unsigned char leadByte = cb.UCharAt(start);
672 const int widthCharBytes = UTF8BytesOfLead[leadByte];
673 if (widthCharBytes == 1) {
674 return false;
675 } else {
676 const int trailBytes = widthCharBytes - 1;
677 const Sci::Position len = pos - start;
678 if (len > trailBytes)
679 // pos too far from lead
680 return false;
681 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
// Copy only bytes that exist in the buffer; missing ones stay zero.
682 for (Sci::Position b=1; b<widthCharBytes && ((start+b) < cb.Length()); b++)
683 charBytes[b] = cb.CharAt(start+b);
684 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
685 if (utf8status & UTF8MaskInvalid)
686 return false;
687 end = start + widthCharBytes;
688 return true;
689 }
690}
691
692// Normalise a position so that it is not halfway through a two byte character.
693// This can occur in two situations -
694// When lines are terminated with \r\n pairs which should be treated as one character.
695// When displaying DBCS text such as Japanese.
696// If moving, move the position in the indicated direction.
// NOTE(review): the signature line (listing line 697) is missing from this extract;
// presumably Document::MovePositionOutsideChar — confirm. The body uses "pos",
// "moveDir" and "checkLineEnd" from that signature.
// Positions at or below 0 yield 0 and positions at or beyond the document length
// yield the length.
698 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
699 // If out of range, just return minimum/maximum value.
700 if (pos <= 0)
701 return 0;
702 if (pos >= LengthNoExcept())
703 return LengthNoExcept();
704
705 // PLATFORM_ASSERT(pos > 0 && pos < LengthNoExcept());
// Between the CR and LF of a pair: step out forwards or backwards by moveDir.
706 if (checkLineEnd && IsCrLf(pos - 1)) {
707 if (moveDir > 0)
708 return pos + 1;
709 else
710 return pos - 1;
711 }
712
713 if (dbcsCodePage) {
714 if (SC_CP_UTF8 == dbcsCodePage) {
715 const unsigned char ch = cb.UCharAt(pos);
716 // If ch is not a trail byte then pos is valid intercharacter position
717 if (UTF8IsTrailByte(ch)) {
718 Sci::Position startUTF = pos;
719 Sci::Position endUTF = pos;
720 if (InGoodUTF8(pos, startUTF, endUTF)) {
721 // ch is a trail byte within a UTF-8 character
722 if (moveDir > 0)
723 pos = endUTF;
724 else
725 pos = startUTF;
726 }
727 // Else invalid UTF-8 so return position of isolated trail byte
728 }
729 } else {
730 // Anchor DBCS calculations at start of line because start of line can
731 // not be a DBCS trail byte.
732 const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos));
733 if (pos == posStartLine)
734 return pos;
735
736 // Step back until a non-lead-byte is found.
737 Sci::Position posCheck = pos;
738 while ((posCheck > posStartLine) && IsDBCSLeadByteNoExcept(cb.CharAt(posCheck-1)))
739 posCheck--;
740
741 // Check from known start of character.
742 while (posCheck < pos) {
743 const int mbsize = IsDBCSLeadByteNoExcept(cb.CharAt(posCheck)) ? 2 : 1;
744 if (posCheck + mbsize == pos) {
745 return pos;
746 } else if (posCheck + mbsize > pos) {
747 if (moveDir > 0) {
748 return posCheck + mbsize;
749 } else {
750 return posCheck;
751 }
752 }
753 posCheck += mbsize;
754 }
755 }
756 }
757
758 return pos;
759}
760
761// NextPosition moves between valid positions - it can not handle a position in the middle of a
762// multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
763// A \r\n pair is treated as two characters.
// NOTE(review): the signature line (listing line 764) is missing from this extract.
// The body uses "pos" and "moveDir" from that signature: moveDir > 0 moves
// forwards by one character, anything else moves backwards. Results are limited
// to the range 0..cb.Length().
765 // If out of range, just return minimum/maximum value.
766 const int increment = (moveDir > 0) ? 1 : -1;
767 if (pos + increment <= 0)
768 return 0;
769 if (pos + increment >= cb.Length())
770 return cb.Length();
771
772 if (dbcsCodePage) {
773 if (SC_CP_UTF8 == dbcsCodePage) {
774 if (increment == 1) {
775 // Simple forward movement case so can avoid some checks
776 const unsigned char leadByte = cb.UCharAt(pos);
777 if (UTF8IsAscii(leadByte)) {
778 // Single byte character or invalid
779 pos++;
780 } else {
781 const int widthCharBytes = UTF8BytesOfLead[leadByte];
782 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
783 for (int b=1; b<widthCharBytes; b++)
784 charBytes[b] = cb.CharAt(pos+b);
785 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
// Invalid sequences advance by a single byte; valid ones by their full width.
786 if (utf8status & UTF8MaskInvalid)
787 pos++;
788 else
789 pos += utf8status & UTF8MaskWidth;
790 }
791 } else {
792 // Examine byte before position
793 pos--;
794 const unsigned char ch = cb.UCharAt(pos);
795 // If ch is not a trail byte then pos is valid intercharacter position
796 if (UTF8IsTrailByte(ch)) {
797 // If ch is a trail byte in a valid UTF-8 character then return start of character
798 Sci::Position startUTF = pos;
799 Sci::Position endUTF = pos;
800 if (InGoodUTF8(pos, startUTF, endUTF)) {
801 pos = startUTF;
802 }
803 // Else invalid UTF-8 so return position of isolated trail byte
804 }
805 }
806 } else {
807 if (moveDir > 0) {
808 const int mbsize = IsDBCSLeadByteNoExcept(cb.CharAt(pos)) ? 2 : 1;
809 pos += mbsize;
810 if (pos > cb.Length())
811 pos = cb.Length();
812 } else {
813 // Anchor DBCS calculations at start of line because start of line can
814 // not be a DBCS trail byte.
815 const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos));
816 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
817 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
818 if ((pos - 1) <= posStartLine) {
819 return pos - 1;
820 } else if (IsDBCSLeadByteNoExcept(cb.CharAt(pos - 1))) {
821 // Must actually be trail byte
822 return pos - 2;
823 } else {
824 // Otherwise, step back until a non-lead-byte is found.
825 Sci::Position posTemp = pos - 1;
826 while (posStartLine <= --posTemp && IsDBCSLeadByteNoExcept(cb.CharAt(posTemp)))
827 ;
828 // Now posTemp+1 must point to the beginning of a character,
829 // so figure out whether we went back an even or an odd
830 // number of bytes and go back 1 or 2 bytes, respectively.
831 return (pos - 1 - ((pos - posTemp) & 1));
832 }
833 }
834 }
835 } else {
// No multi-byte code page: every byte is one character.
836 pos += increment;
837 }
838
839 return pos;
840}
841
842bool Document::NextCharacter(Sci::Position &pos, int moveDir) const noexcept {
843 // Returns true if pos changed
844 Sci::Position posNext = NextPosition(pos, moveDir);
845 if (posNext == pos) {
846 return false;
847 } else {
848 pos = posNext;
849 return true;
850 }
851}
852
// NOTE(review): the signature line and listing lines 855 and 870 (the statements
// inside the out-of-range and invalid-UTF-8 branches) are missing from this
// extract; presumably Document::CharacterAfter — confirm against the original file.
// Visible behaviour: extracts the character starting at "position" as a
// CharacterExtracted, decoding UTF-8 or DBCS according to dbcsCodePage.
854 if (position >= LengthNoExcept()) {
856 }
857 const unsigned char leadByte = cb.UCharAt(position);
858 if (!dbcsCodePage || UTF8IsAscii(leadByte)) {
859 // Common case: ASCII character
860 return CharacterExtracted(leadByte, 1);
861 }
862 if (SC_CP_UTF8 == dbcsCodePage) {
863 const int widthCharBytes = UTF8BytesOfLead[leadByte];
864 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
865 for (int b = 1; b<widthCharBytes; b++)
866 charBytes[b] = cb.UCharAt(position + b);
867 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
868 if (utf8status & UTF8MaskInvalid) {
869 // Treat as invalid and use up just one byte
871 } else {
872 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
873 }
874 } else {
// A DBCS lead byte only forms a pair when another byte follows it in the document.
875 if (IsDBCSLeadByteNoExcept(leadByte) && ((position + 1) < LengthNoExcept())) {
876 return CharacterExtracted::DBCS(leadByte, cb.UCharAt(position + 1));
877 } else {
878 return CharacterExtracted(leadByte, 1);
879 }
880 }
881}
882
// NOTE(review): the signature line and listing lines 885, 909 and 916 (the
// statements for the start-of-document and invalid-UTF-8 cases) are missing from
// this extract; presumably Document::CharacterBefore — confirm against the original file.
// Visible behaviour: extracts the character ending at "position" as a
// CharacterExtracted, decoding UTF-8 or DBCS according to dbcsCodePage.
884 if (position <= 0) {
886 }
887 const unsigned char previousByte = cb.UCharAt(position - 1);
888 if (0 == dbcsCodePage) {
889 return CharacterExtracted(previousByte, 1);
890 }
891 if (SC_CP_UTF8 == dbcsCodePage) {
892 if (UTF8IsAscii(previousByte)) {
893 return CharacterExtracted(previousByte, 1);
894 }
895 position--;
896 // If previousByte is not a trail byte then its invalid
897 if (UTF8IsTrailByte(previousByte)) {
898 // If previousByte is a trail byte in a valid UTF-8 character then find start of character
899 Sci::Position startUTF = position;
900 Sci::Position endUTF = position;
901 if (InGoodUTF8(position, startUTF, endUTF)) {
902 const Sci::Position widthCharBytes = endUTF - startUTF;
903 unsigned char charBytes[UTF8MaxBytes] = { 0, 0, 0, 0 };
904 for (Sci::Position b = 0; b<widthCharBytes; b++)
905 charBytes[b] = cb.UCharAt(startUTF + b);
906 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
907 if (utf8status & UTF8MaskInvalid) {
908 // Treat as invalid and use up just one byte
910 } else {
911 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
912 }
913 }
914 // Else invalid UTF-8 so return position of isolated trail byte
915 }
917 } else {
918 // Moving backwards in DBCS is complex so use NextPosition
919 const Sci::Position posStartCharacter = NextPosition(position, -1);
920 return CharacterAfter(posStartCharacter);
921 }
922}
923
924// Return -1 on out-of-bounds
// NOTE(review): the signature line (listing line 925) is missing from this extract;
// presumably Document::GetRelativePosition — confirm. The body uses
// "positionStart" and "characterOffset" from that signature.
// Moves characterOffset whole characters from positionStart. With a multi-byte
// code page it steps with NextPosition and returns INVALID_POSITION when it
// cannot move further; otherwise the offset is added directly and range-checked.
926 Sci::Position pos = positionStart;
927 if (dbcsCodePage) {
928 const int increment = (characterOffset > 0) ? 1 : -1;
929 while (characterOffset != 0) {
930 const Sci::Position posNext = NextPosition(pos, increment);
931 if (posNext == pos)
932 return INVALID_POSITION;
933 pos = posNext;
934 characterOffset -= increment;
935 }
936 } else {
937 pos = positionStart + characterOffset;
938 if ((pos < 0) || (pos > Length()))
939 return INVALID_POSITION;
940 }
941 return pos;
942}
943
944Sci::Position Document::GetRelativePositionUTF16(Sci::Position positionStart, Sci::Position characterOffset) const noexcept {
945 Sci::Position pos = positionStart;
946 if (dbcsCodePage) {
947 const int increment = (characterOffset > 0) ? 1 : -1;
948 while (characterOffset != 0) {
949 const Sci::Position posNext = NextPosition(pos, increment);
950 if (posNext == pos)
951 return INVALID_POSITION;
952 if (std::abs(pos-posNext) > 3) // 4 byte character = 2*UTF16.
953 characterOffset -= increment;
954 pos = posNext;
955 characterOffset -= increment;
956 }
957 } else {
958 pos = positionStart + characterOffset;
959 if ((pos < 0) || (pos > LengthNoExcept()))
960 return INVALID_POSITION;
961 }
962 return pos;
963}
964
// NOTE(review): signature line missing from the listing; presumably
// Document::GetCharacterAndWidth — confirm. The body uses "position" and the
// optional output pointer "pWidth" from that signature.
// Decodes the character starting at "position" according to dbcsCodePage and
// returns its value. When pWidth is non-null it receives the number of bytes
// the character occupies.
966 int character;
967 int bytesInCharacter = 1;
968 const unsigned char leadByte = cb.UCharAt(position);
969 if (dbcsCodePage) {
970 if (SC_CP_UTF8 == dbcsCodePage) {
971 if (UTF8IsAscii(leadByte)) {
972 // Single byte character or invalid
973 character = leadByte;
974 } else {
975 const int widthCharBytes = UTF8BytesOfLead[leadByte];
976 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
977 for (int b=1; b<widthCharBytes; b++)
978 charBytes[b] = cb.UCharAt(position+b);
979 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
980 if (utf8status & UTF8MaskInvalid) {
981 // Report as singleton surrogate values which are invalid Unicode
982 character = 0xDC80 + leadByte;
983 } else {
984 bytesInCharacter = utf8status & UTF8MaskWidth;
985 character = UnicodeFromUTF8(charBytes);
986 }
987 }
988 } else {
989 if (IsDBCSLeadByteNoExcept(leadByte)) {
990 bytesInCharacter = 2;
// DBCS value: lead byte in the high 8 bits, following byte in the low 8 bits.
991 character = (leadByte << 8) | cb.UCharAt(position+1);
992 } else {
993 character = leadByte;
994 }
995 }
996 } else {
997 character = leadByte;
998 }
999 if (pWidth) {
1000 *pWidth = bytesInCharacter;
1001 }
1002 return character;
1003}
1004
// NOTE(review): signature line missing from the listing; presumably Document::CodePage — confirm.
// Returns the document's current code page value.
1006 return dbcsCodePage;
1007}
1008
// NOTE(review): signature line missing from the listing; presumably Document::IsDBCSLeadByte — confirm.
// Thin wrapper that forwards to IsDBCSLeadByteNoExcept.
1010 // Used by lexers so must match IDocument method exactly
1011 return IsDBCSLeadByteNoExcept(ch);
1012}
1013
1014bool Document::IsDBCSLeadByteNoExcept(char ch) const noexcept {
1015 // Used inside core Scintilla
1016 // Byte ranges found in Wikipedia articles with relevant search strings in each case
1017 const unsigned char uch = ch;
1018 switch (dbcsCodePage) {
1019 case 932:
1020 // Shift_jis
1021 return ((uch >= 0x81) && (uch <= 0x9F)) ||
1022 ((uch >= 0xE0) && (uch <= 0xFC));
1023 // Lead bytes F0 to FC may be a Microsoft addition.
1024 case 936:
1025 // GBK
1026 return (uch >= 0x81) && (uch <= 0xFE);
1027 case 949:
1028 // Korean Wansung KS C-5601-1987
1029 return (uch >= 0x81) && (uch <= 0xFE);
1030 case 950:
1031 // Big5
1032 return (uch >= 0x81) && (uch <= 0xFE);
1033 case 1361:
1034 // Korean Johab KS C-5601-1992
1035 return
1036 ((uch >= 0x84) && (uch <= 0xD3)) ||
1037 ((uch >= 0xD8) && (uch <= 0xDE)) ||
1038 ((uch >= 0xE0) && (uch <= 0xF9));
1039 }
1040 return false;
1041}
1042
1043bool Document::IsDBCSLeadByteInvalid(char ch) const noexcept {
1044 const unsigned char lead = ch;
1045 switch (dbcsCodePage) {
1046 case 932:
1047 // Shift_jis
1048 return
1049 (lead == 0x85) ||
1050 (lead == 0x86) ||
1051 (lead == 0xEB) ||
1052 (lead == 0xEC) ||
1053 (lead == 0xEF) ||
1054 (lead == 0xFA) ||
1055 (lead == 0xFB) ||
1056 (lead == 0xFC);
1057 case 936:
1058 // GBK
1059 return (lead == 0x80) || (lead == 0xFF);
1060 case 949:
1061 // Korean Wansung KS C-5601-1987
1062 return (lead == 0x80) || (lead == 0xC9) || (lead >= 0xFE);
1063 case 950:
1064 // Big5
1065 return
1066 ((lead >= 0x80) && (lead <= 0xA0)) ||
1067 (lead == 0xC8) ||
1068 (lead >= 0xFA);
1069 case 1361:
1070 // Korean Johab KS C-5601-1992
1071 return
1072 ((lead >= 0x80) && (lead <= 0x83)) ||
1073 ((lead >= 0xD4) && (lead <= 0xD8)) ||
1074 (lead == 0xDF) ||
1075 (lead >= 0xFA);
1076 }
1077 return false;
1078}
1079
1080bool Document::IsDBCSTrailByteInvalid(char ch) const noexcept {
1081 const unsigned char trail = ch;
1082 switch (dbcsCodePage) {
1083 case 932:
1084 // Shift_jis
1085 return
1086 (trail <= 0x3F) ||
1087 (trail == 0x7F) ||
1088 (trail >= 0xFD);
1089 case 936:
1090 // GBK
1091 return
1092 (trail <= 0x3F) ||
1093 (trail == 0x7F) ||
1094 (trail == 0xFF);
1095 case 949:
1096 // Korean Wansung KS C-5601-1987
1097 return
1098 (trail <= 0x40) ||
1099 ((trail >= 0x5B) && (trail <= 0x60)) ||
1100 ((trail >= 0x7B) && (trail <= 0x80)) ||
1101 (trail == 0xFF);
1102 case 950:
1103 // Big5
1104 return
1105 (trail <= 0x3F) ||
1106 ((trail >= 0x7F) && (trail <= 0xA0)) ||
1107 (trail == 0xFF);
1108 case 1361:
1109 // Korean Johab KS C-5601-1992
1110 return
1111 (trail <= 0x30) ||
1112 (trail == 0x7F) ||
1113 (trail == 0x80) ||
1114 (trail == 0xFF);
1115 }
1116 return false;
1117}
1118
1119int Document::DBCSDrawBytes(const char *text, int len) const noexcept {
1120 if (len <= 1) {
1121 return len;
1122 }
1123 if (IsDBCSLeadByteNoExcept(text[0])) {
1124 return IsDBCSTrailByteInvalid(text[1]) ? 1 : 2;
1125 } else {
1126 return 1;
1127 }
1128}
1129
/// True when @a ch is a horizontal tab or a space character.
static constexpr bool IsSpaceOrTab(int ch) noexcept {
	return (ch == '\t') || (ch == ' ');
}
1133
1134// Need to break text into segments near lengthSegment but taking into
1135// account the encoding to not break inside a UTF-8 or DBCS character
1136// and also trying to avoid breaking inside a pair of combining characters.
1137// The segment length must always be long enough (more than 4 bytes)
1138// so that there will be at least one whole character to make a segment.
1139// For UTF-8, text must consist only of valid whole characters.
1140// In preference order from best to worst:
1141// 1) Break after space
1142// 2) Break before punctuation
1143// 3) Break after whole character
1144
1145int Document::SafeSegment(const char *text, int length, int lengthSegment) const noexcept {
1146 if (length <= lengthSegment)
1147 return length;
1148 int lastSpaceBreak = -1;
1149 int lastPunctuationBreak = -1;
1150 int lastEncodingAllowedBreak = 0;
1151 for (int j=0; j < lengthSegment;) {
1152 const unsigned char ch = text[j];
1153 if (j > 0) {
1154 if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
1155 lastSpaceBreak = j;
1156 }
1157 if (ch < 'A') {
1158 lastPunctuationBreak = j;
1159 }
1160 }
1161 lastEncodingAllowedBreak = j;
1162
1163 if (dbcsCodePage == SC_CP_UTF8) {
1164 j += UTF8BytesOfLead[ch];
1165 } else if (dbcsCodePage) {
1166 j += IsDBCSLeadByteNoExcept(ch) ? 2 : 1;
1167 } else {
1168 j++;
1169 }
1170 }
1171 if (lastSpaceBreak >= 0) {
1172 return lastSpaceBreak;
1173 } else if (lastPunctuationBreak >= 0) {
1174 return lastPunctuationBreak;
1175 }
1176 return lastEncodingAllowedBreak;
1177}
1178
1180 if (SC_CP_UTF8 == dbcsCodePage)
1182 else if (dbcsCodePage)
1183 return EncodingFamily::dbcs;
1184 else
1186}
1187
1189 if (endStyled > pos)
1190 endStyled = pos;
1191}
1192
1194 if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
1198 }
1199}
1200
1201// Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
1202// SetStyleAt does not change the persistent state of a document
1203
1205 if (pos < 0)
1206 return false;
1207 if (len <= 0)
1208 return false;
1209 if ((pos + len) > LengthNoExcept())
1210 return false;
1211 CheckReadOnly();
1212 if (enteredModification != 0) {
1213 return false;
1214 } else {
1216 if (!cb.IsReadOnly()) {
1220 pos, len,
1221 0, 0));
1222 const Sci::Line prevLinesTotal = LinesTotal();
1223 const bool startSavePoint = cb.IsSavePoint();
1224 bool startSequence = false;
1225 const char *text = cb.DeleteChars(pos, len, startSequence);
1226 if (startSavePoint && cb.IsCollectingUndo())
1227 NotifySavePoint(false);
1228 if ((pos < LengthNoExcept()) || (pos == 0))
1229 ModifiedAt(pos);
1230 else
1231 ModifiedAt(pos-1);
1235 pos, len,
1236 LinesTotal() - prevLinesTotal, text));
1237 }
1239 }
1240 return !cb.IsReadOnly();
1241}
1242
1243/**
1244 * Insert a string with a length.
1245 */
1247 if (insertLength <= 0) {
1248 return 0;
1249 }
1250 CheckReadOnly(); // Application may change read only state here
1251 if (cb.IsReadOnly()) {
1252 return 0;
1253 }
1254 if (enteredModification != 0) {
1255 return 0;
1256 }
1258 insertionSet = false;
1259 insertion.clear();
1263 position, insertLength,
1264 0, s));
1265 if (insertionSet) {
1266 s = insertion.c_str();
1267 insertLength = insertion.length();
1268 }
1272 position, insertLength,
1273 0, s));
1274 const Sci::Line prevLinesTotal = LinesTotal();
1275 const bool startSavePoint = cb.IsSavePoint();
1276 bool startSequence = false;
1277 const char *text = cb.InsertString(position, s, insertLength, startSequence);
1278 if (startSavePoint && cb.IsCollectingUndo())
1279 NotifySavePoint(false);
1284 position, insertLength,
1285 LinesTotal() - prevLinesTotal, text));
1286 if (insertionSet) { // Free memory as could be large
1287 std::string().swap(insertion);
1288 }
1290 return insertLength;
1291}
1292
1293void Document::ChangeInsertion(const char *s, Sci::Position length) {
1294 insertionSet = true;
1295 insertion.assign(s, length);
1296}
1297
1298int SCI_METHOD Document::AddData(const char *data, Sci_Position length) {
1299 try {
1300 const Sci::Position position = Length();
1301 InsertString(position, data, length);
1302 } catch (std::bad_alloc &) {
1303 return SC_STATUS_BADALLOC;
1304 } catch (...) {
1305 return SC_STATUS_FAILURE;
1306 }
1307 return 0;
1308}
1309
1311 return this;
1312}
1313
1315 Sci::Position newPos = -1;
1316 CheckReadOnly();
1317 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1319 if (!cb.IsReadOnly()) {
1320 const bool startSavePoint = cb.IsSavePoint();
1321 bool multiLine = false;
1322 const int steps = cb.StartUndo();
1323 //Platform::DebugPrintf("Steps=%d\n", steps);
1324 Sci::Position coalescedRemovePos = -1;
1325 Sci::Position coalescedRemoveLen = 0;
1326 Sci::Position prevRemoveActionPos = -1;
1327 Sci::Position prevRemoveActionLen = 0;
1328 for (int step = 0; step < steps; step++) {
1329 const Sci::Line prevLinesTotal = LinesTotal();
1330 const Action &action = cb.GetUndoStep();
1331 if (action.at == removeAction) {
1334 } else if (action.at == containerAction) {
1336 dm.token = action.position;
1337 NotifyModified(dm);
1338 if (!action.mayCoalesce) {
1339 coalescedRemovePos = -1;
1340 coalescedRemoveLen = 0;
1341 prevRemoveActionPos = -1;
1342 prevRemoveActionLen = 0;
1343 }
1344 } else {
1347 }
1349 if (action.at != containerAction) {
1350 ModifiedAt(action.position);
1351 newPos = action.position;
1352 }
1353
1354 int modFlags = SC_PERFORMED_UNDO;
1355 // With undo, an insertion action becomes a deletion notification
1356 if (action.at == removeAction) {
1357 newPos += action.lenData;
1358 modFlags |= SC_MOD_INSERTTEXT;
1359 if ((coalescedRemoveLen > 0) &&
1360 (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
1361 coalescedRemoveLen += action.lenData;
1362 newPos = coalescedRemovePos + coalescedRemoveLen;
1363 } else {
1364 coalescedRemovePos = action.position;
1365 coalescedRemoveLen = action.lenData;
1366 }
1367 prevRemoveActionPos = action.position;
1368 prevRemoveActionLen = action.lenData;
1369 } else if (action.at == insertAction) {
1370 modFlags |= SC_MOD_DELETETEXT;
1371 coalescedRemovePos = -1;
1372 coalescedRemoveLen = 0;
1373 prevRemoveActionPos = -1;
1374 prevRemoveActionLen = 0;
1375 }
1376 if (steps > 1)
1377 modFlags |= SC_MULTISTEPUNDOREDO;
1378 const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
1379 if (linesAdded != 0)
1380 multiLine = true;
1381 if (step == steps - 1) {
1382 modFlags |= SC_LASTSTEPINUNDOREDO;
1383 if (multiLine)
1384 modFlags |= SC_MULTILINEUNDOREDO;
1385 }
1386 NotifyModified(DocModification(modFlags, action.position, action.lenData,
1387 linesAdded, action.data.get()));
1388 }
1389
1390 const bool endSavePoint = cb.IsSavePoint();
1391 if (startSavePoint != endSavePoint)
1392 NotifySavePoint(endSavePoint);
1393 }
1395 }
1396 return newPos;
1397}
1398
1400 Sci::Position newPos = -1;
1401 CheckReadOnly();
1402 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1404 if (!cb.IsReadOnly()) {
1405 const bool startSavePoint = cb.IsSavePoint();
1406 bool multiLine = false;
1407 const int steps = cb.StartRedo();
1408 for (int step = 0; step < steps; step++) {
1409 const Sci::Line prevLinesTotal = LinesTotal();
1410 const Action &action = cb.GetRedoStep();
1411 if (action.at == insertAction) {
1414 } else if (action.at == containerAction) {
1416 dm.token = action.position;
1417 NotifyModified(dm);
1418 } else {
1421 }
1423 if (action.at != containerAction) {
1424 ModifiedAt(action.position);
1425 newPos = action.position;
1426 }
1427
1428 int modFlags = SC_PERFORMED_REDO;
1429 if (action.at == insertAction) {
1430 newPos += action.lenData;
1431 modFlags |= SC_MOD_INSERTTEXT;
1432 } else if (action.at == removeAction) {
1433 modFlags |= SC_MOD_DELETETEXT;
1434 }
1435 if (steps > 1)
1436 modFlags |= SC_MULTISTEPUNDOREDO;
1437 const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
1438 if (linesAdded != 0)
1439 multiLine = true;
1440 if (step == steps - 1) {
1441 modFlags |= SC_LASTSTEPINUNDOREDO;
1442 if (multiLine)
1443 modFlags |= SC_MULTILINEUNDOREDO;
1444 }
1446 DocModification(modFlags, action.position, action.lenData,
1447 linesAdded, action.data.get()));
1448 }
1449
1450 const bool endSavePoint = cb.IsSavePoint();
1451 if (startSavePoint != endSavePoint)
1452 NotifySavePoint(endSavePoint);
1453 }
1455 }
1456 return newPos;
1457}
1458
1461}
1462
1464 if (pos <= 0) {
1465 return;
1466 } else if (IsCrLf(pos - 2)) {
1467 DeleteChars(pos - 2, 2);
1468 } else if (dbcsCodePage) {
1469 const Sci::Position startChar = NextPosition(pos, -1);
1470 DeleteChars(startChar, pos - startChar);
1471 } else {
1472 DeleteChars(pos - 1, 1);
1473 }
1474}
1475
1476static constexpr Sci::Position NextTab(Sci::Position pos, Sci::Position tabSize) noexcept {
1477 return ((pos / tabSize) + 1) * tabSize;
1478}
1479
1480static std::string CreateIndentation(Sci::Position indent, int tabSize, bool insertSpaces) {
1481 std::string indentation;
1482 if (!insertSpaces) {
1483 while (indent >= tabSize) {
1484 indentation += '\t';
1485 indent -= tabSize;
1486 }
1487 }
1488 while (indent > 0) {
1489 indentation += ' ';
1490 indent--;
1491 }
1492 return indentation;
1493}
1494
1496 int indent = 0;
1497 if ((line >= 0) && (line < LinesTotal())) {
1498 const Sci::Position lineStart = LineStart(line);
1499 const Sci::Position length = Length();
1500 for (Sci::Position i = lineStart; i < length; i++) {
1501 const char ch = cb.CharAt(i);
1502 if (ch == ' ')
1503 indent++;
1504 else if (ch == '\t')
1505 indent = static_cast<int>(NextTab(indent, tabInChars));
1506 else
1507 return indent;
1508 }
1509 }
1510 return indent;
1511}
1512
1514 const int indentOfLine = GetLineIndentation(line);
1515 if (indent < 0)
1516 indent = 0;
1517 if (indent != indentOfLine) {
1518 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1519 const Sci::Position thisLineStart = LineStart(line);
1520 const Sci::Position indentPos = GetLineIndentPosition(line);
1521 UndoGroup ug(this);
1522 DeleteChars(thisLineStart, indentPos - thisLineStart);
1523 return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1524 linebuf.length());
1525 } else {
1527 }
1528}
1529
1531 if (line < 0)
1532 return 0;
1534 const Sci::Position length = Length();
1535 while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1536 pos++;
1537 }
1538 return pos;
1539}
1540
1542 Sci::Position column = 0;
1544 if ((line >= 0) && (line < LinesTotal())) {
1545 for (Sci::Position i = LineStart(line); i < pos;) {
1546 const char ch = cb.CharAt(i);
1547 if (ch == '\t') {
1548 column = NextTab(column, tabInChars);
1549 i++;
1550 } else if (ch == '\r') {
1551 return column;
1552 } else if (ch == '\n') {
1553 return column;
1554 } else if (i >= Length()) {
1555 return column;
1556 } else {
1557 column++;
1558 i = NextPosition(i, 1);
1559 }
1560 }
1561 }
1562 return column;
1563}
1564
1566 startPos = MovePositionOutsideChar(startPos, 1, false);
1567 endPos = MovePositionOutsideChar(endPos, -1, false);
1568 Sci::Position count = 0;
1569 Sci::Position i = startPos;
1570 while (i < endPos) {
1571 count++;
1572 i = NextPosition(i, 1);
1573 }
1574 return count;
1575}
1576
1578 startPos = MovePositionOutsideChar(startPos, 1, false);
1579 endPos = MovePositionOutsideChar(endPos, -1, false);
1580 Sci::Position count = 0;
1581 Sci::Position i = startPos;
1582 while (i < endPos) {
1583 count++;
1584 const Sci::Position next = NextPosition(i, 1);
1585 if ((next - i) > 3)
1586 count++;
1587 i = next;
1588 }
1589 return count;
1590}
1591
1594 if ((line >= 0) && (line < LinesTotal())) {
1595 Sci::Position columnCurrent = 0;
1596 while ((columnCurrent < column) && (position < Length())) {
1597 const char ch = cb.CharAt(position);
1598 if (ch == '\t') {
1599 columnCurrent = NextTab(columnCurrent, tabInChars);
1600 if (columnCurrent > column)
1601 return position;
1602 position++;
1603 } else if (ch == '\r') {
1604 return position;
1605 } else if (ch == '\n') {
1606 return position;
1607 } else {
1608 columnCurrent++;
1610 }
1611 }
1612 }
1613 return position;
1614}
1615
1616void Document::Indent(bool forwards, Sci::Line lineBottom, Sci::Line lineTop) {
1617 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1618 for (Sci::Line line = lineBottom; line >= lineTop; line--) {
1619 const Sci::Position indentOfLine = GetLineIndentation(line);
1620 if (forwards) {
1621 if (LineStart(line) < LineEnd(line)) {
1622 SetLineIndentation(line, indentOfLine + IndentSize());
1623 }
1624 } else {
1625 SetLineIndentation(line, indentOfLine - IndentSize());
1626 }
1627 }
1628}
1629
1630// Convert line endings for a piece of text to a particular mode.
1631// Stop at len or when a NUL is found.
1632std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1633 std::string dest;
1634 for (size_t i = 0; (i < len) && (s[i]); i++) {
1635 if (s[i] == '\n' || s[i] == '\r') {
1636 if (eolModeWanted == SC_EOL_CR) {
1637 dest.push_back('\r');
1638 } else if (eolModeWanted == SC_EOL_LF) {
1639 dest.push_back('\n');
1640 } else { // eolModeWanted == SC_EOL_CRLF
1641 dest.push_back('\r');
1642 dest.push_back('\n');
1643 }
1644 if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1645 i++;
1646 }
1647 } else {
1648 dest.push_back(s[i]);
1649 }
1650 }
1651 return dest;
1652}
1653
1654void Document::ConvertLineEnds(int eolModeSet) {
1655 UndoGroup ug(this);
1656
1657 for (Sci::Position pos = 0; pos < Length(); pos++) {
1658 if (cb.CharAt(pos) == '\r') {
1659 if (cb.CharAt(pos + 1) == '\n') {
1660 // CRLF
1661 if (eolModeSet == SC_EOL_CR) {
1662 DeleteChars(pos + 1, 1); // Delete the LF
1663 } else if (eolModeSet == SC_EOL_LF) {
1664 DeleteChars(pos, 1); // Delete the CR
1665 } else {
1666 pos++;
1667 }
1668 } else {
1669 // CR
1670 if (eolModeSet == SC_EOL_CRLF) {
1671 pos += InsertString(pos + 1, "\n", 1); // Insert LF
1672 } else if (eolModeSet == SC_EOL_LF) {
1673 pos += InsertString(pos, "\n", 1); // Insert LF
1674 DeleteChars(pos, 1); // Delete CR
1675 pos--;
1676 }
1677 }
1678 } else if (cb.CharAt(pos) == '\n') {
1679 // LF
1680 if (eolModeSet == SC_EOL_CRLF) {
1681 pos += InsertString(pos, "\r", 1); // Insert CR
1682 } else if (eolModeSet == SC_EOL_CR) {
1683 pos += InsertString(pos, "\r", 1); // Insert CR
1684 DeleteChars(pos, 1); // Delete LF
1685 pos--;
1686 }
1687 }
1688 }
1689
1690}
1691
1692int Document::Options() const noexcept {
1693 return (IsLarge() ? SC_DOCUMENTOPTION_TEXT_LARGE : 0) |
1695}
1696
1698 Sci::Position currentChar = LineStart(line);
1699 const Sci::Position endLine = LineEnd(line);
1700 while (currentChar < endLine) {
1701 if (!IsSpaceOrTab(cb.CharAt(currentChar))) {
1702 return false;
1703 }
1704 ++currentChar;
1705 }
1706 return true;
1707}
1708
1711 line--;
1712 while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1713 line--;
1714 }
1715 while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1716 line--;
1717 }
1718 line++;
1719 return LineStart(line);
1720}
1721
1724 while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1725 line++;
1726 }
1727 while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1728 line++;
1729 }
1730 if (line < LinesTotal())
1731 return LineStart(line);
1732 else // end of a document
1733 return LineEnd(line-1);
1734}
1735
1737 if (dbcsCodePage && (!UTF8IsAscii(ch))) {
1738 if (SC_CP_UTF8 == dbcsCodePage) {
1739 // Use hard coded Unicode class
1740 const CharacterCategory cc = charMap.CategoryFor(ch);
1741 switch (cc) {
1742
1743 // Separator, Line/Paragraph
1744 case ccZl:
1745 case ccZp:
1747
1748 // Separator, Space
1749 case ccZs:
1750 // Other
1751 case ccCc:
1752 case ccCf:
1753 case ccCs:
1754 case ccCo:
1755 case ccCn:
1756 return CharClassify::ccSpace;
1757
1758 // Letter
1759 case ccLu:
1760 case ccLl:
1761 case ccLt:
1762 case ccLm:
1763 case ccLo:
1764 // Number
1765 case ccNd:
1766 case ccNl:
1767 case ccNo:
1768 // Mark - includes combining diacritics
1769 case ccMn:
1770 case ccMc:
1771 case ccMe:
1772 return CharClassify::ccWord;
1773
1774 // Punctuation
1775 case ccPc:
1776 case ccPd:
1777 case ccPs:
1778 case ccPe:
1779 case ccPi:
1780 case ccPf:
1781 case ccPo:
1782 // Symbol
1783 case ccSm:
1784 case ccSc:
1785 case ccSk:
1786 case ccSo:
1788
1789 }
1790 } else {
1791 // Asian DBCS
1792 return CharClassify::ccWord;
1793 }
1794 }
1795 return charClass.GetClass(static_cast<unsigned char>(ch));
1796}
1797
1798/**
1799 * Used by commands that want to select whole words.
1800 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1801 */
1802Sci::Position Document::ExtendWordSelect(Sci::Position pos, int delta, bool onlyWordCharacters) const {
1804 if (delta < 0) {
1805 if (!onlyWordCharacters) {
1807 ccStart = WordCharacterClass(ce.character);
1808 }
1809 while (pos > 0) {
1811 if (WordCharacterClass(ce.character) != ccStart)
1812 break;
1813 pos -= ce.widthBytes;
1814 }
1815 } else {
1816 if (!onlyWordCharacters && pos < LengthNoExcept()) {
1818 ccStart = WordCharacterClass(ce.character);
1819 }
1820 while (pos < LengthNoExcept()) {
1822 if (WordCharacterClass(ce.character) != ccStart)
1823 break;
1824 pos += ce.widthBytes;
1825 }
1826 }
1827 return MovePositionOutsideChar(pos, delta, true);
1828}
1829
1830/**
1831 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1832 * (delta < 0).
1833 * This is looking for a transition between character classes although there is also some
1834 * additional movement to transit white space.
1835 * Used by cursor movement by word commands.
1836 */
1838 if (delta < 0) {
1839 while (pos > 0) {
1842 break;
1843 pos -= ce.widthBytes;
1844 }
1845 if (pos > 0) {
1847 const CharClassify::cc ccStart = WordCharacterClass(ce.character);
1848 while (pos > 0) {
1849 ce = CharacterBefore(pos);
1850 if (WordCharacterClass(ce.character) != ccStart)
1851 break;
1852 pos -= ce.widthBytes;
1853 }
1854 }
1855 } else {
1857 const CharClassify::cc ccStart = WordCharacterClass(ce.character);
1858 while (pos < LengthNoExcept()) {
1859 ce = CharacterAfter(pos);
1860 if (WordCharacterClass(ce.character) != ccStart)
1861 break;
1862 pos += ce.widthBytes;
1863 }
1864 while (pos < LengthNoExcept()) {
1865 ce = CharacterAfter(pos);
1867 break;
1868 pos += ce.widthBytes;
1869 }
1870 }
1871 return pos;
1872}
1873
1874/**
1875 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1876 * (delta < 0).
1877 * This is looking for a transition between character classes although there is also some
1878 * additional movement to transit white space.
1879 * Used by cursor movement by word commands.
1880 */
1882 if (delta < 0) {
1883 if (pos > 0) {
1885 const CharClassify::cc ccStart = WordCharacterClass(ce.character);
1886 if (ccStart != CharClassify::ccSpace) {
1887 while (pos > 0) {
1888 ce = CharacterBefore(pos);
1889 if (WordCharacterClass(ce.character) != ccStart)
1890 break;
1891 pos -= ce.widthBytes;
1892 }
1893 }
1894 while (pos > 0) {
1895 ce = CharacterBefore(pos);
1897 break;
1898 pos -= ce.widthBytes;
1899 }
1900 }
1901 } else {
1902 while (pos < LengthNoExcept()) {
1905 break;
1906 pos += ce.widthBytes;
1907 }
1908 if (pos < LengthNoExcept()) {
1910 const CharClassify::cc ccStart = WordCharacterClass(ce.character);
1911 while (pos < LengthNoExcept()) {
1912 ce = CharacterAfter(pos);
1913 if (WordCharacterClass(ce.character) != ccStart)
1914 break;
1915 pos += ce.widthBytes;
1916 }
1917 }
1918 }
1919 return pos;
1920}
1921
1922/**
1923 * Check that the character at the given position is a word or punctuation character and that
1924 * the previous character is of a different character class.
1925 */
1927 if (pos >= LengthNoExcept())
1928 return false;
1929 if (pos > 0) {
1930 const CharacterExtracted cePos = CharacterAfter(pos);
1931 const CharClassify::cc ccPos = WordCharacterClass(cePos.character);
1932 const CharacterExtracted cePrev = CharacterBefore(pos);
1933 const CharClassify::cc ccPrev = WordCharacterClass(cePrev.character);
1934 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1935 (ccPos != ccPrev);
1936 }
1937 return true;
1938}
1939
1940/**
1941 * Check that the character at the given position is a word or punctuation character and that
1942 * the next character is of a different character class.
1943 */
1945 if (pos <= 0)
1946 return false;
1947 if (pos < LengthNoExcept()) {
1948 const CharacterExtracted cePos = CharacterAfter(pos);
1949 const CharClassify::cc ccPos = WordCharacterClass(cePos.character);
1950 const CharacterExtracted cePrev = CharacterBefore(pos);
1951 const CharClassify::cc ccPrev = WordCharacterClass(cePrev.character);
1952 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1953 (ccPrev != ccPos);
1954 }
1955 return true;
1956}
1957
1958/**
 * Check that the given range has transitions between character classes at both
1960 * ends and where the characters on the inside are word or punctuation characters.
1961 */
1963 return (start < end) && IsWordStartAt(start) && IsWordEndAt(end);
1964}
1965
1966bool Document::MatchesWordOptions(bool word, bool wordStart, Sci::Position pos, Sci::Position length) const {
1967 return (!word && !wordStart) ||
1968 (word && IsWordAt(pos, pos + length)) ||
1969 (wordStart && IsWordStartAt(pos));
1970}
1971
1972bool Document::HasCaseFolder() const noexcept {
1973 return pcf != nullptr;
1974}
1975
1977 pcf.reset(pcf_);
1978}
1979
1981 const unsigned char leadByte = cb.UCharAt(position);
1982 if (UTF8IsAscii(leadByte)) {
1983 // Common case: ASCII character
1984 return CharacterExtracted(leadByte, 1);
1985 }
1986 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1987 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
1988 for (int b=1; b<widthCharBytes; b++)
1989 charBytes[b] = cb.UCharAt(position + b);
1990 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
1991 if (utf8status & UTF8MaskInvalid) {
1992 // Treat as invalid and use up just one byte
1994 } else {
1995 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
1996 }
1997}
1998
1999/**
2000 * Find text in document, supporting both forward and backward
2001 * searches (just pass minPos > maxPos to do a backward search)
2002 * Has not been tested with backwards DBCS searches yet.
2003 */
2005 int flags, Sci::Position *length) {
2006 if (*length <= 0)
2007 return minPos;
2008 const bool caseSensitive = (flags & SCFIND_MATCHCASE) != 0;
2009 const bool word = (flags & SCFIND_WHOLEWORD) != 0;
2010 const bool wordStart = (flags & SCFIND_WORDSTART) != 0;
2011 const bool regExp = (flags & SCFIND_REGEXP) != 0;
2012 if (regExp) {
2013 if (!regex)
2014 regex = std::unique_ptr<RegexSearchBase>(CreateRegexSearch(&charClass));
2015 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
2016 } else {
2017
2018 const bool forward = minPos <= maxPos;
2019 const int increment = forward ? 1 : -1;
2020
2021 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2022 const Sci::Position startPos = MovePositionOutsideChar(minPos, increment, false);
2023 const Sci::Position endPos = MovePositionOutsideChar(maxPos, increment, false);
2024
2025 // Compute actual search ranges needed
2026 const Sci::Position lengthFind = *length;
2027
2028 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
2029 const Sci::Position limitPos = std::max(startPos, endPos);
2030 Sci::Position pos = startPos;
2031 if (!forward) {
2032 // Back all of a character
2033 pos = NextPosition(pos, increment);
2034 }
2035 if (caseSensitive) {
2036 const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
2037 const char charStartSearch = search[0];
2038 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
2039 if (CharAt(pos) == charStartSearch) {
2040 bool found = (pos + lengthFind) <= limitPos;
2041 for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
2042 found = CharAt(pos + indexSearch) == search[indexSearch];
2043 }
2044 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
2045 return pos;
2046 }
2047 }
2048 if (!NextCharacter(pos, increment))
2049 break;
2050 }
2051 } else if (SC_CP_UTF8 == dbcsCodePage) {
2052 constexpr size_t maxFoldingExpansion = 4;
2053 std::vector<char> searchThing((lengthFind+1) * UTF8MaxBytes * maxFoldingExpansion + 1);
2054 const size_t lenSearch =
2055 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
2056 char bytes[UTF8MaxBytes + 1] = "";
2057 char folded[UTF8MaxBytes * maxFoldingExpansion + 1] = "";
2058 while (forward ? (pos < endPos) : (pos >= endPos)) {
2059 int widthFirstCharacter = 0;
2060 Sci::Position posIndexDocument = pos;
2061 size_t indexSearch = 0;
2062 bool characterMatches = true;
2063 for (;;) {
2064 const unsigned char leadByte = cb.UCharAt(posIndexDocument);
2065 bytes[0] = leadByte;
2066 int widthChar = 1;
2067 if (!UTF8IsAscii(leadByte)) {
2068 const int widthCharBytes = UTF8BytesOfLead[leadByte];
2069 for (int b=1; b<widthCharBytes; b++) {
2070 bytes[b] = cb.CharAt(posIndexDocument+b);
2071 }
2072 widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
2073 }
2074 if (!widthFirstCharacter)
2075 widthFirstCharacter = widthChar;
2076 if ((posIndexDocument + widthChar) > limitPos)
2077 break;
2078 const size_t lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
2079 // memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
2080 assert((indexSearch + lenFlat) <= searchThing.size());
2081 // Does folded match the buffer
2082 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
2083 if (!characterMatches)
2084 break;
2085 posIndexDocument += widthChar;
2086 indexSearch += lenFlat;
2087 if (indexSearch >= lenSearch)
2088 break;
2089 }
2090 if (characterMatches && (indexSearch == lenSearch)) {
2091 if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
2092 *length = posIndexDocument - pos;
2093 return pos;
2094 }
2095 }
2096 if (forward) {
2097 pos += widthFirstCharacter;
2098 } else {
2099 if (!NextCharacter(pos, increment))
2100 break;
2101 }
2102 }
2103 } else if (dbcsCodePage) {
2104 constexpr size_t maxBytesCharacter = 2;
2105 constexpr size_t maxFoldingExpansion = 4;
2106 std::vector<char> searchThing((lengthFind+1) * maxBytesCharacter * maxFoldingExpansion + 1);
2107 const size_t lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
2108 while (forward ? (pos < endPos) : (pos >= endPos)) {
2109 Sci::Position indexDocument = 0;
2110 size_t indexSearch = 0;
2111 bool characterMatches = true;
2112 while (characterMatches &&
2113 ((pos + indexDocument) < limitPos) &&
2114 (indexSearch < lenSearch)) {
2115 char bytes[maxBytesCharacter + 1];
2116 bytes[0] = cb.CharAt(pos + indexDocument);
2117 const Sci::Position widthChar = IsDBCSLeadByteNoExcept(bytes[0]) ? 2 : 1;
2118 if (widthChar == 2)
2119 bytes[1] = cb.CharAt(pos + indexDocument + 1);
2120 if ((pos + indexDocument + widthChar) > limitPos)
2121 break;
2122 char folded[maxBytesCharacter * maxFoldingExpansion + 1];
2123 const size_t lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
2124 // memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
2125 assert((indexSearch + lenFlat) <= searchThing.size());
2126 // Does folded match the buffer
2127 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
2128 indexDocument += widthChar;
2129 indexSearch += lenFlat;
2130 }
2131 if (characterMatches && (indexSearch == lenSearch)) {
2132 if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
2133 *length = indexDocument;
2134 return pos;
2135 }
2136 }
2137 if (!NextCharacter(pos, increment))
2138 break;
2139 }
2140 } else {
2141 const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
2142 std::vector<char> searchThing(lengthFind + 1);
2143 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
2144 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
2145 bool found = (pos + lengthFind) <= limitPos;
2146 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
2147 const char ch = CharAt(pos + indexSearch);
2148 char folded[2];
2149 pcf->Fold(folded, sizeof(folded), &ch, 1);
2150 found = folded[0] == searchThing[indexSearch];
2151 }
2152 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
2153 return pos;
2154 }
2155 if (!NextCharacter(pos, increment))
2156 break;
2157 }
2158 }
2159 }
2160 //Platform::DebugPrintf("Not found\n");
2161 return -1;
2162}
2163
2164const char *Document::SubstituteByPosition(const char *text, Sci::Position *length) {
2165 if (regex)
2166 return regex->SubstituteByPosition(this, text, length);
2167 else
2168 return nullptr;
2169}
2170
2171int Document::LineCharacterIndex() const noexcept {
2172 return cb.LineCharacterIndex();
2173}
2174
2175void Document::AllocateLineCharacterIndex(int lineCharacterIndex) {
2176 return cb.AllocateLineCharacterIndex(lineCharacterIndex);
2177}
2178
2179void Document::ReleaseLineCharacterIndex(int lineCharacterIndex) {
2180 return cb.ReleaseLineCharacterIndex(lineCharacterIndex);
2181}
2182
2184 return cb.Lines();
2185}
2186
2187void Document::SetDefaultCharClasses(bool includeWordClass) {
2188 charClass.SetDefaultCharClasses(includeWordClass);
2189}
2190
2191void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
2192 charClass.SetCharClasses(chars, newCharClass);
2193}
2194
2195int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) const {
2196 return charClass.GetCharsOfClass(characterClass, buffer);
2197}
2198
2200 charMap.Optimize(countCharacters);
2201}
2202
2204 return charMap.Size();
2205}
2206
2209}
2210
2212 if (enteredStyling != 0) {
2213 return false;
2214 } else {
2216 const Sci::Position prevEndStyled = endStyled;
2217 if (cb.SetStyleFor(endStyled, length, style)) {
2219 prevEndStyled, length);
2220 NotifyModified(mh);
2221 }
2222 endStyled += length;
2224 return true;
2225 }
2226}
2227
2228bool SCI_METHOD Document::SetStyles(Sci_Position length, const char *styles) {
2229 if (enteredStyling != 0) {
2230 return false;
2231 } else {
2233 bool didChange = false;
2234 Sci::Position startMod = 0;
2235 Sci::Position endMod = 0;
2236 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
2238 if (cb.SetStyleAt(endStyled, styles[iPos])) {
2239 if (!didChange) {
2240 startMod = endStyled;
2241 }
2242 didChange = true;
2243 endMod = endStyled;
2244 }
2245 }
2246 if (didChange) {
2248 startMod, endMod - startMod + 1);
2249 NotifyModified(mh);
2250 }
2252 return true;
2253 }
2254}
2255
2257 if ((enteredStyling == 0) && (pos > GetEndStyled())) {
2259 if (pli && !pli->UseContainerLexing()) {
2260 const Sci::Line lineEndStyled = SciLineFromPosition(GetEndStyled());
2261 const Sci::Position endStyledTo = LineStart(lineEndStyled);
2262 pli->Colourise(endStyledTo, pos);
2263 } else {
2264 // Ask the watchers to style, and stop as soon as one responds.
2265 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
2266 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
2267 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
2268 }
2269 }
2270 }
2271}
2272
2274 const Sci::Line lineFirst = SciLineFromPosition(GetEndStyled());
2275 ElapsedPeriod epStyling;
2277 const Sci::Line lineLast = SciLineFromPosition(GetEndStyled());
2278 durationStyleOneLine.AddSample(lineLast - lineFirst, epStyling.Duration());
2279}
2280
2282 // Tell the watchers the lexer has changed.
2283 for (const WatcherWithUserData &watcher : watchers) {
2284 watcher.watcher->NotifyLexerChanged(this, watcher.userData);
2285 }
2286}
2287
2289 return pli.get();
2290}
2291
2292void Document::SetLexInterface(std::unique_ptr<LexInterface> pLexInterface) noexcept {
2293 pli = std::move(pLexInterface);
2294}
2295
2297 const int statePrevious = States()->SetLineState(line, state);
2298 if (state != statePrevious) {
2299 const DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, nullptr,
2300 static_cast<Sci::Line>(line));
2301 NotifyModified(mh);
2302 }
2303 return statePrevious;
2304}
2305
2307 return States()->GetLineState(line);
2308}
2309
2311 return States()->GetMaxLineState();
2312}
2313
2315 const DocModification mh(SC_MOD_LEXERSTATE, start,
2316 end-start, 0, 0, 0);
2317 NotifyModified(mh);
2318}
2319
2321 const LineAnnotation *pla = Margins();
2322 return StyledText(pla->Length(line), pla->Text(line),
2323 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2324}
2325
2327 Margins()->SetText(line, text);
2329 0, 0, 0, line);
2330 NotifyModified(mh);
2331}
2332
2334 Margins()->SetStyle(line, style);
2336 0, 0, 0, line));
2337}
2338
2339void Document::MarginSetStyles(Sci::Line line, const unsigned char *styles) {
2340 Margins()->SetStyles(line, styles);
2342 0, 0, 0, line));
2343}
2344
2346 const Sci::Line maxEditorLine = LinesTotal();
2347 for (Sci::Line l=0; l<maxEditorLine; l++)
2348 MarginSetText(l, nullptr);
2349 // Free remaining data
2350 Margins()->ClearAll();
2351}
2352
2354 const LineAnnotation *pla = Annotations();
2355 return StyledText(pla->Length(line), pla->Text(line),
2356 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2357}
2358
2360 if (line >= 0 && line < LinesTotal()) {
2361 const Sci::Line linesBefore = AnnotationLines(line);
2363 const int linesAfter = AnnotationLines(line);
2365 0, 0, 0, line);
2366 mh.annotationLinesAdded = linesAfter - linesBefore;
2367 NotifyModified(mh);
2368 }
2369}
2370
2372 if (line >= 0 && line < LinesTotal()) {
2373 Annotations()->SetStyle(line, style);
2375 0, 0, 0, line);
2376 NotifyModified(mh);
2377 }
2378}
2379
2380void Document::AnnotationSetStyles(Sci::Line line, const unsigned char *styles) {
2381 if (line >= 0 && line < LinesTotal()) {
2382 Annotations()->SetStyles(line, styles);
2383 }
2384}
2385
2387 return Annotations()->Lines(line);
2388}
2389
2391 const Sci::Line maxEditorLine = LinesTotal();
2392 for (Sci::Line l=0; l<maxEditorLine; l++)
2393 AnnotationSetText(l, nullptr);
2394 // Free remaining data
2395 Annotations()->ClearAll();
2396}
2397
2399 const LineAnnotation *pla = EOLAnnotations();
2400 return StyledText(pla->Length(line), pla->Text(line),
2401 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2402}
2403
2405 if (line >= 0 && line < LinesTotal()) {
2408 0, 0, 0, line);
2409 NotifyModified(mh);
2410 }
2411}
2412
2414 if (line >= 0 && line < LinesTotal()) {
2415 EOLAnnotations()->SetStyle(line, style);
2417 0, 0, 0, line);
2418 NotifyModified(mh);
2419 }
2420}
2421
2423 const Sci::Line maxEditorLine = LinesTotal();
2424 for (Sci::Line l=0; l<maxEditorLine; l++)
2425 EOLAnnotationSetText(l, nullptr);
2426 // Free remaining data
2428}
2429
2431 styleClock = (styleClock + 1) % 0x100000;
2432}
2433
2435 decorations->SetCurrentIndicator(indicator);
2436}
2437
2439 const FillResult<Sci::Position> fr = decorations->FillRange(
2440 position, value, fillLength);
2441 if (fr.changed) {
2443 fr.position, fr.fillLength);
2444 NotifyModified(mh);
2445 }
2446}
2447
2448bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
2449 const WatcherWithUserData wwud(watcher, userData);
2450 std::vector<WatcherWithUserData>::iterator it =
2451 std::find(watchers.begin(), watchers.end(), wwud);
2452 if (it != watchers.end())
2453 return false;
2454 watchers.push_back(wwud);
2455 return true;
2456}
2457
2458bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
2459 std::vector<WatcherWithUserData>::iterator it =
2460 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
2461 if (it != watchers.end()) {
2462 watchers.erase(it);
2463 return true;
2464 }
2465 return false;
2466}
2467
2469 for (const WatcherWithUserData &watcher : watchers) {
2470 watcher.watcher->NotifyModifyAttempt(this, watcher.userData);
2471 }
2472}
2473
2474void Document::NotifySavePoint(bool atSavePoint) {
2475 for (const WatcherWithUserData &watcher : watchers) {
2476 watcher.watcher->NotifySavePoint(this, watcher.userData, atSavePoint);
2477 }
2478}
2479
2482 decorations->InsertSpace(mh.position, mh.length);
2483 } else if (mh.modificationType & SC_MOD_DELETETEXT) {
2484 decorations->DeleteRange(mh.position, mh.length);
2485 }
2486 for (const WatcherWithUserData &watcher : watchers) {
2487 watcher.watcher->NotifyModified(this, mh, watcher.userData);
2488 }
2489}
2490
// Used for word part navigation.
// Reports whether ch is one of the 32 ASCII punctuation characters, which
// occupy four contiguous runs of the ASCII table.
static bool IsASCIIPunctuationCharacter(unsigned int ch) noexcept {
	const bool bangToSlash = (ch >= '!') && (ch <= '/');      // ! " # $ % & ' ( ) * + , - . /
	const bool colonToAt = (ch >= ':') && (ch <= '@');        // : ; < = > ? @
	const bool bracketToTick = (ch >= '[') && (ch <= '`');    // [ \ ] ^ _ `
	const bool braceToTilde = (ch >= '{') && (ch <= '~');     // { | } ~
	return bangToSlash || colonToAt || bracketToTick || braceToTilde;
}
2531
2532bool Document::IsWordPartSeparator(unsigned int ch) const {
2534}
2535
2537 if (pos > 0) {
2540 if (IsWordPartSeparator(ceStart.character)) {
2543 }
2544 }
2545 if (pos > 0) {
2546 ceStart = CharacterAfter(pos);
2548 if (IsLowerCase(ceStart.character)) {
2549 while (pos > 0 && IsLowerCase(CharacterAfter(pos).character))
2553 } else if (IsUpperCase(ceStart.character)) {
2554 while (pos > 0 && IsUpperCase(CharacterAfter(pos).character))
2558 } else if (IsADigit(ceStart.character)) {
2559 while (pos > 0 && IsADigit(CharacterAfter(pos).character))
2563 } else if (IsASCIIPunctuationCharacter(ceStart.character)) {
2568 } else if (isspacechar(ceStart.character)) {
2569 while (pos > 0 && isspacechar(CharacterAfter(pos).character))
2573 } else if (!IsASCII(ceStart.character)) {
2574 while (pos > 0 && !IsASCII(CharacterAfter(pos).character))
2578 } else {
2580 }
2581 }
2582 }
2583 return pos;
2584}
2585
2588 const Sci::Position length = LengthNoExcept();
2589 if (IsWordPartSeparator(ceStart.character)) {
2590 while (pos < length && IsWordPartSeparator(CharacterAfter(pos).character))
2592 ceStart = CharacterAfter(pos);
2593 }
2594 if (!IsASCII(ceStart.character)) {
2595 while (pos < length && !IsASCII(CharacterAfter(pos).character))
2597 } else if (IsLowerCase(ceStart.character)) {
2598 while (pos < length && IsLowerCase(CharacterAfter(pos).character))
2600 } else if (IsUpperCase(ceStart.character)) {
2603 while (pos < length && IsLowerCase(CharacterAfter(pos).character))
2605 } else {
2606 while (pos < length && IsUpperCase(CharacterAfter(pos).character))
2608 }
2611 } else if (IsADigit(ceStart.character)) {
2612 while (pos < length && IsADigit(CharacterAfter(pos).character))
2614 } else if (IsASCIIPunctuationCharacter(ceStart.character)) {
2617 } else if (isspacechar(ceStart.character)) {
2618 while (pos < length && isspacechar(CharacterAfter(pos).character))
2620 } else {
2622 }
2623 return pos;
2624}
2625
// True for the two bytes that can form a line end: carriage return and line feed.
static constexpr bool IsLineEndChar(char c) noexcept {
	switch (c) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
2629
2630Sci::Position Document::ExtendStyleRange(Sci::Position pos, int delta, bool singleLine) noexcept {
2631 const int sStart = cb.StyleAt(pos);
2632 if (delta < 0) {
2633 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2634 pos--;
2635 pos++;
2636 } else {
2637 while (pos < (LengthNoExcept()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2638 pos++;
2639 }
2640 return pos;
2641}
2642
// Return the brace that pairs with ch for (), [], {} and <>,
// or '\0' when ch is not one of those eight characters.
static char BraceOpposite(char ch) noexcept {
	// Partners are stored side by side, so flipping the lowest bit of the
	// index of one brace gives the index of the other.
	constexpr char pairs[] = "()[]{}<>";
	constexpr int pairCount = static_cast<int>(sizeof(pairs)) - 1;	// exclude the terminating NUL
	for (int i = 0; i < pairCount; i++) {
		if (pairs[i] == ch) {
			return pairs[i ^ 1];
		}
	}
	return '\0';
}
2665
2666// TODO: should be able to extend styled region to find matching brace
2667Sci::Position Document::BraceMatch(Sci::Position position, Sci::Position /*maxReStyle*/, Sci::Position startPos, bool useStartPos) noexcept {
2668 const char chBrace = CharAt(position);
2669 const char chSeek = BraceOpposite(chBrace);
2670 if (chSeek == '\0')
2671 return - 1;
2672 const int styBrace = StyleIndexAt(position);
2673 int direction = -1;
2674 if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2675 direction = 1;
2676 int depth = 1;
2677 position = useStartPos ? startPos : NextPosition(position, direction);
2678 while ((position >= 0) && (position < LengthNoExcept())) {
2679 const char chAtPos = CharAt(position);
2680 const int styAtPos = StyleIndexAt(position);
2681 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2682 if (chAtPos == chBrace)
2683 depth++;
2684 if (chAtPos == chSeek)
2685 depth--;
2686 if (depth == 0)
2687 return position;
2688 }
2689 const Sci::Position positionBeforeMove = position;
2690 position = NextPosition(position, direction);
2691 if (position == positionBeforeMove)
2692 break;
2693 }
2694 return - 1;
2695}
2696
2697/**
2698 * Implementation of RegexSearchBase for the default built-in regular expression engine
2699 */
2701public:
2702 explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
2703 BuiltinRegex(const BuiltinRegex &) = delete;
2707 ~BuiltinRegex() override = default;
2708
2709 Sci::Position FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
2710 bool caseSensitive, bool word, bool wordStart, int flags,
2711 Sci::Position *length) override;
2712
2713 const char *SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) override;
2714
2715private:
2717 std::string substituted;
2718};
2719
2720namespace {
2721
2722/**
2723* RESearchRange keeps track of search range.
2724*/
2725class RESearchRange {
2726public:
2727 const Document *doc;
2728 int increment;
2729 Sci::Position startPos;
2730 Sci::Position endPos;
2731 Sci::Line lineRangeStart;
2732 Sci::Line lineRangeEnd;
2733 Sci::Line lineRangeBreak;
2734 RESearchRange(const Document *doc_, Sci::Position minPos, Sci::Position maxPos) noexcept : doc(doc_) {
2735 increment = (minPos <= maxPos) ? 1 : -1;
2736
2737 // Range endpoints should not be inside DBCS characters or between a CR and LF,
2738 // but just in case, move them.
2739 startPos = doc->MovePositionOutsideChar(minPos, 1, true);
2740 endPos = doc->MovePositionOutsideChar(maxPos, 1, true);
2741
2742 lineRangeStart = doc->SciLineFromPosition(startPos);
2743 lineRangeEnd = doc->SciLineFromPosition(endPos);
2744 lineRangeBreak = lineRangeEnd + increment;
2745 }
2746 Range LineRange(Sci::Line line) const {
2747 Range range(doc->LineStart(line), doc->LineEnd(line));
2748 if (increment == 1) {
2749 if (line == lineRangeStart)
2750 range.start = startPos;
2751 if (line == lineRangeEnd)
2752 range.end = endPos;
2753 } else {
2754 if (line == lineRangeEnd)
2755 range.start = endPos;
2756 if (line == lineRangeStart)
2757 range.end = startPos;
2758 }
2759 return range;
2760 }
2761};
2762
2763// Define a way for the Regular Expression code to access the document
2764class DocumentIndexer : public CharacterIndexer {
2765 Document *pdoc;
2766 Sci::Position end;
2767public:
2768 DocumentIndexer(Document *pdoc_, Sci::Position end_) noexcept :
2769 pdoc(pdoc_), end(end_) {
2770 }
2771
2772 DocumentIndexer(const DocumentIndexer &) = delete;
2773 DocumentIndexer(DocumentIndexer &&) = delete;
2774 DocumentIndexer &operator=(const DocumentIndexer &) = delete;
2775 DocumentIndexer &operator=(DocumentIndexer &&) = delete;
2776
2777 ~DocumentIndexer() override = default;
2778
2779 char CharAt(Sci::Position index) const noexcept override {
2780 if (index < 0 || index >= end)
2781 return 0;
2782 else
2783 return pdoc->CharAt(index);
2784 }
2785};
2786
2787#ifndef NO_CXX11_REGEX
2788
2789class ByteIterator {
2790public:
2791 typedef std::bidirectional_iterator_tag iterator_category;
2792 typedef char value_type;
2793 typedef ptrdiff_t difference_type;
2794 typedef char* pointer;
2795 typedef char& reference;
2796
2797 const Document *doc;
2799
2800 ByteIterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
2801 doc(doc_), position(position_) {
2802 }
2803 ByteIterator(const ByteIterator &other) noexcept {
2804 doc = other.doc;
2805 position = other.position;
2806 }
2807 ByteIterator(ByteIterator &&other) noexcept {
2808 doc = other.doc;
2809 position = other.position;
2810 }
2811 ByteIterator &operator=(const ByteIterator &other) noexcept {
2812 if (this != &other) {
2813 doc = other.doc;
2814 position = other.position;
2815 }
2816 return *this;
2817 }
2818 ByteIterator &operator=(ByteIterator &&) noexcept = default;
2819 ~ByteIterator() = default;
2820 char operator*() const noexcept {
2821 return doc->CharAt(position);
2822 }
2823 ByteIterator &operator++() noexcept {
2824 position++;
2825 return *this;
2826 }
2827 ByteIterator operator++(int) noexcept {
2828 ByteIterator retVal(*this);
2829 position++;
2830 return retVal;
2831 }
2832 ByteIterator &operator--() noexcept {
2833 position--;
2834 return *this;
2835 }
2836 bool operator==(const ByteIterator &other) const noexcept {
2837 return doc == other.doc && position == other.position;
2838 }
2839 bool operator!=(const ByteIterator &other) const noexcept {
2840 return doc != other.doc || position != other.position;
2841 }
2842 Sci::Position Pos() const noexcept {
2843 return position;
2844 }
2845 Sci::Position PosRoundUp() const noexcept {
2846 return position;
2847 }
2848};
2849
2850// On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide.
2851// Would be better to use sizeof(wchar_t) or similar to differentiate
2852// but easier for now to hard-code platforms.
2853// C++11 has char16_t and char32_t but neither Clang nor Visual C++
2854// appear to allow specializing basic_regex over these.
2855
2856#ifdef _WIN32
2857#define WCHAR_T_IS_16 1
2858#else
2859#define WCHAR_T_IS_16 0
2860#endif
2861
2862#if WCHAR_T_IS_16
2863
2864// On Windows, report non-BMP characters as 2 separate surrogates as that
2865// matches wregex since it is based on wchar_t.
2866class UTF8Iterator {
2867 // These 3 fields determine the iterator position and are used for comparisons
2868 const Document *doc;
2870 size_t characterIndex;
2871 // Remaining fields are derived from the determining fields so are excluded in comparisons
2872 unsigned int lenBytes;
2873 size_t lenCharacters;
2874 wchar_t buffered[2];
2875public:
2876 typedef std::bidirectional_iterator_tag iterator_category;
2877 typedef wchar_t value_type;
2878 typedef ptrdiff_t difference_type;
2879 typedef wchar_t* pointer;
2880 typedef wchar_t& reference;
2881
2882 UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
2883 doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0), buffered{} {
2884 buffered[0] = 0;
2885 buffered[1] = 0;
2886 if (doc) {
2887 ReadCharacter();
2888 }
2889 }
2890 UTF8Iterator(const UTF8Iterator &other) noexcept : buffered{} {
2891 doc = other.doc;
2892 position = other.position;
2893 characterIndex = other.characterIndex;
2894 lenBytes = other.lenBytes;
2895 lenCharacters = other.lenCharacters;
2896 buffered[0] = other.buffered[0];
2897 buffered[1] = other.buffered[1];
2898 }
2899 UTF8Iterator(UTF8Iterator &&other) noexcept = default;
2900 UTF8Iterator &operator=(const UTF8Iterator &other) noexcept {
2901 if (this != &other) {
2902 doc = other.doc;
2903 position = other.position;
2904 characterIndex = other.characterIndex;
2905 lenBytes = other.lenBytes;
2906 lenCharacters = other.lenCharacters;
2907 buffered[0] = other.buffered[0];
2908 buffered[1] = other.buffered[1];
2909 }
2910 return *this;
2911 }
2912 UTF8Iterator &operator=(UTF8Iterator &&) noexcept = default;
2913 ~UTF8Iterator() = default;
2914 wchar_t operator*() const noexcept {
2915 assert(lenCharacters != 0);
2916 return buffered[characterIndex];
2917 }
2918 UTF8Iterator &operator++() noexcept {
2919 if ((characterIndex + 1) < (lenCharacters)) {
2920 characterIndex++;
2921 } else {
2922 position += lenBytes;
2923 ReadCharacter();
2924 characterIndex = 0;
2925 }
2926 return *this;
2927 }
2928 UTF8Iterator operator++(int) noexcept {
2929 UTF8Iterator retVal(*this);
2930 if ((characterIndex + 1) < (lenCharacters)) {
2931 characterIndex++;
2932 } else {
2933 position += lenBytes;
2934 ReadCharacter();
2935 characterIndex = 0;
2936 }
2937 return retVal;
2938 }
2939 UTF8Iterator &operator--() noexcept {
2940 if (characterIndex) {
2941 characterIndex--;
2942 } else {
2943 position = doc->NextPosition(position, -1);
2944 ReadCharacter();
2945 characterIndex = lenCharacters - 1;
2946 }
2947 return *this;
2948 }
2949 bool operator==(const UTF8Iterator &other) const noexcept {
2950 // Only test the determining fields, not the character widths and values derived from this
2951 return doc == other.doc &&
2952 position == other.position &&
2953 characterIndex == other.characterIndex;
2954 }
2955 bool operator!=(const UTF8Iterator &other) const noexcept {
2956 // Only test the determining fields, not the character widths and values derived from this
2957 return doc != other.doc ||
2958 position != other.position ||
2959 characterIndex != other.characterIndex;
2960 }
2961 Sci::Position Pos() const noexcept {
2962 return position;
2963 }
2964 Sci::Position PosRoundUp() const noexcept {
2965 if (characterIndex)
2966 return position + lenBytes; // Force to end of character
2967 else
2968 return position;
2969 }
2970private:
2971 void ReadCharacter() noexcept {
2972 const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2973 lenBytes = charExtracted.widthBytes;
2974 if (charExtracted.character == unicodeReplacementChar) {
2975 lenCharacters = 1;
2976 buffered[0] = static_cast<wchar_t>(charExtracted.character);
2977 } else {
2978 lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered);
2979 }
2980 }
2981};
2982
2983#else
2984
2985// On Unix, report non-BMP characters as single characters
2986
2987class UTF8Iterator {
2988 const Document *doc;
2990public:
2991 typedef std::bidirectional_iterator_tag iterator_category;
2992 typedef wchar_t value_type;
2993 typedef ptrdiff_t difference_type;
2994 typedef wchar_t* pointer;
2995 typedef wchar_t& reference;
2996
2997 UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
2998 doc(doc_), position(position_) {
2999 }
3000 UTF8Iterator(const UTF8Iterator &other) noexcept {
3001 doc = other.doc;
3002 position = other.position;
3003 }
3004 UTF8Iterator(UTF8Iterator &&other) noexcept = default;
3005 UTF8Iterator &operator=(const UTF8Iterator &other) noexcept {
3006 if (this != &other) {
3007 doc = other.doc;
3008 position = other.position;
3009 }
3010 return *this;
3011 }
3012 UTF8Iterator &operator=(UTF8Iterator &&) noexcept = default;
3013 ~UTF8Iterator() = default;
3014 wchar_t operator*() const noexcept {
3015 const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
3016 return charExtracted.character;
3017 }
3018 UTF8Iterator &operator++() noexcept {
3019 position = doc->NextPosition(position, 1);
3020 return *this;
3021 }
3022 UTF8Iterator operator++(int) noexcept {
3023 UTF8Iterator retVal(*this);
3024 position = doc->NextPosition(position, 1);
3025 return retVal;
3026 }
3027 UTF8Iterator &operator--() noexcept {
3028 position = doc->NextPosition(position, -1);
3029 return *this;
3030 }
3031 bool operator==(const UTF8Iterator &other) const noexcept {
3032 return doc == other.doc && position == other.position;
3033 }
3034 bool operator!=(const UTF8Iterator &other) const noexcept {
3035 return doc != other.doc || position != other.position;
3036 }
3037 Sci::Position Pos() const noexcept {
3038 return position;
3039 }
3040 Sci::Position PosRoundUp() const noexcept {
3041 return position;
3042 }
3043};
3044
3045#endif
3046
3047std::regex_constants::match_flag_type MatchFlags(const Document *doc, Sci::Position startPos, Sci::Position endPos) {
3048 std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default;
3049 if (!doc->IsLineStartPosition(startPos))
3050 flagsMatch |= std::regex_constants::match_not_bol;
3051 if (!doc->IsLineEndPosition(endPos))
3052 flagsMatch |= std::regex_constants::match_not_eol;
3053 return flagsMatch;
3054}
3055
3056template<typename Iterator, typename Regex>
3057bool MatchOnLines(const Document *doc, const Regex &regexp, const RESearchRange &resr, RESearch &search) {
3058 std::match_results<Iterator> match;
3059
3060 // MSVC and libc++ have problems with ^ and $ matching line ends inside a range.
3061 // CRLF line ends are also a problem as ^ and $ only treat LF as a line end.
3062 // The std::regex::multiline option was added to C++17 to improve behaviour but
3063 // has not been implemented by compiler runtimes with MSVC always in multiline
3064 // mode and libc++ and libstdc++ always in single-line mode.
3065 // If multiline regex worked well then the line by line iteration could be removed
3066 // for the forwards case and replaced with the following 4 lines:
3067#ifdef REGEX_MULTILINE
3068 Iterator itStart(doc, resr.startPos);
3069 Iterator itEnd(doc, resr.endPos);
3070 const std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, resr.startPos, resr.endPos);
3071 const bool matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
3072#else
3073 // Line by line.
3074 bool matched = false;
3075 for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
3076 const Range lineRange = resr.LineRange(line);
3077 Iterator itStart(doc, lineRange.start);
3078 Iterator itEnd(doc, lineRange.end);
3079 std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end);
3080 matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
3081 // Check for the last match on this line.
3082 if (matched) {
3083 if (resr.increment == -1) {
3084 while (matched) {
3085 Iterator itNext(doc, match[0].second.PosRoundUp());
3086 flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end);
3087 std::match_results<Iterator> matchNext;
3088 matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch);
3089 if (matched) {
3090 if (match[0].first == match[0].second) {
3091 // Empty match means failure so exit
3092 return false;
3093 }
3094 match = matchNext;
3095 }
3096 }
3097 matched = true;
3098 }
3099 break;
3100 }
3101 }
3102#endif
3103 if (matched) {
3104 for (size_t co = 0; co < match.size(); co++) {
3105 search.bopat[co] = match[co].first.Pos();
3106 search.eopat[co] = match[co].second.PosRoundUp();
3107 const Sci::Position lenMatch = search.eopat[co] - search.bopat[co];
3108 search.pat[co].resize(lenMatch);
3109 for (Sci::Position iPos = 0; iPos < lenMatch; iPos++) {
3110 search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]);
3111 }
3112 }
3113 }
3114 return matched;
3115}
3116
3117Sci::Position Cxx11RegexFindText(const Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
3118 bool caseSensitive, Sci::Position *length, RESearch &search) {
3119 const RESearchRange resr(doc, minPos, maxPos);
3120 try {
3121 //ElapsedPeriod ep;
3122 std::regex::flag_type flagsRe = std::regex::ECMAScript;
3123 // Flags that appear to have no effect:
3124 // | std::regex::collate | std::regex::extended;
3125 if (!caseSensitive)
3126 flagsRe = flagsRe | std::regex::icase;
3127
3128 // Clear the RESearch so can fill in matches
3129 search.Clear();
3130
3131 bool matched = false;
3132 if (SC_CP_UTF8 == doc->dbcsCodePage) {
3133 const std::wstring ws = WStringFromUTF8(s, strlen(s));
3134 std::wregex regexp;
3135 regexp.assign(ws, flagsRe);
3136 matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search);
3137
3138 } else {
3139 std::regex regexp;
3140 regexp.assign(s, flagsRe);
3141 matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search);
3142 }
3143
3144 Sci::Position posMatch = -1;
3145 if (matched) {
3146 posMatch = search.bopat[0];
3147 *length = search.eopat[0] - search.bopat[0];
3148 }
3149 // Example - search in doc/ScintillaHistory.html for
3150 // [[:upper:]]eta[[:space:]]
3151 // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
3152 //const double durSearch = ep.Duration(true);
3153 //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
3154 return posMatch;
3155 } catch (std::regex_error &) {
3156 // Failed to create regular expression
3157 throw RegexError();
3158 } catch (...) {
3159 // Failed in some other way
3160 return -1;
3161 }
3162}
3163
3164#endif
3165
3166}
3167
3169 bool caseSensitive, bool, bool, int flags,
3170 Sci::Position *length) {
3171
3172#ifndef NO_CXX11_REGEX
3173 if (flags & SCFIND_CXX11REGEX) {
3174 return Cxx11RegexFindText(doc, minPos, maxPos, s,
3175 caseSensitive, length, search);
3176 }
3177#endif
3178
3179 const RESearchRange resr(doc, minPos, maxPos);
3180
3181 const bool posix = (flags & SCFIND_POSIX) != 0;
3182
3183 const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
3184 if (errmsg) {
3185 return -1;
3186 }
3187 // Find a variable in a property file: \$(\‍([A-Za-z0-9_.]+\‍))
3188 // Replace first '.' with '-' in each property file variable reference:
3189 // Search: \$(\‍([A-Za-z0-9_-]+\‍)\.\‍([A-Za-z0-9_.]+\‍))
3190 // Replace: $(\1-\2)
3191 Sci::Position pos = -1;
3192 Sci::Position lenRet = 0;
3193 const bool searchforLineStart = s[0] == '^';
3194 const char searchEnd = s[*length - 1];
3195 const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
3196 const bool searchforLineEnd = (searchEnd == '$') && (searchEndPrev != '\\');
3197 for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
3198 Sci::Position startOfLine = doc->LineStart(line);
3199 Sci::Position endOfLine = doc->LineEnd(line);
3200 if (resr.increment == 1) {
3201 if (line == resr.lineRangeStart) {
3202 if ((resr.startPos != startOfLine) && searchforLineStart)
3203 continue; // Can't match start of line if start position after start of line
3204 startOfLine = resr.startPos;
3205 }
3206 if (line == resr.lineRangeEnd) {
3207 if ((resr.endPos != endOfLine) && searchforLineEnd)
3208 continue; // Can't match end of line if end position before end of line
3209 endOfLine = resr.endPos;
3210 }
3211 } else {
3212 if (line == resr.lineRangeEnd) {
3213 if ((resr.endPos != startOfLine) && searchforLineStart)
3214 continue; // Can't match start of line if end position after start of line
3215 startOfLine = resr.endPos;
3216 }
3217 if (line == resr.lineRangeStart) {
3218 if ((resr.startPos != endOfLine) && searchforLineEnd)
3219 continue; // Can't match end of line if start position before end of line
3220 endOfLine = resr.startPos;
3221 }
3222 }
3223
3224 const DocumentIndexer di(doc, endOfLine);
3225 int success = search.Execute(di, startOfLine, endOfLine);
3226 if (success) {
3227 pos = search.bopat[0];
3228 // Ensure only whole characters selected
3229 search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
3230 lenRet = search.eopat[0] - search.bopat[0];
3231 // There can be only one start of a line, so no need to look for last match in line
3232 if ((resr.increment == -1) && !searchforLineStart) {
3233 // Check for the last match on this line.
3234 int repetitions = 1000; // Break out of infinite loop
3235 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
3236 success = search.Execute(di, pos+1, endOfLine);
3237 if (success) {
3238 if (search.eopat[0] <= minPos) {
3239 pos = search.bopat[0];
3240 lenRet = search.eopat[0] - search.bopat[0];
3241 } else {
3242 success = 0;
3243 }
3244 }
3245 }
3246 }
3247 break;
3248 }
3249 }
3250 *length = lenRet;
3251 return pos;
3252}
3253
3254const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) {
3255 substituted.clear();
3256 const DocumentIndexer di(doc, doc->Length());
3257 search.GrabMatches(di);
3258 for (Sci::Position j = 0; j < *length; j++) {
3259 if (text[j] == '\\') {
3260 if (text[j + 1] >= '0' && text[j + 1] <= '9') {
3261 const unsigned int patNum = text[j + 1] - '0';
3262 const Sci::Position len = search.eopat[patNum] - search.bopat[patNum];
3263 if (!search.pat[patNum].empty()) // Will be null if try for a match that did not occur
3264 substituted.append(search.pat[patNum].c_str(), len);
3265 j++;
3266 } else {
3267 j++;
3268 switch (text[j]) {
3269 case 'a':
3270 substituted.push_back('\a');
3271 break;
3272 case 'b':
3273 substituted.push_back('\b');
3274 break;
3275 case 'f':
3276 substituted.push_back('\f');
3277 break;
3278 case 'n':
3279 substituted.push_back('\n');
3280 break;
3281 case 'r':
3282 substituted.push_back('\r');
3283 break;
3284 case 't':
3285 substituted.push_back('\t');
3286 break;
3287 case 'v':
3288 substituted.push_back('\v');
3289 break;
3290 case '\\':
3291 substituted.push_back('\\');
3292 break;
3293 default:
3294 substituted.push_back('\\');
3295 j--;
3296 }
3297 }
3298 } else {
3299 substituted.push_back(text[j]);
3300 }
3301 }
3302 *length = substituted.length();
3303 return substituted.c_str();
3304}
3305
3306#ifndef SCI_OWNREGEX
3307
3309 return new BuiltinRegex(charClassTable);
3310}
3311
3312#endif
Classes for case folding.
Manages the text of the document.
Character classifications used by Document and RESearch.
Returns the Unicode general category of a character.
Encapsulates a set of characters.
Visual elements added over text.
static constexpr bool IsLineEndChar(char c) noexcept
Definition: Document.cxx:2626
static std::string CreateIndentation(Sci::Position indent, int tabSize, bool insertSpaces)
Definition: Document.cxx:1480
static char BraceOpposite(char ch) noexcept
Definition: Document.cxx:2643
static bool IsSubordinate(int levelStart, int levelTry) noexcept
Definition: Document.cxx:499
static bool IsASCIIPunctuationCharacter(unsigned int ch) noexcept
Definition: Document.cxx:2492
static constexpr Sci::Position NextTab(Sci::Position pos, Sci::Position tabSize) noexcept
Definition: Document.cxx:1476
Text document that handles notifications, DBCS, styling, words and end of line.
Encapsulate C++ <chrono> to simplify use.
Interface between Scintilla and lexers.
Interface for loading into a Scintilla document from a background thread.
Data structure used to partition an interval.
Manages data associated with each line of the document.
Interface to platform facilities.
#define PLATFORM_ASSERT(c)
Definition: Platform.h:544
Defines global type name Position in the Sci internal namespace.
Interface to the regular expression search library.
Data structure used to store sparse styles.
#define SCI_METHOD
Definition: Sci_Position.h:26
ptrdiff_t Sci_Position
Definition: Sci_Position.h:15
Interface to the edit control.
#define SC_MOD_CHANGEMARGIN
Definition: Scintilla.h:1077
#define SC_MOD_INSERTTEXT
Definition: Scintilla.h:1061
#define SC_DOCUMENTOPTION_TEXT_LARGE
Definition: Scintilla.h:722
#define SC_MOD_CHANGEINDICATOR
Definition: Scintilla.h:1075
#define SC_MOD_LEXERSTATE
Definition: Scintilla.h:1080
#define INVALID_POSITION
Definition: Scintilla.h:44
#define SC_MOD_CHANGEFOLD
Definition: Scintilla.h:1064
#define SCFIND_REGEXP
Definition: Scintilla.h:419
#define SCFIND_CXX11REGEX
Definition: Scintilla.h:421
#define SCFIND_POSIX
Definition: Scintilla.h:420
#define SC_LASTSTEPINUNDOREDO
Definition: Scintilla.h:1069
#define SC_STATUS_BADALLOC
Definition: Scintilla.h:734
#define SC_MOD_CHANGELINESTATE
Definition: Scintilla.h:1076
#define SC_PERFORMED_REDO
Definition: Scintilla.h:1067
#define SC_EOL_CR
Definition: Scintilla.h:89
#define SC_PERFORMED_UNDO
Definition: Scintilla.h:1066
#define SC_MOD_CHANGESTYLE
Definition: Scintilla.h:1063
#define SC_MOD_INSERTCHECK
Definition: Scintilla.h:1081
#define SCFIND_WHOLEWORD
Definition: Scintilla.h:416
#define SCFIND_MATCHCASE
Definition: Scintilla.h:417
#define SC_MOD_BEFOREINSERT
Definition: Scintilla.h:1071
#define SC_EOL_LF
Definition: Scintilla.h:90
#define SC_EOL_CRLF
Definition: Scintilla.h:88
#define SC_LINE_END_TYPE_UNICODE
Definition: Scintilla.h:1005
#define SC_MOD_CHANGEANNOTATION
Definition: Scintilla.h:1078
#define SC_MULTILINEUNDOREDO
Definition: Scintilla.h:1073
#define SC_LINE_END_TYPE_DEFAULT
Definition: Scintilla.h:1004
#define SC_FOLDLEVELWHITEFLAG
Definition: Scintilla.h:495
#define SC_MOD_CHANGEMARKER
Definition: Scintilla.h:1070
#define SC_FOLDLEVELBASE
Definition: Scintilla.h:494
#define SC_MOD_CHANGEEOLANNOTATION
Definition: Scintilla.h:1083
#define SCFIND_WORDSTART
Definition: Scintilla.h:418
#define SC_MOD_DELETETEXT
Definition: Scintilla.h:1062
#define SC_FOLDLEVELHEADERFLAG
Definition: Scintilla.h:496
#define SC_MOD_CONTAINER
Definition: Scintilla.h:1079
#define SC_MULTISTEPUNDOREDO
Definition: Scintilla.h:1068
#define SC_CP_UTF8
Definition: Scintilla.h:105
#define SC_STATUS_FAILURE
Definition: Scintilla.h:733
#define SC_DOCUMENTOPTION_STYLES_NONE
Definition: Scintilla.h:721
#define SC_STARTACTION
Definition: Scintilla.h:1074
#define SC_MOD_BEFOREDELETE
Definition: Scintilla.h:1072
#define SC_PERFORMED_USER
Definition: Scintilla.h:1065
Main data structure for holding arrays that handle insertions and deletions efficiently.
Functions to handle UTF-8 and UTF-16 strings.
gint index
Definition: build.c:2680
Implementation of RegexSearchBase for the default built-in regular expression engine.
Definition: Document.cxx:2700
BuiltinRegex(CharClassify *charClassTable)
Definition: Document.cxx:2702
const char * SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) override
Definition: Document.cxx:3254
~BuiltinRegex() override=default
BuiltinRegex & operator=(const BuiltinRegex &)=delete
BuiltinRegex & operator=(BuiltinRegex &&)=delete
BuiltinRegex(BuiltinRegex &&)=delete
RESearch search
Definition: Document.cxx:2716
Sci::Position FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s, bool caseSensitive, bool word, bool wordStart, int flags, Sci::Position *length) override
Definition: Document.cxx:3168
std::string substituted
Definition: Document.cxx:2717
BuiltinRegex(const BuiltinRegex &)=delete
void AddSample(size_t numberActions, double durationOfActions) noexcept
Definition: Document.cxx:94
double Duration() const noexcept
Definition: Document.cxx:108
ActionDuration(double duration_, double minDuration_, double maxDuration_) noexcept
Definition: Document.cxx:90
Actions are used to store all the information required to perform one undo/redo step.
Definition: CellBuffer.h:33
Sci::Position lenData
Definition: CellBuffer.h:38
Sci::Position position
Definition: CellBuffer.h:36
std::unique_ptr< char[]> data
Definition: CellBuffer.h:37
actionType at
Definition: CellBuffer.h:35
void ReleaseLineCharacterIndex(int lineCharacterIndex)
Definition: CellBuffer.cxx:764
int TentativeSteps() noexcept
Definition: CellBuffer.cxx:825
bool IsSavePoint() const noexcept
Definition: CellBuffer.cxx:813
unsigned char UCharAt(Sci::Position position) const noexcept
Definition: CellBuffer.cxx:586
bool SetStyleAt(Sci::Position position, char styleValue) noexcept
Setting styles for positions outside the range of the buffer is safe and has no effect.
Definition: CellBuffer.cxx:656
void SetSavePoint()
The save point is a marker in the undo stack where the container has stated that the buffer was saved...
Definition: CellBuffer.cxx:809
Sci::Line LineFromPosition(Sci::Position pos) const noexcept
Definition: CellBuffer.cxx:781
bool IsCollectingUndo() const noexcept
bool HasStyles() const noexcept
Definition: CellBuffer.cxx:805
void SetLineEndTypes(int utf8LineEnds_)
Definition: CellBuffer.cxx:720
int GetLineEndTypes() const noexcept
Definition: CellBuffer.h:157
const char * DeleteChars(Sci::Position position, Sci::Position deleteLength, bool &startSequence)
Definition: CellBuffer.cxx:688
void SetUTF8Substance(bool utf8Substance_) noexcept
Definition: CellBuffer.cxx:716
int LineCharacterIndex() const noexcept
Definition: CellBuffer.cxx:751
void SetPerLine(PerLine *pl) noexcept
Definition: CellBuffer.cxx:747
char CharAt(Sci::Position position) const noexcept
Retrieving positions outside the range of the buffer works and returns 0.
Definition: CellBuffer.cxx:582
const Action & GetRedoStep() const
const char * InsertString(Sci::Position position, const char *s, Sci::Position insertLength, bool &startSequence)
Definition: CellBuffer.cxx:641
bool SetStyleFor(Sci::Position position, Sci::Position lengthStyle, char styleValue) noexcept
Definition: CellBuffer.cxx:669
bool IsReadOnly() const noexcept
Definition: CellBuffer.cxx:793
Sci::Line Lines() const noexcept
Definition: CellBuffer.cxx:768
const Action & GetUndoStep() const
Sci::Position LineStart(Sci::Line line) const noexcept
Definition: CellBuffer.cxx:772
void AllocateLineCharacterIndex(int lineCharacterIndex)
Definition: CellBuffer.cxx:755
cc GetClass(unsigned char ch) const noexcept
Definition: CharClassify.h:21
void SetDefaultCharClasses(bool includeWordClass)
void SetCharClasses(const unsigned char *chars, cc newCharClass)
int GetCharsOfClass(cc characterClass, unsigned char *buffer) const noexcept
void Optimize(int countCharacters)
CharacterCategory CategoryFor(int character) const
To optimise processing of document modifications by DocWatchers, a hint is passed indicating the scop...
Definition: Document.h:543
Sci::Line annotationLinesAdded
Definition: Document.h:553
Sci::Position position
Definition: Document.h:546
Sci::Position length
Definition: Document.h:547
A class that wants to receive notifications from a Document must be derived from DocWatcher and imple...
Definition: Document.h:586
Sci::Position NextPosition(Sci::Position pos, int moveDir) const noexcept
Definition: Document.cxx:764
bool DeleteChars(Sci::Position pos, Sci::Position len)
Definition: Document.cxx:1204
Sci::Position NextWordEnd(Sci::Position pos, int delta) const
Find the end of the next word in either the forward (delta >= 0) or backward (delta < 0) direction.
Definition: Document.cxx:1881
bool HasCaseFolder() const noexcept
Definition: Document.cxx:1972
Sci::Line SciLineFromPosition(Sci::Position pos) const noexcept
Definition: Document.cxx:441
void InsertLines(Sci::Line line, Sci::Line lines) override
Definition: Document.cxx:185
char SCI_METHOD StyleAt(Sci_Position position) const override
Definition: Document.h:395
Sci::Position Undo()
Definition: Document.cxx:1314
std::unique_ptr< PerLine > perLineData[ldSize]
Definition: Document.h:248
int SCI_METHOD GetLineState(Sci_Position line) const override
Definition: Document.cxx:2306
void EnsureStyledTo(Sci::Position pos)
Definition: Document.cxx:2256
Sci::Position FindColumn(Sci::Line line, Sci::Position column)
Definition: Document.cxx:1592
Sci::Line LineFromHandle(int markerHandle) const noexcept
Definition: Document.cxx:383
void IncrementStyleClock() noexcept
Definition: Document.cxx:2430
void SetCharacterCategoryOptimization(int countCharacters)
Definition: Document.cxx:2199
CharacterCategoryMap charMap
Definition: Document.h:233
Sci_Position SCI_METHOD Length() const override
Definition: Document.h:430
Sci::Position MovePositionOutsideChar(Sci::Position pos, Sci::Position moveDir, bool checkLineEnd=true) const noexcept
Definition: Document.cxx:697
int dbcsCodePage
Can also be SC_CP_UTF8 to enable UTF-8 mode.
Definition: Document.h:276
void SCI_METHOD ChangeLexerState(Sci_Position start, Sci_Position end) override
Definition: Document.cxx:2314
bool InGoodUTF8(Sci::Position pos, Sci::Position &start, Sci::Position &end) const noexcept
Definition: Document.cxx:665
Sci::Position SetLineIndentation(Sci::Line line, Sci::Position indent)
Definition: Document.cxx:1513
char CharAt(Sci::Position position) const noexcept
Definition: Document.h:391
CharClassify::cc WordCharacterClass(unsigned int ch) const
Definition: Document.cxx:1736
void MarginSetStyles(Sci::Line line, const unsigned char *styles)
Definition: Document.cxx:2339
void SCI_METHOD StartStyling(Sci_Position position, char mask) override
Definition: Document.cxx:2207
Sci::Line GetFoldParent(Sci::Line line) const
Definition: Document.cxx:531
LineMarkers * Markers() const noexcept
Definition: Document.cxx:199
void NotifyModifyAttempt()
Definition: Document.cxx:2468
CharacterExtracted ExtractCharacter(Sci::Position position) const noexcept
Definition: Document.cxx:1980
void MarginSetText(Sci::Line line, const char *text)
Definition: Document.cxx:2326
void ChangeInsertion(const char *s, Sci::Position length)
Definition: Document.cxx:1293
bool IsCrLf(Sci::Position pos) const noexcept
Definition: Document.cxx:622
bool IsWhiteLine(Sci::Line line) const
Definition: Document.cxx:1697
void AnnotationSetStyle(Sci::Line line, int style)
Definition: Document.cxx:2371
std::unique_ptr< RegexSearchBase > regex
Definition: Document.h:257
void ReleaseLineCharacterIndex(int lineCharacterIndex)
Definition: Document.cxx:2179
void AnnotationSetText(Sci::Line line, const char *text)
Definition: Document.cxx:2359
Sci::Position ExtendWordSelect(Sci::Position pos, int delta, bool onlyWordCharacters=false) const
Used by commands that want to select whole words.
Definition: Document.cxx:1802
Sci::Position Redo()
Definition: Document.cxx:1399
Sci_Position SCI_METHOD LineEnd(Sci_Position line) const override
Definition: Document.cxx:403
const char * SubstituteByPosition(const char *text, Sci::Position *length)
Definition: Document.cxx:2164
void EOLAnnotationSetText(Sci::Line line, const char *text)
Definition: Document.cxx:2404
bool IsWordPartSeparator(unsigned int ch) const
Definition: Document.cxx:2532
int LenChar(Sci::Position pos) const noexcept
Definition: Document.cxx:630
void NotifySavePoint(bool atSavePoint)
Definition: Document.cxx:2474
bool IsWordAt(Sci::Position start, Sci::Position end) const
Check that the given range has transitions between character classes at both ends and where the ch...
Definition: Document.cxx:1962
Sci::Position endStyled
Definition: Document.h:235
Sci::Position GetRelativePositionUTF16(Sci::Position positionStart, Sci::Position characterOffset) const noexcept
Definition: Document.cxx:944
Sci::Position InsertString(Sci::Position position, const char *s, Sci::Position insertLength)
Insert a string with a length.
Definition: Document.cxx:1246
std::unique_ptr< IDecorationList > decorations
Definition: Document.h:286
int SCI_METHOD Release() override
Definition: Document.cxx:164
void SCI_METHOD DecorationFillRange(Sci_Position position, int value, Sci_Position fillLength) override
Definition: Document.cxx:2438
int AnnotationLines(Sci::Line line) const noexcept
Definition: Document.cxx:2386
void SetLexInterface(std::unique_ptr< LexInterface > pLexInterface) noexcept
Definition: Document.cxx:2292
Sci::Position GetEndStyled() const noexcept
Definition: Document.h:458
void ConvertLineEnds(int eolModeSet)
Definition: Document.cxx:1654
bool IsLarge() const noexcept
Definition: Document.h:385
Sci::Position ClampPositionIntoDocument(Sci::Position pos) const noexcept
Definition: Document.cxx:618
void SCI_METHOD DecorationSetCurrentIndicator(int indicator) override
Definition: Document.cxx:2434
Sci::Position VCHomePosition(Sci::Position position) const
Definition: Document.cxx:458
int SCI_METHOD GetLevel(Sci_Position line) const override
Definition: Document.cxx:491
CellBuffer cb
Definition: Document.h:231
std::vector< WatcherWithUserData > watchers
Definition: Document.h:244
void DelChar(Sci::Position pos)
Definition: Document.cxx:1459
Sci::Position WordPartLeft(Sci::Position pos) const
Definition: Document.cxx:2536
Sci::Line GetLastChild(Sci::Line lineParent, int level=-1, Sci::Line lastLine=-1)
Definition: Document.cxx:506
StyledText MarginStyledText(Sci::Line line) const noexcept
Definition: Document.cxx:2320
Sci::Position ParaDown(Sci::Position pos) const
Definition: Document.cxx:1722
Sci::Position ParaUp(Sci::Position pos) const
Definition: Document.cxx:1709
LineState * States() const noexcept
Definition: Document.cxx:207
int GetMark(Sci::Line line) const noexcept
Definition: Document.cxx:325
void Init() override
Definition: Document.cxx:171
int SCI_METHOD SetLevel(Sci_Position line, int level) override
Definition: Document.cxx:479
static std::string TransformLineEnds(const char *s, size_t len, int eolModeWanted)
Definition: Document.cxx:1632
bool IsLineStartPosition(Sci::Position position) const
Definition: Document.cxx:399
void GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, Sci::Line line, Sci::Line lastLine)
Definition: Document.cxx:548
void StyleToAdjustingLineDuration(Sci::Position pos)
Definition: Document.cxx:2273
void SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass)
Definition: Document.cxx:2191
LineAnnotation * Margins() const noexcept
Definition: Document.cxx:211
StyledText AnnotationStyledText(Sci::Line line) const noexcept
Definition: Document.cxx:2353
Document(int options)
Definition: Document.cxx:112
bool TentativeActive() const noexcept
Definition: Document.h:367
Sci::Position IndexLineStart(Sci::Line line, int lineCharacterIndex) const noexcept
Definition: Document.cxx:471
void EOLAnnotationClearAll()
Definition: Document.cxx:2422
LexInterface * GetLexInterface() const noexcept
Definition: Document.cxx:2288
Sci::Position LengthNoExcept() const noexcept
Definition: Document.h:431
bool AddWatcher(DocWatcher *watcher, void *userData)
Definition: Document.cxx:2448
void *SCI_METHOD ConvertToDocument() override
Definition: Document.cxx:1310
int SCI_METHOD AddData(const char *data, Sci_Position length) override
Definition: Document.cxx:1298
int IndentSize() const noexcept
Definition: Document.h:503
ActionDuration durationStyleOneLine
Definition: Document.h:284
int MarkerNumberFromLine(Sci::Line line, int which) const noexcept
Definition: Document.cxx:387
void RemoveLine(Sci::Line line) override
Definition: Document.cxx:192
bool IsWordStartAt(Sci::Position pos) const
Check that the character at the given position is a word or punctuation character and that the previo...
Definition: Document.cxx:1926
Document::CharacterExtracted CharacterAfter(Sci::Position position) const noexcept
Definition: Document.cxx:853
void InsertLine(Sci::Line line) override
Definition: Document.cxx:178
bool IsLineEndPosition(Sci::Position position) const
Definition: Document.cxx:450
int LineEndTypesSupported() const
Definition: Document.cxx:223
bool IsDBCSLeadByteInvalid(char ch) const noexcept
Definition: Document.cxx:1043
void DelCharBack(Sci::Position pos)
Definition: Document.cxx:1463
~Document() override
Definition: Document.cxx:151
Sci::Position WordPartRight(Sci::Position pos) const
Definition: Document.cxx:2586
Sci::Line GetMaxLineState() const noexcept
Definition: Document.cxx:2310
int AddMark(Sci::Line line, int markerNum)
Definition: Document.cxx:333
Sci_Position SCI_METHOD GetRelativePosition(Sci_Position positionStart, Sci_Position characterOffset) const override
Definition: Document.cxx:925
Sci::Line LinesTotal() const noexcept
Definition: Document.cxx:2183
bool IsPositionInLineEnd(Sci::Position position) const
Definition: Document.cxx:454
bool SetDBCSCodePage(int dbcsCodePage_)
Definition: Document.cxx:230
int Options() const noexcept
Definition: Document.cxx:1692
bool SetLineEndTypesAllowed(int lineEndBitSet_)
Definition: Document.cxx:243
LineLevels * Levels() const noexcept
Definition: Document.cxx:203
int SCI_METHOD GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const override
Definition: Document.cxx:965
int LineCharacterIndex() const noexcept
Definition: Document.cxx:2171
void ModifiedAt(Sci::Position pos) noexcept
Definition: Document.cxx:1188
bool MatchesWordOptions(bool word, bool wordStart, Sci::Position pos, Sci::Position length) const
Definition: Document.cxx:1966
int SafeSegment(const char *text, int length, int lengthSegment) const noexcept
Definition: Document.cxx:1145
int MarkerHandleFromLine(Sci::Line line, int which) const noexcept
Definition: Document.cxx:391
void SCI_METHOD SetErrorStatus(int status) override
Definition: Document.cxx:430
bool SCI_METHOD SetStyleFor(Sci_Position length, char style) override
Definition: Document.cxx:2211
std::string insertion
Definition: Document.h:242
bool SCI_METHOD SetStyles(Sci_Position length, const char *styles) override
Definition: Document.cxx:2228
int CharacterCategoryOptimization() const noexcept
Definition: Document.cxx:2203
Sci::Position BraceMatch(Sci::Position position, Sci::Position maxReStyle, Sci::Position startPos, bool useStartPos) noexcept
Definition: Document.cxx:2667
Sci::Position LineEndPosition(Sci::Position position) const
Definition: Document.cxx:446
Sci::Position GetLineIndentPosition(Sci::Line line) const
Definition: Document.cxx:1530
bool IsWordEndAt(Sci::Position pos) const
Check that the character at the given position is a word or punctuation character and that the next c...
Definition: Document.cxx:1944
StyledText EOLAnnotationStyledText(Sci::Line line) const noexcept
Definition: Document.cxx:2398
void DeleteAllMarks(int markerNum)
Definition: Document.cxx:370
std::unique_ptr< CaseFolder > pcf
Definition: Document.h:234
LineAnnotation * Annotations() const noexcept
Definition: Document.cxx:215
bool IsDBCSTrailByteInvalid(char ch) const noexcept
Definition: Document.cxx:1080
void MarginSetStyle(Sci::Line line, int style)
Definition: Document.cxx:2333
void SetDefaultCharClasses(bool includeWordClass)
Definition: Document.cxx:2187
void DeleteMark(Sci::Line line, int markerNum)
Definition: Document.cxx:357
CharClassify charClass
Definition: Document.h:232
void AddMarkSet(Sci::Line line, int valueSet)
Definition: Document.cxx:344
int DBCSDrawBytes(const char *text, int len) const noexcept
Definition: Document.cxx:1119
bool NextCharacter(Sci::Position &pos, int moveDir) const noexcept
Definition: Document.cxx:842
LineAnnotation * EOLAnnotations() const noexcept
Definition: Document.cxx:219
int GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) const
Definition: Document.cxx:2195
int SCI_METHOD CodePage() const override
Definition: Document.cxx:1005
int SCI_METHOD GetLineIndentation(Sci_Position line) override
Definition: Document.cxx:1495
Sci_Position SCI_METHOD LineFromPosition(Sci_Position pos) const override
Definition: Document.cxx:437
Document::CharacterExtracted CharacterBefore(Sci::Position position) const noexcept
Definition: Document.cxx:883
void SetCaseFolder(CaseFolder *pcf_) noexcept
Definition: Document.cxx:1976
void DeleteMarkFromHandle(int markerHandle)
Definition: Document.cxx:363
std::unique_ptr< LexInterface > pli
Definition: Document.h:258
void EOLAnnotationSetStyle(Sci::Line line, int style)
Definition: Document.cxx:2413
void AnnotationSetStyles(Sci::Line line, const unsigned char *styles)
Definition: Document.cxx:2380
Sci::Position NextWordStart(Sci::Position pos, int delta) const
Find the start of the next word in either the forward (delta >= 0) or backward (delta < 0) direction.
Definition: Document.cxx:1837
Sci::Line LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const noexcept
Definition: Document.cxx:475
void Indent(bool forwards, Sci::Line lineBottom, Sci::Line lineTop)
Definition: Document.cxx:1616
Sci_Position SCI_METHOD LineStart(Sci_Position line) const override
Definition: Document.cxx:395
Sci::Position FindText(Sci::Position minPos, Sci::Position maxPos, const char *search, int flags, Sci::Position *length)
Find text in document, supporting both forward and backward searches (just pass minPos > maxPos to do...
Definition: Document.cxx:2004
bool IsDBCSLeadByteNoExcept(char ch) const noexcept
Definition: Document.cxx:1014
Sci::Position ExtendStyleRange(Sci::Position pos, int delta, bool singleLine) noexcept
Definition: Document.cxx:2630
Sci::Position GetColumn(Sci::Position pos)
Definition: Document.cxx:1541
Sci::Line MarkerNext(Sci::Line lineStart, int mask) const noexcept
Definition: Document.cxx:329
bool RemoveWatcher(DocWatcher *watcher, void *userData)
Definition: Document.cxx:2458
bool SCI_METHOD IsDBCSLeadByte(char ch) const override
Definition: Document.cxx:1009
void AllocateLineCharacterIndex(int lineCharacterIndex)
Definition: Document.cxx:2175
void NotifyModified(DocModification mh)
Definition: Document.cxx:2480
EncodingFamily CodePageFamily() const noexcept
Definition: Document.cxx:1179
int SCI_METHOD SetLineState(Sci_Position line, int state) override
Definition: Document.cxx:2296
Sci::Position CountCharacters(Sci::Position startPos, Sci::Position endPos) const noexcept
Definition: Document.cxx:1565
Sci::Position CountUTF16(Sci::Position startPos, Sci::Position endPos) const noexcept
Definition: Document.cxx:1577
double Duration(bool reset=false) noexcept
Return duration as floating point seconds.
Definition: ElapsedPeriod.h:21
Sci::Line firstChangeableLineBefore
Definition: Document.h:162
Sci::Line firstChangeableLineAfter
Definition: Document.h:163
virtual int LineEndTypesSupported()=0
virtual int Version() const =0
virtual void Lex(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess)=0
virtual void Fold(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess)=0
bool performingStyle
Prevent reentrance.
Definition: Document.h:175
virtual int LineEndTypesSupported()
Definition: Document.cxx:79
bool MultipleStyles(Sci::Line line) const noexcept
Definition: PerLine.cxx:388
const char * Text(Sci::Line line) const noexcept
Definition: PerLine.cxx:402
void SetText(Sci::Line line, const char *text)
Definition: PerLine.cxx:416
int Length(Sci::Line line) const noexcept
Definition: PerLine.cxx:469
void SetStyles(Sci::Line line, const unsigned char *styles)
Definition: PerLine.cxx:447
int Style(Sci::Line line) const noexcept
Definition: PerLine.cxx:395
void SetStyle(Sci::Line line, int style)
Definition: PerLine.cxx:439
const unsigned char * Styles(Sci::Line line) const noexcept
Definition: PerLine.cxx:409
int SetLevel(Sci::Line line, int level, Sci::Line lines)
Definition: PerLine.cxx:259
int GetLevel(Sci::Line line) const noexcept
Definition: PerLine.cxx:273
void DeleteMarkFromHandle(int markerHandle)
Definition: PerLine.cxx:207
int AddMark(Sci::Line line, int markerNum, Sci::Line lines)
Definition: PerLine.cxx:173
bool DeleteMark(Sci::Line line, int markerNum, bool all)
Definition: PerLine.cxx:191
Sci::Line GetMaxLineState() const noexcept
Definition: PerLine.cxx:324
int GetLineState(Sci::Line line)
Definition: PerLine.cxx:317
int SetLineState(Sci::Line line, int state)
Definition: PerLine.cxx:310
void Clear() noexcept
Definition: RESearch.cxx:270
Sci::Position bopat[MAXTAG]
Definition: RESearch.h:35
const char * Compile(const char *pattern, Sci::Position length, bool caseSensitive, bool posix) noexcept
Definition: RESearch.cxx:433
void GrabMatches(const CharacterIndexer &ci)
Definition: RESearch.cxx:278
int Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp)
Definition: RESearch.cxx:754
Sci::Position eopat[MAXTAG]
Definition: RESearch.h:36
std::string pat[MAXTAG]
Definition: RESearch.h:37
The range class represents a range of text in a document.
Definition: Document.h:29
Sci::Position end
Definition: Document.h:32
Sci::Position start
Definition: Document.h:31
Interface class for regular expression searching.
Definition: Document.h:91
default
Definition: filetypes.c:4
static gchar indent[100]
Definition: editor.c:91
gchar * text
Definition: editor.c:83
gint pos
Definition: editor.c:87
vString * line
Definition: geany_cobol.c:133
unsigned int count
unsigned int max
static bool match(const unsigned char *line, const char *word)
Definition: geany_tcl.c:55
const gchar * chars[][2]
Definition: htmlchars.c:72
ptrdiff_t Position
Definition: Position.h:19
ptrdiff_t Line
Definition: Position.h:20
Styling buffer using one element for each run rather than using a filled buffer.
Definition: Converter.h:9
constexpr int UTF8MaxBytes
Definition: UniConversion.h:13
std::unique_ptr< IDecorationList > DecorationListCreate(bool largeDocument)
Definition: Decoration.cxx:308
constexpr int unicodeReplacementChar
Definition: UniConversion.h:15
constexpr bool UTF8IsAscii(int ch) noexcept
Definition: UniConversion.h:50
unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf) noexcept
constexpr bool IsLowerCase(int ch) noexcept
Definition: CharacterSet.h:137
bool UTF8IsSeparator(const unsigned char *us) noexcept
Definition: UniConversion.h:64
constexpr bool IsSpaceOrTab(int ch) noexcept
Definition: PositionCache.h:17
RegexSearchBase * CreateRegexSearch(CharClassify *charClassTable)
Factory function for RegexSearchBase.
Definition: Document.cxx:3308
@ removeAction
Definition: CellBuffer.h:28
@ insertAction
Definition: CellBuffer.h:28
@ containerAction
Definition: CellBuffer.h:28
bool UTF8IsNEL(const unsigned char *us) noexcept
Definition: UniConversion.h:70
constexpr bool IsASCII(int ch) noexcept
Definition: CharacterSet.h:133
constexpr int UTF8SeparatorLength
Definition: UniConversion.h:63
constexpr int UTF8NELLength
Definition: UniConversion.h:69
int UTF8Classify(const unsigned char *us, size_t len) noexcept
constexpr bool IsADigit(int ch) noexcept
Definition: CharacterSet.h:119
int UnicodeFromUTF8(const unsigned char *us) noexcept
Definition: UniConversion.h:33
@ lvSubStyles
Definition: ILexer.h:49
std::wstring WStringFromUTF8(const char *s, size_t len)
constexpr int LevelNumber(int level) noexcept
Definition: Document.h:167
EncodingFamily
Definition: Document.h:21
constexpr bool UTF8IsTrailByte(unsigned char ch) noexcept
Definition: UniConversion.h:46
const unsigned char UTF8BytesOfLead[256]
constexpr bool isspacechar(int ch) noexcept
Check if a character is a space.
Definition: CharacterSet.h:160
constexpr bool IsUpperCase(int ch) noexcept
Definition: CharacterSet.h:141
static GeanyIndentPrefs indentation
Definition: project.c:57
gint position[2]
Definition: search.c:120
long bytes
Definition: stats.c:32
long lines
Definition: stats.c:32
Used to pair watcher pointer with user data.
Definition: Document.h:218