Skip to content

Commit

Permalink
Simplify HTML lexer.
Browse files: browse the repository at this point in the history
  • Loading branch information
zufuliu committed Jan 4, 2025
1 parent a483bcc commit d791d68
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 33 deletions.
53 changes: 23 additions & 30 deletions scintilla/lexers/LexHTML.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,10 @@ constexpr bool isCommentASPState(int state) noexcept {
|| state == SCE_HB_COMMENTLINE;
}

bool classifyAttribHTML(script_mode inScriptType, Sci_PositionU start, Sci_PositionU end, const WordList &keywords, LexAccessor &styler) {
bool classifyAttribHTML(script_mode inScriptType, Sci_PositionU end, const WordList &keywords, LexAccessor &styler) {
int chAttr = SCE_H_ATTRIBUTEUNKNOWN;
bool isLanguageType = false;
const Sci_PositionU start = styler.GetStartSegment();
if (IsNumberChar(styler[start])) {
chAttr = SCE_H_NUMBER;
} else {
Expand Down Expand Up @@ -183,15 +184,15 @@ bool isHTMLCustomElement(const char *tag, size_t length, bool dashColon) noexcep
return dashColon;
}

int classifyTagHTML(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, LexAccessor &styler, bool &tagDontFold,
bool isXml, bool allowScripts) {
int classifyTagHTML(Sci_PositionU end, const WordList &keywords, LexAccessor &styler, bool &tagDontFold, bool isXml, bool allowScripts) {
char withSpace[126 + 2];
withSpace[0] = ' ';
withSpace[1] = '\0';
const char *tag = withSpace + 1;
// Copy after the '<' and stop before space
Sci_PositionU i = 1;
bool dashColon = false;
const Sci_PositionU start = styler.GetStartSegment();
for (Sci_PositionU cPos = start; cPos < end && i < sizeof(withSpace) - 2; cPos++) {
const char ch = styler[cPos];
if (static_cast<unsigned char>(ch) <= ' ') {
Expand Down Expand Up @@ -257,20 +258,20 @@ int classifyTagHTML(Sci_PositionU start, Sci_PositionU end, const WordList &keyw
return chAttr;
}

void classifyWordHTJS(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, LexAccessor &styler, script_mode inScriptType) {
void classifyWordHTJS(Sci_PositionU end, const WordList &keywords, LexAccessor &styler, script_mode inScriptType) {
char s[31 + 1];
styler.GetRange(start, end, s, sizeof(s));
styler.GetRange(styler.GetStartSegment(), end, s, sizeof(s));
int chAttr = SCE_HJ_WORD;
if (keywords.InList(s)) {
chAttr = SCE_HJ_KEYWORD;
}
styler.ColorTo(end, statePrintForState(chAttr, inScriptType));
}

int classifyWordHTVB(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, LexAccessor &styler, script_mode inScriptType) {
int classifyWordHTVB(Sci_PositionU end, const WordList &keywords, LexAccessor &styler, script_mode inScriptType) {
int chAttr = SCE_HB_IDENTIFIER;
char s[32];
styler.GetRangeLowered(start, end, s, sizeof(s));
styler.GetRangeLowered(styler.GetStartSegment(), end, s, sizeof(s));
if (keywords.InList(s)) {
chAttr = SCE_HB_WORD;
if (StrEqual(s, "rem"))
Expand All @@ -283,16 +284,6 @@ int classifyWordHTVB(Sci_PositionU start, Sci_PositionU end, const WordList &key
return SCE_HB_DEFAULT;
}

bool isWordHSGML(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, const LexAccessor &styler) noexcept {
char s[15 + 1];
styler.GetRange(start, end, s, sizeof(s));
return keywords.InList(s);
}

inline bool isWordCdata(Sci_PositionU start, LexAccessor &styler) noexcept {
return styler.Match(start, "[CDATA[");
}

// Return the first state to reach when entering a scripting language
constexpr int StateForScript(script_type scriptLanguage) noexcept {
switch (scriptLanguage) {
Expand Down Expand Up @@ -631,7 +622,7 @@ void ColouriseHyperTextDoc(Sci_PositionU startPos, Sci_Position length, int init
ch = '-';
}
}
} else if (isWordCdata(i + 1, styler)) {
} else if (styler.Match(i + 1, "[CDATA[")) {
state = SCE_H_CDATA;
} else {
styler.ColorTo(i + 1, SCE_H_SGML_DEFAULT); // <! is default
Expand All @@ -655,10 +646,10 @@ void ColouriseHyperTextDoc(Sci_PositionU startPos, Sci_Position length, int init
// Bounce out of any ASP mode
switch (state) {
case SCE_HJ_WORD:
classifyWordHTJS(styler.GetStartSegment(), i, keywordLists[KeywordIndex_JavaScript], styler, inScriptType);
classifyWordHTJS(i, keywordLists[KeywordIndex_JavaScript], styler, inScriptType);
break;
case SCE_HB_WORD:
classifyWordHTVB(styler.GetStartSegment(), i, keywordLists[KeywordIndex_VBScript], styler, inScriptType);
classifyWordHTVB(i, keywordLists[KeywordIndex_VBScript], styler, inScriptType);
break;
case SCE_H_XCCOMMENT:
styler.ColorTo(i, state);
Expand Down Expand Up @@ -756,7 +747,9 @@ void ColouriseHyperTextDoc(Sci_PositionU startPos, Sci_Position length, int init
break;
case SCE_H_SGML_COMMAND:
if (!IsSGMLWordChar(ch)) {
if (isWordHSGML(styler.GetStartSegment(), i, keywordLists[KeywordIndex_SGML], styler)) {
char s[15 + 1];
styler.GetRange(styler.GetStartSegment(), i, s, sizeof(s));
if (keywordLists[KeywordIndex_SGML].InList(s)) {
styler.ColorTo(i, StateToPrint);
state = SCE_H_SGML_1ST_PARAM;
} else {
Expand Down Expand Up @@ -861,8 +854,7 @@ void ColouriseHyperTextDoc(Sci_PositionU startPos, Sci_Position length, int init
break;
case SCE_H_TAGUNKNOWN:
if (!IsTagContinue(ch) && !((ch == '/') && (chPrev == '<'))) {
int eClass = classifyTagHTML(styler.GetStartSegment(),
i, keywordLists[KeywordIndex_Tag], styler, tagDontFold, isXml, allowScripts);
int eClass = classifyTagHTML(i, keywordLists[KeywordIndex_Tag], styler, tagDontFold, isXml, allowScripts);
if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
if (!tagClosing) {
inScriptType = eNonHtmlScript;
Expand Down Expand Up @@ -919,7 +911,7 @@ void ColouriseHyperTextDoc(Sci_PositionU startPos, Sci_Position length, int init
break;
case SCE_H_ATTRIBUTE:
if (!IsAttributeContinue(ch)) {
isLanguageType = classifyAttribHTML(inScriptType, styler.GetStartSegment(), i, keywordLists[KeywordIndex_Attribute], styler);
isLanguageType = classifyAttribHTML(inScriptType, i, keywordLists[KeywordIndex_Attribute], styler);
if (ch == '>') {
styler.ColorTo(i + 1, SCE_H_TAG);
if (inScriptType == eNonHtmlScript) {
Expand Down Expand Up @@ -1094,7 +1086,7 @@ void ColouriseHyperTextDoc(Sci_PositionU startPos, Sci_Position length, int init
if (state == SCE_HJ_NUMBER) {
styler.ColorTo(i, statePrintForState(SCE_HJ_NUMBER, inScriptType));
} else {
classifyWordHTJS(styler.GetStartSegment(), i, keywordLists[KeywordIndex_JavaScript], styler, inScriptType);
classifyWordHTJS(i, keywordLists[KeywordIndex_JavaScript], styler, inScriptType);
}
state = SCE_HJ_DEFAULT;
}
Expand Down Expand Up @@ -1201,7 +1193,7 @@ void ColouriseHyperTextDoc(Sci_PositionU startPos, Sci_Position length, int init
styler.ColorTo(i, statePrintForState(SCE_HB_NUMBER, inScriptType));
state = SCE_HB_DEFAULT;
} else {
state = classifyWordHTVB(styler.GetStartSegment(), i, keywordLists[KeywordIndex_VBScript], styler, inScriptType);
state = classifyWordHTVB(i, keywordLists[KeywordIndex_VBScript], styler, inScriptType);
}
}
break;
Expand Down Expand Up @@ -1272,15 +1264,16 @@ void ColouriseHyperTextDoc(Sci_PositionU startPos, Sci_Position length, int init

switch (state) {
case SCE_HJ_WORD:
classifyWordHTJS(styler.GetStartSegment(), lengthDoc, keywordLists[KeywordIndex_JavaScript], styler, inScriptType);
classifyWordHTJS(lengthDoc, keywordLists[KeywordIndex_JavaScript], styler, inScriptType);
break;
case SCE_HB_WORD:
classifyWordHTVB(styler.GetStartSegment(), lengthDoc, keywordLists[KeywordIndex_VBScript], styler, inScriptType);
classifyWordHTVB(lengthDoc, keywordLists[KeywordIndex_VBScript], styler, inScriptType);
break;
default:
StateToPrint = statePrintForState(state, inScriptType);
if (static_cast<Sci_Position>(styler.GetStartSegment()) < lengthDoc)
if (static_cast<Sci_Position>(styler.GetStartSegment()) < lengthDoc) {
StateToPrint = statePrintForState(state, inScriptType);
styler.ColorTo(lengthDoc, StateToPrint);
}
break;
}

Expand Down
3 changes: 2 additions & 1 deletion scintilla/src/PositionCache.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,8 @@ void LineLayout::AddLineStart(Sci::Position start) {
const int newMaxLines = lines + 20;
std::unique_ptr<int[]> newLineStarts = std::make_unique<int[]>(newMaxLines);
if (lenLineStarts) {
std::copy(lineStarts.get(), lineStarts.get() + lenLineStarts, newLineStarts.get());
//std::copy_n(lineStarts.get(), lenLineStarts, newLineStarts.get());
memcpy(newLineStarts.get(), lineStarts.get(), lenLineStarts*sizeof(int));
}
lineStarts = std::move(newLineStarts);
lenLineStarts = newMaxLines;
Expand Down
3 changes: 1 addition & 2 deletions scintilla/src/SplitVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,9 +263,8 @@ class SplitVector {
memset(ptr, 0, insertLength*sizeof(T));
} else {
static_assert(std::is_nothrow_default_constructible_v<T>);
for (ptrdiff_t elem = 0; elem < insertLength; elem++) {
for (ptrdiff_t elem = 0; elem < insertLength; elem++, ptr++) {
::new (ptr)T();
ptr++;
}
}
lengthBody += insertLength;
Expand Down

0 comments on commit d791d68

Please sign in to comment.