From 5dd04adade89b32454b03bd5b12985a3a5ac439d Mon Sep 17 00:00:00 2001 From: Dylan Piercey Date: Thu, 11 Jun 2026 15:37:30 -0700 Subject: [PATCH 1/2] perf: refactor states to be able to parse multiple characters --- .changeset/petite-signs-dance.md | 5 + .../attr-grouped-no-whitespace.expected.txt | 14 + .../attr-grouped-no-whitespace/input.marko | 2 + .../regexp-escaped-newline.expected.txt | 4 + .../regexp-escaped-newline/input.marko | 2 + src/__tests__/validate.test.ts | 16 + src/core/Parser.ts | 37 +- src/states/ATTRIBUTE.ts | 177 +++---- src/states/BEGIN_DELIMITED_HTML_BLOCK.ts | 50 +- src/states/CDATA.ts | 28 +- src/states/CLOSE_TAG.ts | 26 +- src/states/CONCISE_HTML_CONTENT.ts | 71 ++- src/states/DECLARATION.ts | 31 +- src/states/DTD.ts | 25 +- src/states/EXPRESSION.ts | 435 +++++++++-------- src/states/HTML_COMMENT.ts | 33 +- src/states/HTML_CONTENT.ts | 231 +++++---- src/states/INLINE_SCRIPT.ts | 10 +- src/states/JS_COMMENT_BLOCK.ts | 28 +- src/states/JS_COMMENT_LINE.ts | 38 +- src/states/OPEN_TAG.ts | 459 +++++++++--------- src/states/PARSED_STRING.ts | 38 +- src/states/PARSED_TEXT_CONTENT.ts | 133 +++-- src/states/PLACEHOLDER.ts | 22 +- src/states/REGULAR_EXPRESSION.ts | 67 +-- src/states/STRING.ts | 30 +- src/states/TAG_NAME.ts | 82 ++-- src/states/TEMPLATE_STRING.ts | 45 +- src/util/validators.ts | 61 ++- 29 files changed, 1223 insertions(+), 977 deletions(-) create mode 100644 .changeset/petite-signs-dance.md create mode 100644 src/__tests__/fixtures/attr-grouped-no-whitespace/__snapshots__/attr-grouped-no-whitespace.expected.txt create mode 100644 src/__tests__/fixtures/attr-grouped-no-whitespace/input.marko create mode 100644 src/__tests__/fixtures/regexp-escaped-newline/__snapshots__/regexp-escaped-newline.expected.txt create mode 100644 src/__tests__/fixtures/regexp-escaped-newline/input.marko diff --git a/.changeset/petite-signs-dance.md b/.changeset/petite-signs-dance.md new file mode 100644 index 00000000..b353956a --- /dev/null +++ 
b/.changeset/petite-signs-dance.md @@ -0,0 +1,5 @@ +--- +"htmljs-parser": patch +--- + +Refactor parser to allow individual states to process multiple characters. This allows for eager scanning, simplifies things some, and improves performance by about 30% in real-world templates. diff --git a/src/__tests__/fixtures/attr-grouped-no-whitespace/__snapshots__/attr-grouped-no-whitespace.expected.txt b/src/__tests__/fixtures/attr-grouped-no-whitespace/__snapshots__/attr-grouped-no-whitespace.expected.txt new file mode 100644 index 00000000..264a4afc --- /dev/null +++ b/src/__tests__/fixtures/attr-grouped-no-whitespace/__snapshots__/attr-grouped-no-whitespace.expected.txt @@ -0,0 +1,14 @@ +1╭─ div[a=1 b] + │ │ │││ ╰─ attrName + │ │ ││╰─ attrValue.value + │ │ │╰─ attrValue "=1" + │ │ ╰─ attrName + ╰─ ╰─ tagName "div" +2╭─ span[c] + │ │ │ ╰─ attrName + │ │ ╰─ tagName "span" + ╰─ ╰─ openTagEnd +3╭─ + │ ├─ openTagEnd + │ ├─ closeTagEnd(div) + ╰─ ╰─ closeTagEnd(span) \ No newline at end of file diff --git a/src/__tests__/fixtures/attr-grouped-no-whitespace/input.marko b/src/__tests__/fixtures/attr-grouped-no-whitespace/input.marko new file mode 100644 index 00000000..34a74905 --- /dev/null +++ b/src/__tests__/fixtures/attr-grouped-no-whitespace/input.marko @@ -0,0 +1,2 @@ +div[a=1 b] + span[c] diff --git a/src/__tests__/fixtures/regexp-escaped-newline/__snapshots__/regexp-escaped-newline.expected.txt b/src/__tests__/fixtures/regexp-escaped-newline/__snapshots__/regexp-escaped-newline.expected.txt new file mode 100644 index 00000000..a1419ab9 --- /dev/null +++ b/src/__tests__/fixtures/regexp-escaped-newline/__snapshots__/regexp-escaped-newline.expected.txt @@ -0,0 +1,4 @@ +1╭─ $ const a = /b\ + ╰─ ╰─ error(INVALID_REGULAR_EXPRESSION:EOL reached while parsing regular expression) +2├─ c/; +3╰─ \ No newline at end of file diff --git a/src/__tests__/fixtures/regexp-escaped-newline/input.marko b/src/__tests__/fixtures/regexp-escaped-newline/input.marko new file mode 100644 index 
00000000..935482b5 --- /dev/null +++ b/src/__tests__/fixtures/regexp-escaped-newline/input.marko @@ -0,0 +1,2 @@ +$ const a = /b\ + c/; diff --git a/src/__tests__/validate.test.ts b/src/__tests__/validate.test.ts index 59f8f1a6..d2652da3 100644 --- a/src/__tests__/validate.test.ts +++ b/src/__tests__/validate.test.ts @@ -26,6 +26,14 @@ describe("validation helpers", () => { it("rejects mismatched closing groups", () => { assert.equal(isValidStatement(")"), 0); }); + + it("treats newlines in template literals as unguarded", () => { + assert.equal(isValidStatement("`foo\nbar`"), 1); + }); + + it("treats newlines in enclosed template literals as guarded", () => { + assert.equal(isValidStatement("(`foo\nbar`)"), 2); + }); }); describe("isValidScriptlet", () => { @@ -102,5 +110,13 @@ describe("validation helpers", () => { it("accepts continued multiline enclosed logical expression", () => { assert.equal(isValidAttrValue("a && (\nb\n)", true), 2); }); + + it("accepts keyword operator operand ending the input", () => { + assert.equal(isValidAttrValue("a as b", true), 2); + }); + + it("rejects keyword operator with no operand", () => { + assert.equal(isValidAttrValue("a as ", true), 0); + }); }); }); diff --git a/src/core/Parser.ts b/src/core/Parser.ts index 6e0ff88e..6b238142 100644 --- a/src/core/Parser.ts +++ b/src/core/Parser.ts @@ -23,9 +23,7 @@ export interface StateDefinition

{ pos: number, ) => Partial

; exit: (this: Parser, activeRange: P) => void; - char: (this: Parser, code: number, activeRange: P) => void; - eol: (this: Parser, length: number, activeRange: P) => void; - eof: (this: Parser, activeRange: P) => void; + parse: (this: Parser, data: string, maxPos: number, activeRange: P) => void; return: (this: Parser, child: Meta, activeRange: P) => void; } @@ -35,7 +33,6 @@ export class Parser { declare public data: string; declare public activeState: StateDefinition; declare public activeRange: Meta; - declare public forward: number; declare public activeTag: STATE.OpenTagMeta | undefined; // Used to reference the closest open tag declare public activeAttr: STATE.AttrMeta | undefined; // Used to reference the current attribute that is being parsed declare public indent: string; // Used to build the indent for the current concise line @@ -79,7 +76,6 @@ export class Parser { const { activeRange, activeState } = this; const parent = (this.activeRange = activeRange.parent); this.activeState = parent.state; - this.forward = 0; activeRange.end = this.pos; activeState.exit.call(this, activeRange); this.activeState.return.call(this, activeRange, parent); @@ -215,7 +211,6 @@ export class Parser { if (this.lookAheadFor(str, cur)) { this.pos = cur; - if (this.forward > 1) this.forward = 1; return true; } @@ -292,10 +287,12 @@ export class Parser { this.data = data; this.indent = ""; this.textPos = -1; - this.forward = 1; this.isConcise = true; this.beginMixedMode = this.endingMixedModeAtEOL = false; this.lines = this.activeTag = this.activeAttr = undefined; + // Drop any state left over from a previous parse so reusing a parser + // does not chain (and retain) the old state metas via parent references. + this.activeRange = undefined as unknown as Meta; // Skip the byte order mark (BOM) sequence // at the beginning of the file if there is one: @@ -304,30 +301,8 @@ export class Parser { this.pos = data.charCodeAt(0) === 0xfeff ? 
1 : 0; this.enterState(STATE.CONCISE_HTML_CONTENT); - while (this.pos < maxPos) { - const code = data.charCodeAt(this.pos); - - if (code === CODE.NEWLINE) { - this.forward = 1; - this.activeState.eol.call(this, 1, this.activeRange); - } else if ( - code === CODE.CARRIAGE_RETURN && - data.charCodeAt(this.pos + 1) === CODE.NEWLINE - ) { - this.forward = 2; - this.activeState.eol.call(this, 2, this.activeRange); - } else { - this.forward = 1; - this.activeState.char.call(this, code, this.activeRange); - } - - this.pos += this.forward; - } - - while (this.pos === this.maxPos) { - this.forward = 1; - this.activeState.eof.call(this, this.activeRange); - if (this.forward !== 0) break; + while (this.pos <= maxPos) { + this.activeState.parse.call(this, data, maxPos, this.activeRange); } } } diff --git a/src/states/ATTRIBUTE.ts b/src/states/ATTRIBUTE.ts index 080e7b50..9beb47fa 100644 --- a/src/states/ATTRIBUTE.ts +++ b/src/states/ATTRIBUTE.ts @@ -58,94 +58,109 @@ export const ATTRIBUTE: StateDefinition = { this.activeAttr = undefined; }, - char(code, attr) { - if (isWhitespaceCode(code)) { - return; - } else if ( - code === CODE.EQUAL || - (code === CODE.COLON && this.lookAtCharCodeAhead(1) === CODE.EQUAL) || - (code === CODE.PERIOD && this.lookAheadFor("..")) - ) { - attr.valueStart = this.pos; - this.forward = 0; - - if (code === CODE.COLON) { - ensureAttrName(this, attr); - attr.bound = true; - this.pos += 2; // skip := - this.consumeWhitespace(); - } else if (code === CODE.PERIOD) { - attr.spread = true; - this.pos += 3; // skip ... 
- } else { - ensureAttrName(this, attr); - this.pos++; // skip = - this.consumeWhitespace(); + parse(data, maxPos, attr) { + while (this.pos < maxPos) { + const code = data.charCodeAt(this.pos); + + if (code === CODE.NEWLINE || code === CODE.CARRIAGE_RETURN) { + if (this.isConcise) { + this.exitState(); + return; // parent handles newline + } + this.pos += + code === CODE.CARRIAGE_RETURN && + data.charCodeAt(this.pos + 1) === CODE.NEWLINE + ? 2 + : 1; + continue; } - attr.stage = ATTR_STAGE.VALUE; - const expr = this.enterState(STATE.EXPRESSION); - expr.operators = true; - expr.terminatedByWhitespace = true; - expr.shouldTerminate = this.isConcise - ? this.activeTag!.stage === TAG_STAGE.ATTR_GROUP - ? shouldTerminateConciseGroupedAttrValue - : shouldTerminateConciseAttrValue - : shouldTerminateHtmlAttrValue; - } else if (code === CODE.OPEN_PAREN) { - ensureAttrName(this, attr); - attr.stage = ATTR_STAGE.ARGUMENT; - this.pos++; // skip ( - this.forward = 0; - this.enterState(STATE.EXPRESSION).shouldTerminate = matchesCloseParen; - } else if ( - code === CODE.OPEN_ANGLE_BRACKET && - attr.stage === ATTR_STAGE.NAME - ) { - attr.stage = ATTR_STAGE.TYPE_PARAMS; - this.pos++; // skip < - this.forward = 0; - const expr = this.enterState(STATE.EXPRESSION); - expr.inType = true; - expr.forceType = true; - expr.shouldTerminate = matchesCloseAngleBracket; - } else if (code === CODE.OPEN_CURLY_BRACE && attr.args) { - ensureAttrName(this, attr); - attr.stage = ATTR_STAGE.BLOCK; - this.pos++; // skip { - this.forward = 0; - this.enterState(STATE.EXPRESSION).shouldTerminate = - matchesCloseCurlyBrace; - } else if (attr.stage === ATTR_STAGE.UNKNOWN) { - if (code === CODE.OPEN_ANGLE_BRACKET) { - return this.emitError( - this.pos, - ErrorCode.INVALID_ATTRIBUTE_NAME, - 'Invalid attribute name. 
Attribute name cannot begin with the "<" character.', - ); + if (isWhitespaceCode(code)) { + this.pos++; + continue; } - attr.stage = ATTR_STAGE.NAME; - this.forward = 0; - const expr = this.enterState(STATE.EXPRESSION); - expr.terminatedByWhitespace = true; - expr.shouldTerminate = this.isConcise - ? this.activeTag!.stage === TAG_STAGE.ATTR_GROUP - ? shouldTerminateConciseGroupedAttrName - : shouldTerminateConciseAttrName - : shouldTerminateHtmlAttrName; - } else { - this.exitState(); - } - }, + if ( + code === CODE.EQUAL || + (code === CODE.COLON && data.charCodeAt(this.pos + 1) === CODE.EQUAL) || + (code === CODE.PERIOD && this.lookAheadFor("..")) + ) { + attr.valueStart = this.pos; - eol() { - if (this.isConcise) { - this.exitState(); + if (code === CODE.COLON) { + ensureAttrName(this, attr); + attr.bound = true; + this.pos += 2; // skip := + this.consumeWhitespace(); + } else if (code === CODE.PERIOD) { + attr.spread = true; + this.pos += 3; // skip ... + } else { + ensureAttrName(this, attr); + this.pos++; // skip = + this.consumeWhitespace(); + } + + attr.stage = ATTR_STAGE.VALUE; + const expr = this.enterState(STATE.EXPRESSION); + expr.operators = true; + expr.terminatedByWhitespace = true; + expr.shouldTerminate = this.isConcise + ? this.activeTag!.stage === TAG_STAGE.ATTR_GROUP + ? 
shouldTerminateConciseGroupedAttrValue + : shouldTerminateConciseAttrValue + : shouldTerminateHtmlAttrValue; + return; + } else if (code === CODE.OPEN_PAREN) { + ensureAttrName(this, attr); + attr.stage = ATTR_STAGE.ARGUMENT; + this.pos++; // skip ( + this.enterState(STATE.EXPRESSION).shouldTerminate = matchesCloseParen; + return; + } else if ( + code === CODE.OPEN_ANGLE_BRACKET && + attr.stage === ATTR_STAGE.NAME + ) { + attr.stage = ATTR_STAGE.TYPE_PARAMS; + this.pos++; // skip < + const expr = this.enterState(STATE.EXPRESSION); + expr.inType = true; + expr.forceType = true; + expr.shouldTerminate = matchesCloseAngleBracket; + return; + } else if (code === CODE.OPEN_CURLY_BRACE && attr.args) { + ensureAttrName(this, attr); + attr.stage = ATTR_STAGE.BLOCK; + this.pos++; // skip { + this.enterState(STATE.EXPRESSION).shouldTerminate = + matchesCloseCurlyBrace; + return; + } else if (attr.stage === ATTR_STAGE.UNKNOWN) { + if (code === CODE.OPEN_ANGLE_BRACKET) { + return this.emitError( + this.pos, + ErrorCode.INVALID_ATTRIBUTE_NAME, + 'Invalid attribute name. Attribute name cannot begin with the "<" character.', + ); + } + + attr.stage = ATTR_STAGE.NAME; + // Don't advance pos: EXPRESSION starts at current char + const expr = this.enterState(STATE.EXPRESSION); + expr.terminatedByWhitespace = true; + expr.shouldTerminate = this.isConcise + ? this.activeTag!.stage === TAG_STAGE.ATTR_GROUP + ? 
shouldTerminateConciseGroupedAttrName + : shouldTerminateConciseAttrName + : shouldTerminateHtmlAttrName; + return; + } else { + this.exitState(); + return; + } } - }, - eof(attr) { + // EOF if (this.isConcise) { this.exitState(); } else { diff --git a/src/states/BEGIN_DELIMITED_HTML_BLOCK.ts b/src/states/BEGIN_DELIMITED_HTML_BLOCK.ts index b0655bac..bf0928c2 100644 --- a/src/states/BEGIN_DELIMITED_HTML_BLOCK.ts +++ b/src/states/BEGIN_DELIMITED_HTML_BLOCK.ts @@ -33,28 +33,48 @@ export const BEGIN_DELIMITED_HTML_BLOCK: StateDefinition exit() {}, - char(code, block) { - if (code === CODE.HYPHEN) { - block.delimiter += "-"; - } else { + parse(data, maxPos, block) { + if (this.pos === maxPos) { + htmlEOF.call(this); + this.pos++; + return; + } + + while (this.pos < maxPos) { + const code = data.charCodeAt(this.pos); + + if (code === CODE.NEWLINE || code === CODE.CARRIAGE_RETURN) { + const len = + code === CODE.CARRIAGE_RETURN && + data.charCodeAt(this.pos + 1) === CODE.NEWLINE + ? 2 + : 1; + const prevPos = this.pos; + this.beginHtmlBlock(block.delimiter, false); + handleDelimitedBlockEOL(this, true, len, block); + if (this.pos === prevPos) this.pos += len; // advance past newline if not already advanced + return; + } + + if (code === CODE.HYPHEN) { + block.delimiter += "-"; + this.pos++; + continue; + } + + // Non-hyphen, non-newline: check if whitespace-only remains on line const startPos = this.pos; if (!this.consumeWhitespaceOnLine()) { + // Non-whitespace content on this line: start single-line HTML block this.pos = startPos + 1; - this.forward = 0; this.beginHtmlBlock(undefined, true); + return; } + // Only whitespace to EOL: consumeWhitespaceOnLine set pos to newline + // Continue to let the newline trigger EOL handling above } }, - eol(len, block) { - // We have reached the end of the first delimiter... 
we need to skip over any indentation on the next - // line and we might also find that the multi-line, delimited block is immediately ended - this.beginHtmlBlock(block.delimiter, false); - handleDelimitedBlockEOL(this, true, len, block); - }, - - eof: htmlEOF, - return() {}, }; @@ -99,7 +119,6 @@ function handleDelimitedBlockEOL( if (parser.lookAheadFor(endHtmlBlockLookahead, parser.pos + newLineLength)) { parser.endText(); parser.pos += newLineLength + endHtmlBlockLookahead.length; - parser.forward = 0; if (parser.consumeWhitespaceOnLine(0)) { parser.exitState(); @@ -119,7 +138,6 @@ function handleDelimitedBlockEOL( // is any indentation that we need to skip over as we continue parsing the HTML in this // multiline HTML block parser.pos += indent.length; - parser.forward = 0; parser.startText(); // We stay in the same state since we are still parsing a multiline, delimited HTML block } else if (indent && !parser.onlyWhitespaceRemainsOnLine(newLineLength)) { diff --git a/src/states/CDATA.ts b/src/states/CDATA.ts index 4fceb28b..de8d912a 100644 --- a/src/states/CDATA.ts +++ b/src/states/CDATA.ts @@ -1,4 +1,4 @@ -import { CODE, ErrorCode, Parser, type StateDefinition } from "../internal"; +import { ErrorCode, Parser, type StateDefinition } from "../internal"; // We enter STATE.CDATA after we see "")) { - this.pos += 3; // skip ]]> - this.exitState(); - return; + parse(data, maxPos, cdata) { + const idx = data.indexOf("]]>", this.pos); + if (idx === -1) { + return this.emitError( + cdata, + ErrorCode.MALFORMED_CDATA, + "EOF reached while parsing CDATA", + ); } - }, - - eol() {}, - eof(cdata) { - this.emitError( - cdata, - ErrorCode.MALFORMED_CDATA, - "EOF reached while parsing CDATA", - ); + this.pos = idx + 3; // skip ]]> + this.exitState(); }, return() {}, @@ -49,7 +45,7 @@ export function checkForCDATA(parser: Parser) { if (parser.lookAheadFor("![CDATA[")) { parser.endText(); parser.enterState(CDATA); - parser.pos += 8; // skip ![CDATA[ + parser.pos += 9; // skip - 
this.exitState(); - ensureExpectedCloseTag(this, closeTag); + parse(data, maxPos, closeTag) { + const idx = data.indexOf(">", this.pos); + if (idx === -1) { + return this.emitError( + closeTag, + ErrorCode.MALFORMED_CLOSE_TAG, + "EOF reached while parsing closing tag", + ); } - }, - - eol() {}, - eof(closeTag) { - this.emitError( - closeTag, - ErrorCode.MALFORMED_CLOSE_TAG, - "EOF reached while parsing closing tag", - ); + this.pos = idx + 1; // skip > + this.exitState(); + ensureExpectedCloseTag(this, closeTag); }, return() {}, diff --git a/src/states/CONCISE_HTML_CONTENT.ts b/src/states/CONCISE_HTML_CONTENT.ts index 6a916418..7d75c9e5 100644 --- a/src/states/CONCISE_HTML_CONTENT.ts +++ b/src/states/CONCISE_HTML_CONTENT.ts @@ -25,10 +25,43 @@ export const CONCISE_HTML_CONTENT: StateDefinition = { exit() {}, - char(code) { - if (isWhitespaceCode(code)) { - this.indent += this.data[this.pos]; - } else { + parse(data, maxPos) { + if (this.pos === maxPos) { + htmlEOF.call(this); + this.pos++; + return; + } + + while (this.pos < maxPos) { + const code = data.charCodeAt(this.pos); + + if (code === CODE.NEWLINE || code === CODE.CARRIAGE_RETURN) { + this.indent = ""; + this.pos += + code === CODE.CARRIAGE_RETURN && + data.charCodeAt(this.pos + 1) === CODE.NEWLINE + ? 2 + : 1; + continue; + } + + if (isWhitespaceCode(code)) { + // Eagerly consume the indent up to the end of the line. 
+ const start = this.pos; + let next: number; + do { + this.pos++; + } while ( + this.pos < maxPos && + isWhitespaceCode((next = data.charCodeAt(this.pos))) && + next !== CODE.NEWLINE && + next !== CODE.CARRIAGE_RETURN + ); + this.indent += data.slice(start, this.pos); + continue; + } + + // Non-whitespace character: dispatch based on current indent level const curIndent = this.indent.length; const indentStart = this.pos - curIndent - 1; let parentTag = this.activeTag; @@ -74,38 +107,36 @@ export const CONCISE_HTML_CONTENT: StateDefinition = { switch (code) { case CODE.OPEN_ANGLE_BRACKET: this.beginMixedMode = true; - this.pos--; - this.beginHtmlBlock(undefined, false); + this.beginHtmlBlock(undefined, false); // pos stays at < return; case CODE.DOLLAR: - if (isWhitespaceCode(this.lookAtCharCodeAhead(1))) { - this.pos++; // skip space after $ + if (isWhitespaceCode(data.charCodeAt(this.pos + 1))) { + this.pos++; // skip $, INLINE_SCRIPT starts at space this.enterState(STATE.INLINE_SCRIPT); return; } - break; + break; // fall through to enter OPEN_TAG case CODE.HYPHEN: - if (this.lookAtCharCodeAhead(1) === CODE.HYPHEN) { + if (data.charCodeAt(this.pos + 1) === CODE.HYPHEN) { this.enterState(STATE.BEGIN_DELIMITED_HTML_BLOCK); - this.pos--; + return; // pos stays at the first -, BEGIN_DELIMITED_HTML_BLOCK parses it } else { this.emitError( this.pos, ErrorCode.INVALID_LINE_START, 'A line in concise mode cannot start with a single hyphen. Use "--" instead. 
See: https://github.com/marko-js/htmljs-parser/issues/43', ); + return; } - return; case CODE.FORWARD_SLASH: - // Check next character to see if we are in a comment - switch (this.lookAtCharCodeAhead(1)) { + switch (data.charCodeAt(this.pos + 1)) { case CODE.FORWARD_SLASH: this.enterState(STATE.JS_COMMENT_LINE); - this.pos++; // skip / + this.pos += 2; // skip // return; case CODE.ASTERISK: this.enterState(STATE.JS_COMMENT_BLOCK); - this.pos++; // skip * + this.pos += 2; // skip /* return; default: this.emitError( @@ -118,16 +149,10 @@ export const CONCISE_HTML_CONTENT: StateDefinition = { } this.enterState(STATE.OPEN_TAG); - this.forward = 0; // START_TAG_NAME expects to start at the first character + return; // pos stays at current char, OPEN_TAG sees it } }, - eol() { - this.indent = ""; - }, - - eof: htmlEOF, - return(child) { this.indent = ""; this.isConcise = true; diff --git a/src/states/DECLARATION.ts b/src/states/DECLARATION.ts index b683ddd8..0c6b7efc 100644 --- a/src/states/DECLARATION.ts +++ b/src/states/DECLARATION.ts @@ -25,24 +25,23 @@ export const DECLARATION: StateDefinition = { exit() {}, - char(code, declaration) { - if (code === CODE.QUESTION) { - if (this.lookAtCharCodeAhead(1) === CODE.CLOSE_ANGLE_BRACKET) { - exitDeclaration(this, declaration, 2); // will skip ?> - } - } else if (code === CODE.CLOSE_ANGLE_BRACKET) { - exitDeclaration(this, declaration, 1); // will skip > + parse(data, maxPos, declaration) { + const idx = data.indexOf(">", this.pos); + if (idx === -1) { + return this.emitError( + declaration, + ErrorCode.MALFORMED_DECLARATION, + "EOF reached while parsing declaration", + ); } - }, - - eol() {}, - eof(declaration) { - this.emitError( - declaration, - ErrorCode.MALFORMED_DECLARATION, - "EOF reached while parsing declaration", - ); + if (idx > this.pos && data.charCodeAt(idx - 1) === CODE.QUESTION) { + this.pos = idx - 1; + exitDeclaration(this, declaration, 2); // skip ?> + } else { + this.pos = idx; + exitDeclaration(this, 
declaration, 1); // skip > + } }, return() {}, diff --git a/src/states/DTD.ts b/src/states/DTD.ts index 945921d6..87877f3f 100644 --- a/src/states/DTD.ts +++ b/src/states/DTD.ts @@ -1,4 +1,4 @@ -import { CODE, ErrorCode, type StateDefinition } from "../internal"; +import { ErrorCode, type StateDefinition } from "../internal"; // We enter STATE.DTD after we encounter a "". @@ -26,21 +26,18 @@ export const DTD: StateDefinition = { }); }, - char(code) { - if (code === CODE.CLOSE_ANGLE_BRACKET) { - this.pos++; // skip > - this.exitState(); + parse(data, maxPos, documentType) { + const idx = data.indexOf(">", this.pos); + if (idx === -1) { + return this.emitError( + documentType, + ErrorCode.MALFORMED_DOCUMENT_TYPE, + "EOF reached while parsing document type", + ); } - }, - - eol() {}, - eof(documentType) { - this.emitError( - documentType, - ErrorCode.MALFORMED_DOCUMENT_TYPE, - "EOF reached while parsing document type", - ); + this.pos = idx + 1; // skip > + this.exitState(); }, return() {}, diff --git a/src/states/EXPRESSION.ts b/src/states/EXPRESSION.ts index 6e02e694..bb4e4b3f 100644 --- a/src/states/EXPRESSION.ts +++ b/src/states/EXPRESSION.ts @@ -13,6 +13,7 @@ export interface ExpressionMeta extends Meta { groupStack: number[]; operators: boolean; wasComment: boolean; + hadUnguardedNewline: boolean; inType: boolean; forceType: boolean; ternaryDepth: number; @@ -71,6 +72,7 @@ export const EXPRESSION: StateDefinition = { shouldTerminate, operators: false, wasComment: false, + hadUnguardedNewline: false, inType: false, forceType: false, ternaryDepth: 0, @@ -82,245 +84,285 @@ export const EXPRESSION: StateDefinition = { exit() {}, - char(code, expression) { - if (!expression.groupStack.length) { - if (expression.terminatedByWhitespace && isWhitespaceCode(code)) { - if (!checkForOperators(this, expression, false)) { + parse(data, maxPos, expression) { + while (this.pos < maxPos) { + const code = data.charCodeAt(this.pos); + + // EOL handling + if (code === CODE.NEWLINE 
|| code === CODE.CARRIAGE_RETURN) { + const len = + code === CODE.CARRIAGE_RETURN && + data.charCodeAt(this.pos + 1) === CODE.NEWLINE + ? 2 + : 1; + + const prevPos = this.pos; + if ( + !expression.groupStack.length && + (expression.terminatedByEOL || expression.terminatedByWhitespace) && + (expression.wasComment || + !checkForOperators(this, expression, true)) && + !( + expression.consumeIndentedContent && + isIndentCode(data.charCodeAt(prevPos + len)) + ) + ) { + // Don't advance past the newline. this.exitState(); + return; } - return; + + expression.wasComment = false; + if (!expression.groupStack.length) + expression.hadUnguardedNewline = true; + // checkForOperators may have advanced pos; only advance by len if it didn't + if (this.pos === prevPos) this.pos += len; + continue; } - if (expression.shouldTerminate(code, this.data, this.pos, expression)) { - let wasExpression = false; - if (expression.operators) { - const prevNonWhitespacePos = lookBehindWhile( - isWhitespaceCode, - this.data, - this.pos - 1, - ); - if (prevNonWhitespacePos > expression.start) { - wasExpression = - lookBehindForOperator( - expression, - this.data, - prevNonWhitespacePos, - ) !== -1; + // Termination checks (no groupStack) + if (!expression.groupStack.length) { + if (expression.terminatedByWhitespace && isWhitespaceCode(code)) { + if (!checkForOperators(this, expression, false)) { + this.exitState(); + return; } + // checkForOperators already advanced this.pos + continue; } - if (!wasExpression) { - this.exitState(); - return; + if (expression.shouldTerminate(code, data, this.pos, expression)) { + let wasExpression = false; + if (expression.operators) { + const prevNonWhitespacePos = lookBehindWhile( + isWhitespaceCode, + data, + this.pos - 1, + ); + if (prevNonWhitespacePos > expression.start) { + wasExpression = + lookBehindForOperator( + expression, + data, + prevNonWhitespacePos, + ) !== -1; + } + } + + if (!wasExpression) { + this.exitState(); + return; + } } } - } - switch 
(code) { - case CODE.DOUBLE_QUOTE: - this.enterState(STATE.STRING); - break; - case CODE.SINGLE_QUOTE: - this.enterState(STATE.STRING).quoteCharCode = code; - break; - case CODE.BACKTICK: - this.enterState(STATE.TEMPLATE_STRING); - break; - case CODE.QUESTION: - if (expression.operators && !expression.groupStack.length) { - expression.ternaryDepth++; + switch (code) { + case CODE.DOUBLE_QUOTE: + this.enterState(STATE.STRING); + this.pos++; // skip " + return; + case CODE.SINGLE_QUOTE: + this.enterState(STATE.STRING).quoteCharCode = code; + this.pos++; // skip ' + return; + case CODE.BACKTICK: + this.enterState(STATE.TEMPLATE_STRING); + this.pos++; // skip ` + return; + case CODE.QUESTION: + if (expression.operators && !expression.groupStack.length) { + expression.ternaryDepth++; + this.pos++; // skip ? + this.consumeWhitespace(); + continue; + } this.pos++; - this.forward = 0; - this.consumeWhitespace(); - } - break; - case CODE.COLON: - if (expression.operators && !expression.groupStack.length) { - if (expression.ternaryDepth) { - expression.ternaryDepth--; - } else { - expression.inType = true; + break; + case CODE.COLON: + if (expression.operators && !expression.groupStack.length) { + if (expression.ternaryDepth) { + expression.ternaryDepth--; + } else { + expression.inType = true; + } + this.pos++; // skip : + this.consumeWhitespace(); + continue; } - this.pos++; - this.forward = 0; - this.consumeWhitespace(); - } - break; - case CODE.EQUAL: - if (expression.operators) { - if (this.lookAtCharCodeAhead(1) === CODE.CLOSE_ANGLE_BRACKET) { - if ( - expression.inType && - !expression.forceType && - this.getPreviousNonWhitespaceCharCode() !== CODE.CLOSE_PAREN + break; + case CODE.EQUAL: + if (expression.operators) { + if (data.charCodeAt(this.pos + 1) === CODE.CLOSE_ANGLE_BRACKET) { + if ( + expression.inType && + !expression.forceType && + this.getPreviousNonWhitespaceCharCode() !== CODE.CLOSE_PAREN + ) { + expression.inType = false; + } + this.pos++; // skip =, 
outer iteration handles > + } else if ( + !(expression.forceType || expression.groupStack.length) ) { expression.inType = false; } - this.pos++; - } else if (!(expression.forceType || expression.groupStack.length)) { - expression.inType = false; + this.pos++; // skip = (or the char after =>) + this.consumeWhitespace(); + continue; } - this.pos++; - this.forward = 0; - this.consumeWhitespace(); - } - break; - case CODE.FORWARD_SLASH: - // Check next character to see if we are in a comment or regexp - switch (this.lookAtCharCodeAhead(1)) { - case CODE.FORWARD_SLASH: - this.enterState(STATE.JS_COMMENT_LINE); + break; + case CODE.FORWARD_SLASH: + switch (data.charCodeAt(this.pos + 1)) { + case CODE.FORWARD_SLASH: + this.enterState(STATE.JS_COMMENT_LINE); + this.pos += 2; // skip // + return; + case CODE.ASTERISK: + this.enterState(STATE.JS_COMMENT_BLOCK); + this.pos += 2; // skip /* + return; + default: + if (canFollowDivision(this.getPreviousNonWhitespaceCharCode())) { + this.pos++; + this.consumeWhitespace(); + continue; + } else { + this.enterState(STATE.REGULAR_EXPRESSION); + this.pos++; // skip /, REGULAR_EXPRESSION starts after + return; + } + } + case CODE.OPEN_PAREN: + expression.groupStack.push(CODE.CLOSE_PAREN); + this.pos++; + break; + case CODE.OPEN_SQUARE_BRACKET: + expression.groupStack.push(CODE.CLOSE_SQUARE_BRACKET); + this.pos++; + break; + case CODE.OPEN_CURLY_BRACE: + expression.groupStack.push(CODE.CLOSE_CURLY_BRACE); + this.pos++; + break; + case CODE.OPEN_ANGLE_BRACKET: + if (expression.inType) { + expression.groupStack.push(CODE.CLOSE_ANGLE_BRACKET); this.pos++; - break; - case CODE.ASTERISK: - this.enterState(STATE.JS_COMMENT_BLOCK); + } else if (expression.operators && !expression.groupStack.length) { this.pos++; - break; - default: { - if (canFollowDivision(this.getPreviousNonWhitespaceCharCode())) { + this.consumeWhitespace(); + continue; + } else { + this.pos++; + } + break; + + case CODE.CLOSE_PAREN: + case CODE.CLOSE_SQUARE_BRACKET: + case 
CODE.CLOSE_CURLY_BRACE: + case CODE.CLOSE_ANGLE_BRACKET: { + if (code === CODE.CLOSE_ANGLE_BRACKET) { + if ( + !expression.inType || + data.charCodeAt(this.pos - 1) === CODE.EQUAL + ) { this.pos++; - this.forward = 0; - this.consumeWhitespace(); - } else { - this.enterState(STATE.REGULAR_EXPRESSION); + break; } - break; } - } - break; - case CODE.OPEN_PAREN: - expression.groupStack.push(CODE.CLOSE_PAREN); - break; - case CODE.OPEN_SQUARE_BRACKET: - expression.groupStack.push(CODE.CLOSE_SQUARE_BRACKET); - break; - case CODE.OPEN_CURLY_BRACE: - expression.groupStack.push(CODE.CLOSE_CURLY_BRACE); - break; - case CODE.OPEN_ANGLE_BRACKET: - if (expression.inType) { - expression.groupStack.push(CODE.CLOSE_ANGLE_BRACKET); - } else if (expression.operators && !expression.groupStack.length) { - this.pos++; - this.forward = 0; - this.consumeWhitespace(); - } - break; - - case CODE.CLOSE_PAREN: - case CODE.CLOSE_SQUARE_BRACKET: - case CODE.CLOSE_CURLY_BRACE: - case CODE.CLOSE_ANGLE_BRACKET: { - if (code === CODE.CLOSE_ANGLE_BRACKET) { - if ( - !expression.inType || - this.lookAtCharCodeAhead(-1) === CODE.EQUAL - ) { - break; + + if (!expression.groupStack.length) { + return this.emitError( + expression, + ErrorCode.INVALID_EXPRESSION, + 'Mismatched group. A closing "' + + String.fromCharCode(code) + + '" character was found but it is not matched with a corresponding opening character.', + ); } - } - if (!expression.groupStack.length) { - return this.emitError( - expression, - ErrorCode.INVALID_EXPRESSION, - 'Mismatched group. A closing "' + - String.fromCharCode(code) + - '" character was found but it is not matched with a corresponding opening character.', - ); - } + const expectedCode = expression.groupStack.pop()!; + if (expectedCode !== code) { + return this.emitError( + expression, + ErrorCode.INVALID_EXPRESSION, + 'Mismatched group. 
A "' + + String.fromCharCode(code) + + '" character was found when "' + + String.fromCharCode(expectedCode) + + '" was expected.', + ); + } - const expectedCode = expression.groupStack.pop()!; - if (expectedCode !== code) { - return this.emitError( - expression, - ErrorCode.INVALID_EXPRESSION, - 'Mismatched group. A "' + - String.fromCharCode(code) + - '" character was found when "' + - String.fromCharCode(expectedCode) + - '" was expected.', - ); + this.pos++; + break; } - break; + default: + this.pos++; + break; } } - }, - - eol(len, expression) { - if ( - !expression.groupStack.length && - (expression.terminatedByEOL || expression.terminatedByWhitespace) && - (expression.wasComment || !checkForOperators(this, expression, true)) && - !( - expression.consumeIndentedContent && - isIndentCode(this.lookAtCharCodeAhead(len)) - ) - ) { - this.exitState(); - } - expression.wasComment = false; - }, - eof(expression) { + // EOF if ( !expression.groupStack.length && (this.isConcise || expression.terminatedByEOL) ) { this.exitState(); - } else { - const { parent } = expression; + return; + } - switch (parent.state) { - case STATE.ATTRIBUTE: { - const attr = parent as STATE.AttrMeta; - if (!attr.spread && !attr.name) { - return this.emitError( - expression, - ErrorCode.MALFORMED_OPEN_TAG, - 'EOF reached while parsing attribute name for the "' + - this.read(this.activeTag!.tagName) + - '" tag', - ); - } + const { parent } = expression; + switch (parent.state) { + case STATE.ATTRIBUTE: { + const attr = parent as STATE.AttrMeta; + if (!attr.spread && !attr.name) { return this.emitError( expression, ErrorCode.MALFORMED_OPEN_TAG, - `EOF reached while parsing attribute value for the ${ - attr.spread - ? "..." - : attr.name - ? 
`"${this.read(attr.name)}"` - : `"default"` - } attribute`, + 'EOF reached while parsing attribute name for the "' + + this.read(this.activeTag!.tagName) + + '" tag', ); } - case STATE.TAG_NAME: - return this.emitError( - expression, - ErrorCode.MALFORMED_OPEN_TAG, - "EOF reached while parsing tag name", - ); - - case STATE.PLACEHOLDER: - return this.emitError( - expression, - ErrorCode.MALFORMED_PLACEHOLDER, - "EOF reached while parsing placeholder", - ); + return this.emitError( + expression, + ErrorCode.MALFORMED_OPEN_TAG, + `EOF reached while parsing attribute value for the ${ + attr.spread + ? "..." + : attr.name + ? `"${this.read(attr.name)}"` + : `"default"` + } attribute`, + ); } - return this.emitError( - expression, - ErrorCode.INVALID_EXPRESSION, - "EOF reached while parsing expression", - ); + case STATE.TAG_NAME: + return this.emitError( + expression, + ErrorCode.MALFORMED_OPEN_TAG, + "EOF reached while parsing tag name", + ); + + case STATE.PLACEHOLDER: + return this.emitError( + expression, + ErrorCode.MALFORMED_PLACEHOLDER, + "EOF reached while parsing placeholder", + ); } + + return this.emitError( + expression, + ErrorCode.INVALID_EXPRESSION, + "EOF reached while parsing expression", + ); }, return(child, expression) { @@ -340,7 +382,6 @@ function checkForOperators( const { pos, data } = parser; if (lookBehindForOperator(expression, data, pos) !== -1) { parser.consumeWhitespace(); - parser.forward = 0; return true; } @@ -363,7 +404,6 @@ function checkForOperators( const lookAheadPos = lookAheadForOperator(expression, data, nextNonSpace); if (lookAheadPos !== -1) { parser.pos = lookAheadPos; - parser.forward = 0; return true; } } @@ -478,9 +518,10 @@ function lookAheadForOperator( if (keywordPos === -1) continue; if (!isWhitespaceCode(data.charCodeAt(keywordPos + 1))) break; - // skip any whitespace after the operator + // skip any whitespace after the operator; + // there must be an operand before the end of the input. 
const nextPos = lookAheadWhile(isWhitespaceCode, data, keywordPos + 2); - if (nextPos === data.length - 1) break; + if (nextPos === data.length) break; // finally check that this is not followed by a terminator. switch (data.charCodeAt(nextPos)) { @@ -552,7 +593,7 @@ function lookAheadWhile( if (!match(data.charCodeAt(i))) return i; } - return max - 1; + return max; } function lookBehindWhile( diff --git a/src/states/HTML_COMMENT.ts b/src/states/HTML_COMMENT.ts index 6edada38..6e4f3b44 100644 --- a/src/states/HTML_COMMENT.ts +++ b/src/states/HTML_COMMENT.ts @@ -1,4 +1,4 @@ -import { CODE, ErrorCode, type StateDefinition } from "../internal"; +import { ErrorCode, type StateDefinition } from "../internal"; // We enter STATE.HTML_COMMENT after we encounter a "<--" // while in the STATE.HTML_CONTENT. @@ -27,27 +27,20 @@ export const HTML_COMMENT: StateDefinition = { }); }, - char(code) { - if (code === CODE.HYPHEN) { - let offset = 1; - let next: number; - while ((next = this.lookAtCharCodeAhead(offset++)) === CODE.HYPHEN); - this.pos += offset; // skip all - - - if (next === CODE.CLOSE_ANGLE_BRACKET) { - this.exitState(); - } + parse(data, maxPos, comment) { + // The comment ends at the first "-" directly followed by ">", which also + // matches the final hyphens of "-->", "--->", etc. 
+ const idx = data.indexOf("->", this.pos); + if (idx === -1) { + return this.emitError( + comment, + ErrorCode.MALFORMED_COMMENT, + "EOF reached while parsing comment", + ); } - }, - - eol() {}, - eof(comment) { - this.emitError( - comment, - ErrorCode.MALFORMED_COMMENT, - "EOF reached while parsing comment", - ); + this.pos = idx + 2; // skip -> + this.exitState(); }, return() {}, diff --git a/src/states/HTML_CONTENT.ts b/src/states/HTML_CONTENT.ts index a2b67efe..e24bb884 100644 --- a/src/states/HTML_CONTENT.ts +++ b/src/states/HTML_CONTENT.ts @@ -34,107 +34,147 @@ export const HTML_CONTENT: StateDefinition = { exit() {}, - char(code) { - if (code === CODE.OPEN_ANGLE_BRACKET) { - if (STATE.checkForCDATA(this)) return; + parse(data, maxPos, content) { + if (this.pos === maxPos) { + htmlEOF.call(this); + this.pos++; + return; + } + + while (this.pos < maxPos) { + const code = data.charCodeAt(this.pos); + + if (code === CODE.NEWLINE || code === CODE.CARRIAGE_RETURN) { + const len = + code === CODE.CARRIAGE_RETURN && + data.charCodeAt(this.pos + 1) === CODE.NEWLINE + ? 2 + : 1; + + if (this.beginMixedMode) { + this.beginMixedMode = false; + this.endText(); + this.exitState(); + return; // parent handles newline at same pos + } - const nextCode = this.lookAtCharCodeAhead(1); + if (this.endingMixedModeAtEOL) { + this.endingMixedModeAtEOL = false; + this.endText(); + this.exitState(); + return; // parent handles newline at same pos + } + + const prevState = this.activeState; + const prevPos = this.pos; + if (STATE.handleDelimitedEOL(this, len, content)) { + if (this.activeState !== prevState) return; + // Still in this delimited block; skip the newline if it wasn't consumed (eg a blank line). 
+ if (this.pos === prevPos) this.pos += len; + continue; + } - if (nextCode === CODE.EXCLAMATION) { - if ( - this.lookAtCharCodeAhead(2) === CODE.HYPHEN && - this.lookAtCharCodeAhead(3) === CODE.HYPHEN + this.startText(); + this.pos += len; + continue; + } + + if (code === CODE.OPEN_ANGLE_BRACKET) { + if (STATE.checkForCDATA(this)) return; + + const nextCode = data.charCodeAt(this.pos + 1); + + if (nextCode === CODE.EXCLAMATION) { + if ( + data.charCodeAt(this.pos + 2) === CODE.HYPHEN && + data.charCodeAt(this.pos + 3) === CODE.HYPHEN + ) { + this.enterState(STATE.HTML_COMMENT); + this.pos += 4; // skip ", + "// line comment", + "/* block comment */", + "$ const scriptlet = 1;", + "$ { block(); }", + "static const statement = 1", + "(y) { body }>", + " ${placeholder}", + "", + " typeArgs=1/>", + "unclosed(", + ].join("\n"), + ); + }); + + it("reports locations for offset ranges", () => { + const parser = createParser({}); + parser.parse("

\n hi\n
"); + assert.deepEqual(parser.locationAt({ start: 8, end: 8 }), { + start: { line: 1, character: 2 }, + end: { line: 1, character: 2 }, + }); + assert.deepEqual(parser.locationAt({ start: 1, end: 12 }), { + start: { line: 0, character: 1 }, + end: { line: 2, character: 1 }, + }); + }); +}); + +describe("validation internals", () => { + it("rejects doubled operators followed by a terminator", () => { + assert.equal(isValidAttrValue(" ++ ,", true), 0); + }); + + it("continues past arrows before terminators", () => { + assert.equal(isValidAttrValue("a=>,b", true), 2); + }); + + it("rejects identifiers separated by whitespace at end of input", () => { + assert.equal(isValidAttrValue("x i", true), 0); + }); +}); diff --git a/src/__tests__/fixtures/argument-attr-ternary/__snapshots__/argument-attr-ternary.expected.txt b/src/__tests__/fixtures/argument-attr-ternary/__snapshots__/argument-attr-ternary.expected.txt new file mode 100644 index 00000000..1aeb9501 --- /dev/null +++ b/src/__tests__/fixtures/argument-attr-ternary/__snapshots__/argument-attr-ternary.expected.txt @@ -0,0 +1,10 @@ +1╭─
+ │ ││ │ ││ ││ │ ╰─ closeTagEnd(div) + │ ││ │ ││ ││ ╰─ closeTagName "div" + │ ││ │ ││ │╰─ closeTagStart " \ No newline at end of file diff --git a/src/__tests__/fixtures/attr-bound-concise/__snapshots__/attr-bound-concise.expected.txt b/src/__tests__/fixtures/attr-bound-concise/__snapshots__/attr-bound-concise.expected.txt new file mode 100644 index 00000000..68b7f7be --- /dev/null +++ b/src/__tests__/fixtures/attr-bound-concise/__snapshots__/attr-bound-concise.expected.txt @@ -0,0 +1,7 @@ +1╭─ div foo:=bar + │ │ │ │ │ ├─ closeTagEnd(div) + │ │ │ │ │ ╰─ openTagEnd + │ │ │ │ ╰─ attrValue:bound.value "bar" + │ │ │ ╰─ attrValue:bound ":=bar" + │ │ ╰─ attrName "foo" + ╰─ ╰─ tagName "div" \ No newline at end of file diff --git a/src/__tests__/fixtures/attr-bound-concise/input.marko b/src/__tests__/fixtures/attr-bound-concise/input.marko new file mode 100644 index 00000000..1448861e --- /dev/null +++ b/src/__tests__/fixtures/attr-bound-concise/input.marko @@ -0,0 +1 @@ +div foo:=bar \ No newline at end of file diff --git a/src/__tests__/fixtures/attr-eof-after-name/__snapshots__/attr-eof-after-name.expected.txt b/src/__tests__/fixtures/attr-eof-after-name/__snapshots__/attr-eof-after-name.expected.txt new file mode 100644 index 00000000..06e78100 --- /dev/null +++ b/src/__tests__/fixtures/attr-eof-after-name/__snapshots__/attr-eof-after-name.expected.txt @@ -0,0 +1,5 @@ +1╭─
x
+ │ ││││││ │ ╰─ closeTagEnd(div) + │ ││││││ ╰─ closeTagName "div" + │ │││││╰─ closeTagStart "x \ No newline at end of file diff --git a/src/__tests__/fixtures/cdata-eof/__snapshots__/cdata-eof.expected.txt b/src/__tests__/fixtures/cdata-eof/__snapshots__/cdata-eof.expected.txt new file mode 100644 index 00000000..5db5c0b6 --- /dev/null +++ b/src/__tests__/fixtures/cdata-eof/__snapshots__/cdata-eof.expected.txt @@ -0,0 +1,2 @@ +1╭─ + │ ││ │ ╰─ error(INVALID_EXPRESSION:Mismatched group. A "]" character was found when ")" was expected.) + │ ││ ╰─ attrName + │ │╰─ tagName "div" + ╰─ ╰─ openTagStart \ No newline at end of file diff --git a/src/__tests__/fixtures/expression-mismatched-group-pair/input.marko b/src/__tests__/fixtures/expression-mismatched-group-pair/input.marko new file mode 100644 index 00000000..7f17e0bb --- /dev/null +++ b/src/__tests__/fixtures/expression-mismatched-group-pair/input.marko @@ -0,0 +1 @@ +
\ No newline at end of file diff --git a/src/__tests__/fixtures/html-comment-eof/__snapshots__/html-comment-eof.expected.txt b/src/__tests__/fixtures/html-comment-eof/__snapshots__/html-comment-eof.expected.txt new file mode 100644 index 00000000..b80df7a3 --- /dev/null +++ b/src/__tests__/fixtures/html-comment-eof/__snapshots__/html-comment-eof.expected.txt @@ -0,0 +1,2 @@ +1╭─ + │ │ │ ╰─ comment.value " b " + │ │ ├─ comment "" + │ │ ╰─ openTagEnd + │ ├─ closeTagEnd(div) + ╰─ ╰─ tagName "span" +3╭─ + ╰─ ╰─ closeTagEnd(span) \ No newline at end of file diff --git a/src/__tests__/fixtures/semicolon-comments/input.marko b/src/__tests__/fixtures/semicolon-comments/input.marko new file mode 100644 index 00000000..3d65e25d --- /dev/null +++ b/src/__tests__/fixtures/semicolon-comments/input.marko @@ -0,0 +1,2 @@ +div; /* a */ +span; diff --git a/src/__tests__/fixtures/tag-name-placeholder-empty/__snapshots__/tag-name-placeholder-empty.expected.txt b/src/__tests__/fixtures/tag-name-placeholder-empty/__snapshots__/tag-name-placeholder-empty.expected.txt new file mode 100644 index 00000000..bfc8242f --- /dev/null +++ b/src/__tests__/fixtures/tag-name-placeholder-empty/__snapshots__/tag-name-placeholder-empty.expected.txt @@ -0,0 +1,3 @@ +1╭─ <${}/> + │ │ ╰─ error(MALFORMED_PLACEHOLDER:Invalid placeholder, the expression cannot be missing) + ╰─ ╰─ openTagStart \ No newline at end of file diff --git a/src/__tests__/fixtures/tag-name-placeholder-empty/input.marko b/src/__tests__/fixtures/tag-name-placeholder-empty/input.marko new file mode 100644 index 00000000..d68ea0c3 --- /dev/null +++ b/src/__tests__/fixtures/tag-name-placeholder-empty/input.marko @@ -0,0 +1 @@ +<${}/> \ No newline at end of file diff --git a/src/__tests__/fixtures/tag-name-placeholder-eof/__snapshots__/tag-name-placeholder-eof.expected.txt b/src/__tests__/fixtures/tag-name-placeholder-eof/__snapshots__/tag-name-placeholder-eof.expected.txt new file mode 100644 index 00000000..caf50d09 --- /dev/null +++ 
b/src/__tests__/fixtures/tag-name-placeholder-eof/__snapshots__/tag-name-placeholder-eof.expected.txt @@ -0,0 +1,3 @@ +1╭─ <${a + │ │ ╰─ error(MALFORMED_OPEN_TAG:EOF reached while parsing tag name) + ╰─ ╰─ openTagStart \ No newline at end of file diff --git a/src/__tests__/fixtures/tag-name-placeholder-eof/input.marko b/src/__tests__/fixtures/tag-name-placeholder-eof/input.marko new file mode 100644 index 00000000..16659d11 --- /dev/null +++ b/src/__tests__/fixtures/tag-name-placeholder-eof/input.marko @@ -0,0 +1 @@ +<${a \ No newline at end of file diff --git a/src/__tests__/fixtures/tag-var-comma-concise/__snapshots__/tag-var-comma-concise.expected.txt b/src/__tests__/fixtures/tag-var-comma-concise/__snapshots__/tag-var-comma-concise.expected.txt new file mode 100644 index 00000000..7eac43c4 --- /dev/null +++ b/src/__tests__/fixtures/tag-var-comma-concise/__snapshots__/tag-var-comma-concise.expected.txt @@ -0,0 +1,10 @@ +1╭─ div/v, a=1 + │ │ ││ ││╰─ attrValue.value + │ │ ││ │╰─ attrValue "=1" + │ │ ││ ╰─ attrName + │ │ │╰─ tagVar.value + │ │ ╰─ tagVar "/v" + ╰─ ╰─ tagName "div" +2╭─ + │ ├─ openTagEnd + ╰─ ╰─ closeTagEnd(div) \ No newline at end of file diff --git a/src/__tests__/fixtures/tag-var-comma-concise/input.marko b/src/__tests__/fixtures/tag-var-comma-concise/input.marko new file mode 100644 index 00000000..801eb4a6 --- /dev/null +++ b/src/__tests__/fixtures/tag-var-comma-concise/input.marko @@ -0,0 +1 @@ +div/v, a=1 diff --git a/src/__tests__/fixtures/template-string-dollar/__snapshots__/template-string-dollar.expected.txt b/src/__tests__/fixtures/template-string-dollar/__snapshots__/template-string-dollar.expected.txt new file mode 100644 index 00000000..213f9725 --- /dev/null +++ b/src/__tests__/fixtures/template-string-dollar/__snapshots__/template-string-dollar.expected.txt @@ -0,0 +1,7 @@ +1╭─
+ │ ││ │││ ╰─ openTagEnd:selfClosed "/>" + │ ││ ││╰─ attrValue.value "`x$y`" + │ ││ │╰─ attrValue "=`x$y`" + │ ││ ╰─ attrName + │ │╰─ tagName "div" + ╰─ ╰─ openTagStart \ No newline at end of file diff --git a/src/__tests__/fixtures/template-string-dollar/input.marko b/src/__tests__/fixtures/template-string-dollar/input.marko new file mode 100644 index 00000000..45c83e92 --- /dev/null +++ b/src/__tests__/fixtures/template-string-dollar/input.marko @@ -0,0 +1 @@ +
\ No newline at end of file diff --git a/src/__tests__/fixtures/template-string-empty-placeholder/__snapshots__/template-string-empty-placeholder.expected.txt b/src/__tests__/fixtures/template-string-empty-placeholder/__snapshots__/template-string-empty-placeholder.expected.txt new file mode 100644 index 00000000..95e15ecd --- /dev/null +++ b/src/__tests__/fixtures/template-string-empty-placeholder/__snapshots__/template-string-empty-placeholder.expected.txt @@ -0,0 +1,5 @@ +1╭─
+ │ ││ │ ╰─ error(MALFORMED_PLACEHOLDER:Invalid placeholder, the expression cannot be missing) + │ ││ ╰─ attrName + │ │╰─ tagName "div" + ╰─ ╰─ openTagStart \ No newline at end of file diff --git a/src/__tests__/fixtures/template-string-empty-placeholder/input.marko b/src/__tests__/fixtures/template-string-empty-placeholder/input.marko new file mode 100644 index 00000000..f5f03b50 --- /dev/null +++ b/src/__tests__/fixtures/template-string-empty-placeholder/input.marko @@ -0,0 +1 @@ +
\ No newline at end of file diff --git a/src/__tests__/fixtures/text-block-eof/__snapshots__/text-block-eof.expected.txt b/src/__tests__/fixtures/text-block-eof/__snapshots__/text-block-eof.expected.txt new file mode 100644 index 00000000..a98e1be7 --- /dev/null +++ b/src/__tests__/fixtures/text-block-eof/__snapshots__/text-block-eof.expected.txt @@ -0,0 +1 @@ +1╰─ -- \ No newline at end of file diff --git a/src/__tests__/fixtures/text-block-eof/input.marko b/src/__tests__/fixtures/text-block-eof/input.marko new file mode 100644 index 00000000..7489accb --- /dev/null +++ b/src/__tests__/fixtures/text-block-eof/input.marko @@ -0,0 +1 @@ +-- \ No newline at end of file diff --git a/src/__tests__/fixtures/text-block-trailing-space/__snapshots__/text-block-trailing-space.expected.txt b/src/__tests__/fixtures/text-block-trailing-space/__snapshots__/text-block-trailing-space.expected.txt new file mode 100644 index 00000000..68f2e0db --- /dev/null +++ b/src/__tests__/fixtures/text-block-trailing-space/__snapshots__/text-block-trailing-space.expected.txt @@ -0,0 +1,11 @@ +1╭─ div -- + │ │ ╰─ openTagEnd + ╰─ ╰─ tagName "div" +2╭─ hello + ╰─ ╰─ text "hello" +3╭─ div2 + │ ├─ closeTagEnd(div) + ╰─ ╰─ tagName "div2" +4╭─ + │ ├─ openTagEnd + ╰─ ╰─ closeTagEnd(div2) \ No newline at end of file diff --git a/src/__tests__/fixtures/text-block-trailing-space/input.marko b/src/__tests__/fixtures/text-block-trailing-space/input.marko new file mode 100644 index 00000000..b224eb9b --- /dev/null +++ b/src/__tests__/fixtures/text-block-trailing-space/input.marko @@ -0,0 +1,3 @@ +div -- + hello +div2 diff --git a/src/core/Parser.ts b/src/core/Parser.ts index 6b238142..6835024d 100644 --- a/src/core/Parser.ts +++ b/src/core/Parser.ts @@ -109,14 +109,6 @@ export class Parser { return true; } - matchAnyAtPos(a: Range, list: (Range | string)[]) { - for (const item of list) { - if (this.matchAtPos(a, item)) return true; - } - - return false; - } - /** * Look ahead to see if the given str matches 
the substring sequence * beyond @@ -223,7 +215,7 @@ export class Parser { return this.lookAtCharCodeAhead(behind); } - onlyWhitespaceRemainsOnLine(start = 1) { + onlyWhitespaceRemainsOnLine(start: number) { const maxOffset = this.maxPos - this.pos; let ahead = start; diff --git a/src/states/CDATA.ts b/src/states/CDATA.ts index de8d912a..f2f896ce 100644 --- a/src/states/CDATA.ts +++ b/src/states/CDATA.ts @@ -38,6 +38,7 @@ export const CDATA: StateDefinition = { this.exitState(); }, + /* c8 ignore next -- never has child states */ return() {}, }; diff --git a/src/states/CLOSE_TAG.ts b/src/states/CLOSE_TAG.ts index 6813de51..6ce46924 100644 --- a/src/states/CLOSE_TAG.ts +++ b/src/states/CLOSE_TAG.ts @@ -37,6 +37,7 @@ export const CLOSE_TAG: StateDefinition = { ensureExpectedCloseTag(this, closeTag); }, + /* c8 ignore next -- never has child states */ return() {}, }; @@ -50,17 +51,15 @@ export function checkForClosingTag(parser: Parser) { if (!match) { const { tagName } = parser.activeTag!; const tagNameLen = tagName.end - tagName.start; - if (tagNameLen) { - skip += tagNameLen; // skip - match = - (parser.lookAheadFor("/", curPos) && - parser.lookAheadFor(">", 1 + curPos + tagNameLen) && - parser.matchAtPos(tagName, { - start: 1 + curPos, - end: 1 + curPos + tagNameLen, - })) || - false; - } + skip += tagNameLen; // skip + match = + (parser.lookAheadFor("/", curPos) && + parser.lookAheadFor(">", 1 + curPos + tagNameLen) && + parser.matchAtPos(tagName, { + start: 1 + curPos, + end: 1 + curPos + tagNameLen, + })) || + false; } if (match) { @@ -70,14 +69,12 @@ export function checkForClosingTag(parser: Parser) { end: curPos + 1, }); - if ( - ensureExpectedCloseTag(parser, { - start: parser.pos, - end: (parser.pos += skip), - }) - ) { - parser.exitState(); - } + // Always succeeds since the closing tag name was matched above. 
+ ensureExpectedCloseTag(parser, { + start: parser.pos, + end: (parser.pos += skip), + }); + parser.exitState(); return true; } diff --git a/src/states/CONCISE_HTML_CONTENT.ts b/src/states/CONCISE_HTML_CONTENT.ts index 7d75c9e5..75d06fe5 100644 --- a/src/states/CONCISE_HTML_CONTENT.ts +++ b/src/states/CONCISE_HTML_CONTENT.ts @@ -23,6 +23,7 @@ export const CONCISE_HTML_CONTENT: StateDefinition = { }; }, + /* c8 ignore next -- the root state never exits */ exit() {}, parse(data, maxPos) { diff --git a/src/states/DECLARATION.ts b/src/states/DECLARATION.ts index 0c6b7efc..aa3d10d5 100644 --- a/src/states/DECLARATION.ts +++ b/src/states/DECLARATION.ts @@ -44,6 +44,7 @@ export const DECLARATION: StateDefinition = { } }, + /* c8 ignore next -- never has child states */ return() {}, }; diff --git a/src/states/DTD.ts b/src/states/DTD.ts index 87877f3f..b27d385a 100644 --- a/src/states/DTD.ts +++ b/src/states/DTD.ts @@ -40,5 +40,6 @@ export const DTD: StateDefinition = { this.exitState(); }, + /* c8 ignore next -- never has child states */ return() {}, }; diff --git a/src/states/EXPRESSION.ts b/src/states/EXPRESSION.ts index bb4e4b3f..a874ec16 100644 --- a/src/states/EXPRESSION.ts +++ b/src/states/EXPRESSION.ts @@ -334,11 +334,8 @@ export const EXPRESSION: StateDefinition = { expression, ErrorCode.MALFORMED_OPEN_TAG, `EOF reached while parsing attribute value for the ${ - attr.spread - ? "..." - : attr.name - ? `"${this.read(attr.name)}"` - : `"default"` + // A missing name was reported above, so this is a spread or named attribute. + attr.spread ? "..." 
: `"${this.read(attr.name!)}"` } attribute`, ); } diff --git a/src/states/HTML_COMMENT.ts b/src/states/HTML_COMMENT.ts index 6e4f3b44..28f38c16 100644 --- a/src/states/HTML_COMMENT.ts +++ b/src/states/HTML_COMMENT.ts @@ -43,5 +43,6 @@ export const HTML_COMMENT: StateDefinition = { this.exitState(); }, + /* c8 ignore next -- never has child states */ return() {}, }; diff --git a/src/states/HTML_CONTENT.ts b/src/states/HTML_CONTENT.ts index e24bb884..12339973 100644 --- a/src/states/HTML_CONTENT.ts +++ b/src/states/HTML_CONTENT.ts @@ -229,6 +229,5 @@ function isBeginningOfLine(parser: Parser) { return false; } } while (pos > 0); - - return true; + return true; /* c8 ignore next -- unreachable: html mode always begins after a non-whitespace char */ } diff --git a/src/states/JS_COMMENT_BLOCK.ts b/src/states/JS_COMMENT_BLOCK.ts index 566cd5be..f8c89193 100644 --- a/src/states/JS_COMMENT_BLOCK.ts +++ b/src/states/JS_COMMENT_BLOCK.ts @@ -30,5 +30,6 @@ export const JS_COMMENT_BLOCK: StateDefinition = { this.exitState(); }, + /* c8 ignore next -- never has child states */ return() {}, }; diff --git a/src/states/JS_COMMENT_LINE.ts b/src/states/JS_COMMENT_LINE.ts index e20b568e..443d74e7 100644 --- a/src/states/JS_COMMENT_LINE.ts +++ b/src/states/JS_COMMENT_LINE.ts @@ -42,5 +42,6 @@ export const JS_COMMENT_LINE: StateDefinition = { this.exitState(); }, + /* c8 ignore next -- never has child states */ return() {}, }; diff --git a/src/states/PLACEHOLDER.ts b/src/states/PLACEHOLDER.ts index 3178cae7..1ed1f708 100644 --- a/src/states/PLACEHOLDER.ts +++ b/src/states/PLACEHOLDER.ts @@ -38,6 +38,7 @@ export const PLACEHOLDER: StateDefinition = { // Never parses directly: checkForPlaceholder immediately stacks EXPRESSION // on top, and return() exits this state as soon as the expression finishes. 
+ /* c8 ignore next */ parse() {}, return(child) { diff --git a/src/states/REGULAR_EXPRESSION.ts b/src/states/REGULAR_EXPRESSION.ts index 14cc52aa..b7542cdf 100644 --- a/src/states/REGULAR_EXPRESSION.ts +++ b/src/states/REGULAR_EXPRESSION.ts @@ -65,5 +65,6 @@ export const REGULAR_EXPRESSION: StateDefinition = { ); }, + /* c8 ignore next -- never has child states */ return() {}, }; diff --git a/src/states/STRING.ts b/src/states/STRING.ts index acebae48..df69f7a9 100644 --- a/src/states/STRING.ts +++ b/src/states/STRING.ts @@ -42,5 +42,6 @@ export const STRING: StateDefinition = { ); }, + /* c8 ignore next -- never has child states */ return() {}, }; diff --git a/src/states/TAG_NAME.ts b/src/states/TAG_NAME.ts index 4d22f632..2ceef007 100644 --- a/src/states/TAG_NAME.ts +++ b/src/states/TAG_NAME.ts @@ -170,7 +170,6 @@ export const TAG_NAME: StateDefinition = { }, return(child, tagName) { - if ((child as STATE.ExpressionMeta).terminatedByEOL) return; if (child.start === child.end) { this.emitError( child, diff --git a/src/util/validators.ts b/src/util/validators.ts index 19ba540a..f07a0ca4 100644 --- a/src/util/validators.ts +++ b/src/util/validators.ts @@ -10,6 +10,9 @@ import { shouldTerminateHtmlAttrValue, } from "../states"; +// The stubs only satisfy the StateDefinition interface; the root state is +// assigned directly (never entered) and the driving loop stops it from parsing. +/* c8 ignore start */ const ROOT_STATE: StateDefinition = { name: "ROOT", enter() { @@ -19,6 +22,7 @@ const ROOT_STATE: StateDefinition = { parse() {}, return() {}, }; +/* c8 ignore stop */ const ROOT_RANGE = { state: ROOT_STATE, parent: undefined as unknown as Meta,