/**
 * @import {
 *   Code,
 *   Construct,
 *   Resolver,
 *   State,
 *   TokenizeContext,
 *   Tokenizer
 * } from 'micromark-util-types'
 */

import {
  asciiAlphanumeric,
  asciiAlpha,
  markdownLineEndingOrSpace,
  markdownLineEnding,
  markdownSpace
} from 'micromark-util-character';
import { htmlBlockNames, htmlRawNames } from 'micromark-util-html-tag-name';
import { blankLine } from './blank-line.js';

/**
 * Character codes this tokenizer compares against (`eof` is `null`).
 */
const charCode = {
  eof: null,
  exclamationMark: 33,
  quotationMark: 34,
  apostrophe: 39,
  dash: 45,
  dot: 46,
  slash: 47,
  colon: 58,
  lessThan: 60,
  equalsTo: 61,
  greaterThan: 62,
  questionMark: 63,
  leftSquareBracket: 91,
  rightSquareBracket: 93,
  underscore: 95,
  graveAccent: 96
};

/**
 * Kinds of HTML (flow), as recorded by the tokenizer once the opening is
 * recognized:
 *
 * - `raw`: tag name listed in `htmlRawNames` (opening tag, no `/`)
 * - `comment`: opened with `<!-`
 * - `instruction`: opened with `<?`
 * - `declaration`: opened with `<!` followed by an ASCII letter
 * - `cdata`: opened with `<![`
 * - `basic`: tag name listed in `htmlBlockNames`
 * - `complete`: any other tag name
 */
const flowKind = {
  raw: 1,
  comment: 2,
  instruction: 3,
  declaration: 4,
  cdata: 5,
  basic: 6,
  complete: 7
};

/** Text that must follow `<![` to open a CDATA section. */
const cdataOpening = 'CDATA[';

/** A raw closing tag name stops growing once it holds this many letters. */
const rawNameSizeMax = 8;

/** @type {Construct} */
export const htmlFlow = {
  concrete: true,
  name: 'htmlFlow',
  resolveTo: resolveToHtmlFlow,
  tokenize: tokenizeHtmlFlow
};

/** @type {Construct} */
const blankLineBefore = {
  partial: true,
  tokenize: tokenizeBlankLineBefore
};

/** @type {Construct} */
const nonLazyContinuationStart = {
  partial: true,
  tokenize: tokenizeNonLazyContinuationStart
};

/**
 * Consume the current code as a complete `lineEnding` token.
 *
 * @param {import('micromark-util-types').Effects} effects
 *   Tokenizer effects.
 * @param {Code} code
 *   Current code (expected to be a line ending).
 * @returns {undefined}
 *   Nothing.
 */
function eatLineEnding(effects, code) {
  effects.enter('lineEnding');
  effects.consume(code);
  effects.exit('lineEnding');
}

/**
 * Fold a `linePrefix` that sits directly before the last opened `htmlFlow`
 * into that HTML: the `htmlFlow` token and the token entered right after it
 * both get the prefix’s start, and the prefix’s two events are removed.
 *
 * @type {Resolver}
 */
function resolveToHtmlFlow(events) {
  let position = events.length;

  // Walk backwards to the most recent `htmlFlow` enter event.
  // When there is none, `position` ends up at `-1`.
  while (position--) {
    const [phase, token] = events[position];

    if (phase === 'enter' && token.type === 'htmlFlow') {
      break;
    }
  }

  // A prefix needs two events (enter + exit) before the HTML.
  if (position < 2) {
    return events;
  }

  const prefix = events[position - 2][1];

  if (prefix.type !== 'linePrefix') {
    return events;
  }

  // Add the prefix start to the HTML token.
  events[position][1].start = prefix.start;
  // Add the prefix start to the HTML line token.
  events[position + 1][1].start = prefix.start;
  // Remove the line prefix.
  events.splice(position - 2, 2);

  return events;
}

/**
 * Tokenize HTML (flow).
 *
 * @this {TokenizeContext}
 *   Context.
 * @type {Tokenizer}
 */
function tokenizeHtmlFlow(effects, ok, nok) {
  const tokenizer = this;
  /**
   * One of the `flowKind` values, set once the opening is recognized.
   *
   * @type {number}
   */
  let kind;
  /**
   * Set when the tag started with `</`.
   *
   * @type {boolean}
   */
  let closing;
  /**
   * Tag name collected so far.
   *
   * @type {string}
   */
  let tagBuffer;
  /**
   * Position in `cdataOpening` that is expected next.
   *
   * @type {number}
   */
  let cdataIndex;
  /**
   * Quote character that opened the current attribute value.
   *
   * @type {Code}
   */
  let quote;

  return start;

  /**
   * Consume `code` and hand back the state to continue in.
   *
   * @param {Code} code
   *   Current code.
   * @param {State} next
   *   State to continue in.
   * @returns {State}
   *   `next`.
   */
  function take(code, next) {
    effects.consume(code);
    return next;
  }

  /**
   * Pick `ok` when this tokenizer is interrupting, `next` otherwise.
   *
   * @param {State} next
   *   State to continue in when not interrupting.
   * @returns {State}
   *   `ok` or `next`.
   */
  function unlessInterrupting(next) {
    return tokenizer.interrupt ? ok : next;
  }

  /**
   * Consume the first letter of a tag name and start collecting the name,
   * or fail when `code` is not an ASCII letter.
   *
   * @type {State}
   */
  function beginTagName(code) {
    if (!asciiAlpha(code)) {
      return nok(code);
    }

    effects.consume(code);
    tagBuffer = String.fromCharCode(code);
    return tagName;
  }

  /**
   * Find the state that handles `code` specially for the current kind while
   * in continuation, if there is one.
   *
   * @param {Code} code
   *   Current code.
   * @returns {State | undefined}
   *   State to continue in after consuming `code`, or nothing.
   */
  function specialContinuation(code) {
    switch (kind) {
      case flowKind.raw: {
        return code === charCode.lessThan ? continuationRawTagOpen : undefined;
      }

      case flowKind.comment: {
        return code === charCode.dash ? continuationCommentInside : undefined;
      }

      case flowKind.instruction: {
        return code === charCode.questionMark
          ? continuationDeclarationInside
          : undefined;
      }

      case flowKind.declaration: {
        return code === charCode.greaterThan ? continuationClose : undefined;
      }

      case flowKind.cdata: {
        return code === charCode.rightSquareBracket
          ? continuationCdataInside
          : undefined;
      }

      default: {
        return undefined;
      }
    }
  }

  /**
   * Start of HTML (flow).
   *
   * ```markdown
   * > | <x />
   *     ^
   * ```
   *
   * @type {State}
   */
  function start(code) {
    // To do: parse indent like `markdown-rs`.
    return before(code);
  }

  /**
   * At `<`, after optional whitespace.
   *
   * Opens the `htmlFlow` and `htmlFlowData` tokens and consumes the code.
   *
   * ```markdown
   * > | <x />
   *     ^
   * ```
   *
   * @type {State}
   */
  function before(code) {
    effects.enter('htmlFlow');
    effects.enter('htmlFlowData');
    return take(code, open);
  }

  /**
   * After `<`, at `!`, `/`, `?`, or the first letter of a tag name.
   *
   * ```markdown
   * > | <x />
   *      ^
   * > | <!doctype>
   *      ^
   * > | <!--xxx-->
   *      ^
   * ```
   *
   * @type {State}
   */
  function open(code) {
    switch (code) {
      case charCode.exclamationMark: {
        return take(code, declarationOpen);
      }

      case charCode.slash: {
        effects.consume(code);
        closing = true;
        return tagCloseStart;
      }

      case charCode.questionMark: {
        effects.consume(code);
        kind = flowKind.instruction;
        // To do: use `markdown-rs` style interrupt.
        // We are on the `?` of an instruction, so the search for `>` works
        // the same way as for declarations.
        return unlessInterrupting(continuationDeclarationInside);
      }

      default: {
        return beginTagName(code);
      }
    }
  }

  /**
   * After `<!`, at a declaration, comment, or CDATA.
   *
   * ```markdown
   * > | <!doctype>
   *       ^
   * > | <!--xxx-->
   *       ^
   * > | <![CDATA[>&<]]>
   *       ^
   * ```
   *
   * @type {State}
   */
  function declarationOpen(code) {
    switch (code) {
      case charCode.dash: {
        effects.consume(code);
        kind = flowKind.comment;
        return commentOpenInside;
      }

      case charCode.leftSquareBracket: {
        effects.consume(code);
        kind = flowKind.cdata;
        cdataIndex = 0;
        return cdataOpenInside;
      }

      default: {
        if (!asciiAlpha(code)) {
          return nok(code);
        }

        effects.consume(code);
        kind = flowKind.declaration;
        return unlessInterrupting(continuationDeclarationInside);
      }
    }
  }

  /**
   * After `<!-`, expecting the second `-` of a comment opening.
   *
   * ```markdown
   * > | <!--xxx-->
   *        ^
   * ```
   *
   * @type {State}
   */
  function commentOpenInside(code) {
    if (code !== charCode.dash) {
      return nok(code);
    }

    effects.consume(code);
    return unlessInterrupting(continuationDeclarationInside);
  }

  /**
   * After `<![`, expecting the characters of `CDATA[` one by one.
   *
   * ```markdown
   * > | <![CDATA[>&<]]>
   *        ^^^^^^
   * ```
   *
   * @type {State}
   */
  function cdataOpenInside(code) {
    const expected = cdataOpening.charCodeAt(cdataIndex);
    cdataIndex++;

    if (code !== expected) {
      return nok(code);
    }

    effects.consume(code);

    return cdataIndex === cdataOpening.length
      ? unlessInterrupting(continuation)
      : cdataOpenInside;
  }

  /**
   * After `</`, at the first letter of a closing tag name.
   *
   * ```markdown
   * > | </x>
   *       ^
   * ```
   *
   * @type {State}
   */
  function tagCloseStart(code) {
    return beginTagName(code);
  }

  /**
   * In a tag name.
   *
   * Collects ASCII alphanumerics and `-`; at eof, `/`, `>`, or whitespace the
   * collected name decides which kind of HTML this is.
   *
   * ```markdown
   * > | <ab>
   *      ^^
   * > | </ab>
   *       ^^
   * ```
   *
   * @type {State}
   */
  function tagName(code) {
    const atNameEnd =
      code === charCode.eof ||
      code === charCode.slash ||
      code === charCode.greaterThan ||
      markdownLineEndingOrSpace(code);

    if (!atNameEnd) {
      // ASCII alphanumerical and `-`.
      if (code === charCode.dash || asciiAlphanumeric(code)) {
        effects.consume(code);
        tagBuffer += String.fromCharCode(code);
        return tagName;
      }

      return nok(code);
    }

    const selfClosing = code === charCode.slash;
    const name = tagBuffer.toLowerCase();

    if (!selfClosing && !closing && htmlRawNames.includes(name)) {
      kind = flowKind.raw;
      return unlessInterrupting(continuation)(code);
    }

    if (htmlBlockNames.includes(name)) {
      kind = flowKind.basic;

      if (selfClosing) {
        return take(code, basicSelfClosing);
      }

      return unlessInterrupting(continuation)(code);
    }

    kind = flowKind.complete;

    // Do not support complete HTML when interrupting (unless the current line
    // is flagged in `parser.lazy`).
    if (tokenizer.interrupt && !tokenizer.parser.lazy[tokenizer.now().line]) {
      return nok(code);
    }

    return closing
      ? completeClosingTagAfter(code)
      : completeAttributeNameBefore(code);
  }

  /**
   * After the closing slash of a basic tag name, expecting `>`.
   *
   * ```markdown
   * > | <div/>
   *          ^
   * ```
   *
   * @type {State}
   */
  function basicSelfClosing(code) {
    if (code !== charCode.greaterThan) {
      return nok(code);
    }

    effects.consume(code);
    return unlessInterrupting(continuation);
  }

  /**
   * After the name of a complete closing tag: optional whitespace, then `>`.
   *
   * ```markdown
   * > | </x>
   *        ^
   * ```
   *
   * @type {State}
   */
  function completeClosingTagAfter(code) {
    return markdownSpace(code)
      ? take(code, completeClosingTagAfter)
      : completeEnd(code);
  }

  /**
   * Before an attribute name.
   *
   * Used right after a complete tag name and again after each attribute:
   * skips whitespace, then expects an attribute name, a `/`, or the end of
   * the tag.
   *
   * ```markdown
   * > | <a />
   *        ^
   * > | <a b>
   *        ^
   * > | <a >
   *        ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeNameBefore(code) {
    if (code === charCode.slash) {
      return take(code, completeEnd);
    }

    // ASCII alphabetical and `:` and `_`.
    if (
      code === charCode.colon ||
      code === charCode.underscore ||
      asciiAlpha(code)
    ) {
      return take(code, completeAttributeName);
    }

    if (markdownSpace(code)) {
      return take(code, completeAttributeNameBefore);
    }

    return completeEnd(code);
  }

  /**
   * In an attribute name.
   *
   * ```markdown
   * > | <a bc>
   *         ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeName(code) {
    // ASCII alphanumerical and `-`, `.`, `:`, and `_`.
    const inName =
      code === charCode.dash ||
      code === charCode.dot ||
      code === charCode.colon ||
      code === charCode.underscore ||
      asciiAlphanumeric(code);

    return inName
      ? take(code, completeAttributeName)
      : completeAttributeNameAfter(code);
  }

  /**
   * After an attribute name, at an optional initializer (`=`), whitespace, or
   * whatever may follow an attribute.
   *
   * ```markdown
   * > | <a b>
   *         ^
   * > | <a b=c>
   *         ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeNameAfter(code) {
    if (code === charCode.equalsTo) {
      return take(code, completeAttributeValueBefore);
    }

    if (markdownSpace(code)) {
      return take(code, completeAttributeNameAfter);
    }

    return completeAttributeNameBefore(code);
  }

  /**
   * Before an unquoted, double quoted, or single quoted attribute value,
   * allowing whitespace.
   *
   * ```markdown
   * > | <a b=c>
   *          ^
   * > | <a b="c">
   *          ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeValueBefore(code) {
    switch (code) {
      case charCode.eof:
      case charCode.lessThan:
      case charCode.equalsTo:
      case charCode.greaterThan:
      case charCode.graveAccent: {
        return nok(code);
      }

      case charCode.quotationMark:
      case charCode.apostrophe: {
        effects.consume(code);
        quote = code;
        return completeAttributeValueQuoted;
      }

      default: {
        return markdownSpace(code)
          ? take(code, completeAttributeValueBefore)
          : completeAttributeValueUnquoted(code);
      }
    }
  }

  /**
   * In a double or single quoted attribute value.
   *
   * Ends at the quote that opened it; eof and line endings are not allowed.
   *
   * ```markdown
   * > | <a b="c">
   *           ^
   * > | <a b='c'>
   *           ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeValueQuoted(code) {
    if (code === quote) {
      effects.consume(code);
      quote = null;
      return completeAttributeValueQuotedAfter;
    }

    if (code === charCode.eof || markdownLineEnding(code)) {
      return nok(code);
    }

    return take(code, completeAttributeValueQuoted);
  }

  /**
   * In an unquoted attribute value.
   *
   * ```markdown
   * > | <a b=c>
   *          ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeValueUnquoted(code) {
    const atValueEnd =
      code === charCode.eof ||
      code === charCode.quotationMark ||
      code === charCode.apostrophe ||
      code === charCode.slash ||
      code === charCode.lessThan ||
      code === charCode.equalsTo ||
      code === charCode.greaterThan ||
      code === charCode.graveAccent ||
      markdownLineEndingOrSpace(code);

    return atValueEnd
      ? completeAttributeNameAfter(code)
      : take(code, completeAttributeValueUnquoted);
  }

  /**
   * After a double or single quoted attribute value: only `/`, `>`, or
   * whitespace may follow.
   *
   * ```markdown
   * > | <a b="c">
   *             ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeValueQuotedAfter(code) {
    const allowed =
      code === charCode.slash ||
      code === charCode.greaterThan ||
      markdownSpace(code);

    return allowed ? completeAttributeNameBefore(code) : nok(code);
  }

  /**
   * In a complete tag, at a place where only `>` is allowed.
   *
   * ```markdown
   * > | <a b="c">
   *             ^
   * ```
   *
   * @type {State}
   */
  function completeEnd(code) {
    return code === charCode.greaterThan
      ? take(code, completeAfter)
      : nok(code);
  }

  /**
   * After `>` in a complete tag: only whitespace may follow on this line.
   *
   * ```markdown
   * > | <x>
   *        ^
   * ```
   *
   * @type {State}
   */
  function completeAfter(code) {
    if (code === charCode.eof || markdownLineEnding(code)) {
      return continuation(code);
    }

    return markdownSpace(code) ? take(code, completeAfter) : nok(code);
  }

  /**
   * In continuation of any HTML kind.
   *
   * ```markdown
   * > | <!--xxx-->
   *         ^
   * ```
   *
   * @type {State}
   */
  function continuation(code) {
    // Characters that may start the closing sequence of the current kind.
    const special = specialContinuation(code);

    if (special) {
      return take(code, special);
    }

    const atLineEnding = markdownLineEnding(code);

    if (code === charCode.eof || atLineEnding) {
      effects.exit('htmlFlowData');

      // Basic and complete HTML are checked against `blankLineBefore` at a
      // line ending: done when it matches, continuing otherwise.
      if (
        atLineEnding &&
        (kind === flowKind.basic || kind === flowKind.complete)
      ) {
        return effects.check(
          blankLineBefore,
          continuationAfter,
          continuationStart
        )(code);
      }

      return continuationStart(code);
    }

    return take(code, continuation);
  }

  /**
   * In continuation, at eol or eof.
   *
   * Checks `nonLazyContinuationStart`: continues on the next line when it
   * matches, finishes the HTML otherwise.
   *
   * ```markdown
   * > | <x>
   *        ^
   *   | asd
   * ```
   *
   * @type {State}
   */
  function continuationStart(code) {
    return effects.check(
      nonLazyContinuationStart,
      continuationStartNonLazy,
      continuationAfter
    )(code);
  }

  /**
   * In continuation, at eol, before non-lazy content.
   *
   * ```markdown
   * > | <x>
   *        ^
   *   | asd
   * ```
   *
   * @type {State}
   */
  function continuationStartNonLazy(code) {
    eatLineEnding(effects, code);
    return continuationBefore;
  }

  /**
   * In continuation, at the start of a non-lazy line.
   *
   * ```markdown
   *   | <x>
   * > | asd
   *     ^
   * ```
   *
   * @type {State}
   */
  function continuationBefore(code) {
    if (code === charCode.eof || markdownLineEnding(code)) {
      return continuationStart(code);
    }

    effects.enter('htmlFlowData');
    return continuation(code);
  }

  /**
   * In comment continuation, after one `-`, expecting another.
   *
   * ```markdown
   * > | <!--xxx-->
   *             ^
   * ```
   *
   * @type {State}
   */
  function continuationCommentInside(code) {
    return code === charCode.dash
      ? take(code, continuationDeclarationInside)
      : continuation(code);
  }

  /**
   * In raw continuation, after `<`, at `/`.
   *
   * ```markdown
   * > | <script>console.log(1)</script>
   *                            ^
   * ```
   *
   * @type {State}
   */
  function continuationRawTagOpen(code) {
    if (code !== charCode.slash) {
      return continuation(code);
    }

    effects.consume(code);
    tagBuffer = '';
    return continuationRawEndTag;
  }

  /**
   * In raw continuation, after `</`, in a raw tag name.
   *
   * ```markdown
   * > | <script>console.log(1)</script>
   *                             ^^^^^^
   * ```
   *
   * @type {State}
   */
  function continuationRawEndTag(code) {
    if (code === charCode.greaterThan) {
      if (!htmlRawNames.includes(tagBuffer.toLowerCase())) {
        return continuation(code);
      }

      return take(code, continuationClose);
    }

    if (asciiAlpha(code) && tagBuffer.length < rawNameSizeMax) {
      effects.consume(code);
      tagBuffer += String.fromCharCode(code);
      return continuationRawEndTag;
    }

    return continuation(code);
  }

  /**
   * In CDATA continuation, after `]`, expecting `]>`.
   *
   * ```markdown
   * > | <![CDATA[>&<]]>
   *                  ^
   * ```
   *
   * @type {State}
   */
  function continuationCdataInside(code) {
    return code === charCode.rightSquareBracket
      ? take(code, continuationDeclarationInside)
      : continuation(code);
  }

  /**
   * In declaration, instruction, comment, or CDATA continuation, at `>`.
   *
   * ```markdown
   * > | <!-->
   *         ^
   * > | <?>
   *       ^
   * > | <!q>
   *        ^
   * > | <!--ab-->
   *             ^
   * > | <![CDATA[>&<]]>
   *                   ^
   * ```
   *
   * @type {State}
   */
  function continuationDeclarationInside(code) {
    if (code === charCode.greaterThan) {
      return take(code, continuationClose);
    }

    // More dashes.
    if (code === charCode.dash && kind === flowKind.comment) {
      return take(code, continuationDeclarationInside);
    }

    return continuation(code);
  }

  /**
   * In closed continuation: everything up to the eol/eof is part of it.
   *
   * ```markdown
   * > | <!doctype>
   *               ^
   * ```
   *
   * @type {State}
   */
  function continuationClose(code) {
    if (code === charCode.eof || markdownLineEnding(code)) {
      effects.exit('htmlFlowData');
      return continuationAfter(code);
    }

    return take(code, continuationClose);
  }

  /**
   * Done: close the `htmlFlow` token and report success.
   *
   * ```markdown
   * > | <!doctype>
   *               ^
   * ```
   *
   * @type {State}
   */
  function continuationAfter(code) {
    effects.exit('htmlFlow');
    return ok(code);
  }
}

/**
 * Tokenize a line ending that is followed by a line not flagged in
 * `parser.lazy`.
 *
 * @this {TokenizeContext}
 *   Context.
 * @type {Tokenizer}
 */
function tokenizeNonLazyContinuationStart(effects, ok, nok) {
  const tokenizer = this;

  return start;

  /**
   * At eol, before continuation.
   *
   * ```markdown
   * > | <x>
   *        ^
   *   | asd
   * ```
   *
   * @type {State}
   */
  function start(code) {
    if (!markdownLineEnding(code)) {
      return nok(code);
    }

    eatLineEnding(effects, code);
    return after;
  }

  /**
   * At the start of the next line: fail when that line is flagged in
   * `parser.lazy`, succeed otherwise.
   *
   * ```markdown
   *   | <x>
   * > | asd
   *     ^
   * ```
   *
   * @type {State}
   */
  function after(code) {
    const lazy = tokenizer.parser.lazy[tokenizer.now().line];
    return lazy ? nok(code) : ok(code);
  }
}

/**
 * Tokenize a line ending that is followed by a blank line.
 *
 * @this {TokenizeContext}
 *   Context.
 * @type {Tokenizer}
 */
function tokenizeBlankLineBefore(effects, ok, nok) {
  return start;

  /**
   * Before eol, expecting a blank line after it.
   *
   * ```markdown
   * > | <div>
   *          ^
   *   |
   * ```
   *
   * @type {State}
   */
  function start(code) {
    eatLineEnding(effects, code);
    return effects.attempt(blankLine, ok, nok);
  }
}