knowledgebase_law/node_modules/micromark-core-commonmark/lib/html-flow.js

876 lines
18 KiB
JavaScript
Raw Normal View History

2025-04-11 23:47:09 +08:00
/**
* @import {
* Code,
* Construct,
* Resolver,
* State,
* TokenizeContext,
* Tokenizer
* } from 'micromark-util-types'
*/
import { asciiAlphanumeric, asciiAlpha, markdownLineEndingOrSpace, markdownLineEnding, markdownSpace } from 'micromark-util-character';
import { htmlBlockNames, htmlRawNames } from 'micromark-util-html-tag-name';
import { blankLine } from './blank-line.js';
/**
 * Construct for HTML (flow).
 *
 * Wires together the tokenizer (`tokenizeHtmlFlow`) and the resolver
 * (`resolveToHtmlFlow`) defined in this file, and is flagged `concrete`.
 *
 * @type {Construct}
 */
export const htmlFlow = {
concrete: true,
name: 'htmlFlow',
resolveTo: resolveToHtmlFlow,
tokenize: tokenizeHtmlFlow
};
/**
 * Partial construct: a line ending that is followed by a blank line.
 *
 * The HTML (flow) tokenizer passes it to `effects.check` at the end of a line
 * when the marker is 6 or 7, to decide whether the HTML ends there.
 *
 * @type {Construct}
 */
const blankLineBefore = {
partial: true,
tokenize: tokenizeBlankLineBefore
};
/**
 * Partial construct: a line ending whose following line is not flagged as lazy
 * in `parser.lazy`.
 *
 * The HTML (flow) tokenizer passes it to `effects.check` in its
 * `continuationStart` state to decide whether the HTML continues on the next
 * line.
 *
 * @type {Construct}
 */
const nonLazyContinuationStart = {
partial: true,
tokenize: tokenizeNonLazyContinuationStart
};
/**
 * Resolver for HTML (flow): folds a directly preceding line prefix into the
 * HTML tokens.
 *
 * Searches backwards for the most recent `enter` event of an `htmlFlow`
 * token. When the event two positions before it belongs to a `linePrefix`
 * token, the `htmlFlow` token and the token of the event right after it are
 * made to start where that prefix starts, and the two events in front of the
 * `htmlFlow` enter are removed. The `events` list is changed in place and
 * returned.
 *
 * @type {Resolver}
 */
function resolveToHtmlFlow(events) {
  // Position of the last `enter` of `htmlFlow`, or -1 when there is none.
  let flowAt = events.length - 1;
  for (; flowAt >= 0; flowAt -= 1) {
    const event = events[flowAt];
    if (event[0] === 'enter' && event[1].type === "htmlFlow") {
      break;
    }
  }

  // An enter/exit pair needs two slots in front of the HTML.
  if (flowAt < 2) {
    return events;
  }

  const prefixAt = flowAt - 2;
  const candidate = events[prefixAt][1];
  if (candidate.type !== "linePrefix") {
    return events;
  }

  const prefixStart = candidate.start;
  // Stretch the HTML token over the prefix.
  events[flowAt][1].start = prefixStart;
  // Stretch the token entered right after it (the first HTML line) as well.
  events[flowAt + 1][1].start = prefixStart;
  // Drop the two line prefix events.
  events.splice(prefixAt, 2);
  return events;
}
/**
 * Tokenizer for HTML (flow).
 *
 * Returns the initial state. The nested state functions below receive one
 * character code at a time, record tokens through `effects`, and finish by
 * handing over to `ok` (HTML found) or `nok` (not HTML).
 *
 * `marker` remembers which kind of HTML was opened and thereby which closing
 * sequence `continuation` looks for. The numbers appear to mirror the seven
 * HTML block start conditions of CommonMark (to be confirmed against the
 * spec):
 *
 * - 1: raw tag (name in `htmlRawNames`); closed by `</name>` with a name in
 *   `htmlRawNames`
 * - 2: comment (`<!--`); closed by `-->`
 * - 3: instruction (`<?`); closed by `?>`
 * - 4: declaration (`<!` followed by a letter); closed by `>`
 * - 5: CDATA (`<![CDATA[`); closed by `]]>`
 * - 6: basic tag (name in `htmlBlockNames`); closed by a blank line
 * - 7: complete tag (any other tag name); closed by a blank line
 *
 * For kinds 1–5 the rest of the closing line still belongs to the HTML.
 * Every kind also ends at the end of the input, or when the next line is
 * lazy.
 *
 * @this {TokenizeContext}
 *   Context.
 * @type {Tokenizer}
 */
function tokenizeHtmlFlow(effects, ok, nok) {
const self = this;
/**
 * Kind of HTML, 1 through 7 (see the list on the tokenizer).
 *
 * @type {number}
 */
let marker;
/**
 * Whether the tag started with `</`; stays `undefined` (falsy) otherwise.
 *
 * @type {boolean}
 */
let closingTag;
/**
 * Tag name collected so far (opening tag, or raw end tag in continuation).
 *
 * @type {string}
 */
let buffer;
/**
 * Position inside `CDATA[` while matching the CDATA opening.
 *
 * @type {number}
 */
let index;
/**
 * Quote (`"` or `'`) that opened the current attribute value.
 *
 * @type {Code}
 */
let markerB;
return start;
/**
 * Start of HTML (flow).
 *
 * ```markdown
 * > | <x />
 *     ^
 * ```
 *
 * @type {State}
 */
function start(code) {
// To do: parse indent like `markdown-rs`.
return before(code);
}
/**
 * At `<`, after optional whitespace.
 *
 * Opens the `htmlFlow` token and its first `htmlFlowData` token, and
 * consumes the current code without inspecting it.
 *
 * ```markdown
 * > | <x />
 *     ^
 * ```
 *
 * @type {State}
 */
function before(code) {
effects.enter("htmlFlow");
effects.enter("htmlFlowData");
effects.consume(code);
return open;
}
/**
 * After `<`, at tag name or other stuff.
 *
 * ```markdown
 * > | <x />
 *      ^
 * > | <!doctype>
 *      ^
 * > | <!--xxx-->
 *      ^
 * ```
 *
 * @type {State}
 */
function open(code) {
// `!`: declaration, comment, or CDATA.
if (code === 33) {
effects.consume(code);
return declarationOpen;
}
// `/`: closing tag.
if (code === 47) {
effects.consume(code);
closingTag = true;
return tagCloseStart;
}
// `?`: instruction.
if (code === 63) {
effects.consume(code);
marker = 3;
// To do:
// tokenizer.concrete = true
// To do: use `markdown-rs` style interrupt.
// While we're in an instruction instead of a declaration, we're on a `?`
// right now, so we do need to search for `>`, similar to declarations.
// When interrupting, `ok` itself becomes the next state: recognizing the
// opening is enough.
return self.interrupt ? ok : continuationDeclarationInside;
}
// ASCII alphabetical.
if (asciiAlpha(code)) {
// The code is a letter here; it becomes the first character of the name.
effects.consume(code);
buffer = String.fromCharCode(code);
return tagName;
}
return nok(code);
}
/**
 * After `<!`, at declaration, comment, or CDATA.
 *
 * ```markdown
 * > | <!doctype>
 *       ^
 * > | <!--xxx-->
 *       ^
 * > | <![CDATA[>&<]]>
 *       ^
 * ```
 *
 * @type {State}
 */
function declarationOpen(code) {
// `-`: comment.
if (code === 45) {
effects.consume(code);
marker = 2;
return commentOpenInside;
}
// `[`: CDATA.
if (code === 91) {
effects.consume(code);
marker = 5;
index = 0;
return cdataOpenInside;
}
// ASCII alphabetical: declaration.
if (asciiAlpha(code)) {
effects.consume(code);
marker = 4;
// // Do not form containers.
// tokenizer.concrete = true
return self.interrupt ? ok : continuationDeclarationInside;
}
return nok(code);
}
/**
 * After `<!-`, inside a comment, at another `-`.
 *
 * ```markdown
 * > | <!--xxx-->
 *        ^
 * ```
 *
 * @type {State}
 */
function commentOpenInside(code) {
if (code === 45) {
effects.consume(code);
// // Do not form containers.
// tokenizer.concrete = true
return self.interrupt ? ok : continuationDeclarationInside;
}
return nok(code);
}
/**
 * After `<![`, inside CDATA, expecting `CDATA[`.
 *
 * Re-entered once per character; `index` tracks how much of `CDATA[` has
 * been matched so far.
 *
 * ```markdown
 * > | <![CDATA[>&<]]>
 *        ^^^^^^
 * ```
 *
 * @type {State}
 */
function cdataOpenInside(code) {
const value = "CDATA[";
if (code === value.charCodeAt(index++)) {
effects.consume(code);
if (index === value.length) {
// // Do not form containers.
// tokenizer.concrete = true
return self.interrupt ? ok : continuation;
}
return cdataOpenInside;
}
return nok(code);
}
/**
 * After `</`, in closing tag, at tag name.
 *
 * ```markdown
 * > | </x>
 *       ^
 * ```
 *
 * @type {State}
 */
function tagCloseStart(code) {
if (asciiAlpha(code)) {
// The code is a letter here; it becomes the first character of the name.
effects.consume(code);
buffer = String.fromCharCode(code);
return tagName;
}
return nok(code);
}
/**
 * In tag name.
 *
 * Collects the name in `buffer`. At the end of the name (eof, `/`, `>`, eol,
 * or whitespace) the lowercased name decides the kind: raw (1), basic (6),
 * or complete (7).
 *
 * ```markdown
 * > | <ab>
 *      ^^
 * > | </ab>
 *       ^^
 * ```
 *
 * @type {State}
 */
function tagName(code) {
if (code === null || code === 47 || code === 62 || markdownLineEndingOrSpace(code)) {
const slash = code === 47;
const name = buffer.toLowerCase();
// Raw: only for an opening tag that is not directly followed by `/`.
if (!slash && !closingTag && htmlRawNames.includes(name)) {
marker = 1;
// // Do not form containers.
// tokenizer.concrete = true
return self.interrupt ? ok(code) : continuation(code);
}
// Basic: opening or closing tag (same lowercased name as `name` above).
if (htmlBlockNames.includes(buffer.toLowerCase())) {
marker = 6;
if (slash) {
effects.consume(code);
return basicSelfClosing;
}
// // Do not form containers.
// tokenizer.concrete = true
return self.interrupt ? ok(code) : continuation(code);
}
marker = 7;
// Do not support complete HTML when interrupting.
// The exception is a current line that is flagged as lazy: then the tag
// is still parsed.
return self.interrupt && !self.parser.lazy[self.now().line] ? nok(code) : closingTag ? completeClosingTagAfter(code) : completeAttributeNameBefore(code);
}
// ASCII alphanumerical and `-`.
if (code === 45 || asciiAlphanumeric(code)) {
effects.consume(code);
buffer += String.fromCharCode(code);
return tagName;
}
return nok(code);
}
/**
 * After closing slash of a basic tag name.
 *
 * ```markdown
 * > | <div/>
 *          ^
 * ```
 *
 * @type {State}
 */
function basicSelfClosing(code) {
if (code === 62) {
effects.consume(code);
// // Do not form containers.
// tokenizer.concrete = true
return self.interrupt ? ok : continuation;
}
return nok(code);
}
/**
 * After the name of a complete closing tag: optional whitespace, then `>`.
 *
 * Reached from `tagName` when the tag started with `</` and its name is
 * neither raw nor basic.
 *
 * ```markdown
 * > | </x>
 *        ^
 * ```
 *
 * @type {State}
 */
function completeClosingTagAfter(code) {
if (markdownSpace(code)) {
effects.consume(code);
return completeClosingTagAfter;
}
return completeEnd(code);
}
/**
 * At an attribute name.
 *
 * At first, this state is used after a complete tag name, after whitespace,
 * where it expects optional attributes or the end of the tag.
 * It is also reused after attributes, when expecting more optional
 * attributes.
 *
 * ```markdown
 * > | <a />
 *        ^
 * > | <a :b>
 *        ^
 * > | <a _b>
 *        ^
 * > | <a b>
 *        ^
 * > | <a >
 *        ^
 * ```
 *
 * @type {State}
 */
function completeAttributeNameBefore(code) {
// `/`: self-closing, only `>` may follow.
if (code === 47) {
effects.consume(code);
return completeEnd;
}
// ASCII alphabetical and `:` and `_` (digits cannot start a name).
if (code === 58 || code === 95 || asciiAlpha(code)) {
effects.consume(code);
return completeAttributeName;
}
if (markdownSpace(code)) {
effects.consume(code);
return completeAttributeNameBefore;
}
return completeEnd(code);
}
/**
 * In attribute name.
 *
 * ```markdown
 * > | <a :b>
 *         ^
 * > | <a _b>
 *         ^
 * > | <a b>
 *         ^
 * ```
 *
 * @type {State}
 */
function completeAttributeName(code) {
// ASCII alphanumerical and `-`, `.`, `:`, and `_`.
if (code === 45 || code === 46 || code === 58 || code === 95 || asciiAlphanumeric(code)) {
effects.consume(code);
return completeAttributeName;
}
return completeAttributeNameAfter(code);
}
/**
 * After attribute name, at an optional initializer, the end of the tag, or
 * whitespace.
 *
 * ```markdown
 * > | <a b>
 *         ^
 * > | <a b=c>
 *         ^
 * ```
 *
 * @type {State}
 */
function completeAttributeNameAfter(code) {
// `=`: a value follows.
if (code === 61) {
effects.consume(code);
return completeAttributeValueBefore;
}
if (markdownSpace(code)) {
effects.consume(code);
return completeAttributeNameAfter;
}
return completeAttributeNameBefore(code);
}
/**
 * Before unquoted, double quoted, or single quoted attribute value, allowing
 * whitespace.
 *
 * ```markdown
 * > | <a b=c>
 *          ^
 * > | <a b="c">
 *          ^
 * ```
 *
 * @type {State}
 */
function completeAttributeValueBefore(code) {
// Eof, `<`, `=`, `>`, and `` ` `` cannot start a value.
if (code === null || code === 60 || code === 61 || code === 62 || code === 96) {
return nok(code);
}
// `"` or `'`: quoted value; remember which quote has to close it.
if (code === 34 || code === 39) {
effects.consume(code);
markerB = code;
return completeAttributeValueQuoted;
}
if (markdownSpace(code)) {
effects.consume(code);
return completeAttributeValueBefore;
}
return completeAttributeValueUnquoted(code);
}
/**
 * In double or single quoted attribute value.
 *
 * The value has to close on the same line: eof and eol are rejected.
 *
 * ```markdown
 * > | <a b="c">
 *           ^
 * > | <a b='c'>
 *           ^
 * ```
 *
 * @type {State}
 */
function completeAttributeValueQuoted(code) {
if (code === markerB) {
effects.consume(code);
markerB = null;
return completeAttributeValueQuotedAfter;
}
if (code === null || markdownLineEnding(code)) {
return nok(code);
}
effects.consume(code);
return completeAttributeValueQuoted;
}
/**
 * In unquoted attribute value.
 *
 * Ends, without consuming, at eof, `"`, `'`, `/`, `<`, `=`, `>`, `` ` ``,
 * eol, or whitespace.
 *
 * ```markdown
 * > | <a b=c>
 *          ^
 * ```
 *
 * @type {State}
 */
function completeAttributeValueUnquoted(code) {
if (code === null || code === 34 || code === 39 || code === 47 || code === 60 || code === 61 || code === 62 || code === 96 || markdownLineEndingOrSpace(code)) {
return completeAttributeNameAfter(code);
}
effects.consume(code);
return completeAttributeValueUnquoted;
}
/**
 * After double or single quoted attribute value, before whitespace or the
 * end of the tag.
 *
 * ```markdown
 * > | <a b="c">
 *             ^
 * ```
 *
 * @type {State}
 */
function completeAttributeValueQuotedAfter(code) {
// Only `/`, `>`, or whitespace may follow the closing quote.
if (code === 47 || code === 62 || markdownSpace(code)) {
return completeAttributeNameBefore(code);
}
return nok(code);
}
/**
 * In certain circumstances of a complete tag where only an `>` is allowed.
 *
 * ```markdown
 * > | <a b="c">
 *             ^
 * ```
 *
 * @type {State}
 */
function completeEnd(code) {
if (code === 62) {
effects.consume(code);
return completeAfter;
}
return nok(code);
}
/**
 * After `>` in a complete tag.
 *
 * Only whitespace may follow on the same line; anything else means this is
 * not HTML (flow).
 *
 * ```markdown
 * > | <x>
 *        ^
 * ```
 *
 * @type {State}
 */
function completeAfter(code) {
if (code === null || markdownLineEnding(code)) {
// // Do not form containers.
// tokenizer.concrete = true
return continuation(code);
}
if (markdownSpace(code)) {
effects.consume(code);
return completeAfter;
}
return nok(code);
}
/**
 * In continuation of any HTML kind.
 *
 * First looks for the start of the closing sequence that belongs to the
 * current `marker`, then handles line endings, and otherwise consumes the
 * code as data.
 *
 * ```markdown
 * > | <!--xxx-->
 *         ^
 * ```
 *
 * @type {State}
 */
function continuation(code) {
// `-` in a comment.
if (code === 45 && marker === 2) {
effects.consume(code);
return continuationCommentInside;
}
// `<` in raw.
if (code === 60 && marker === 1) {
effects.consume(code);
return continuationRawTagOpen;
}
// `>` in a declaration.
if (code === 62 && marker === 4) {
effects.consume(code);
return continuationClose;
}
// `?` in an instruction.
if (code === 63 && marker === 3) {
effects.consume(code);
return continuationDeclarationInside;
}
// `]` in CDATA.
if (code === 93 && marker === 5) {
effects.consume(code);
return continuationCdataInside;
}
// Basic and complete HTML end when a blank line follows this eol.
if (markdownLineEnding(code) && (marker === 6 || marker === 7)) {
effects.exit("htmlFlowData");
return effects.check(blankLineBefore, continuationAfter, continuationStart)(code);
}
if (code === null || markdownLineEnding(code)) {
effects.exit("htmlFlowData");
return continuationStart(code);
}
effects.consume(code);
return continuation;
}
/**
 * In continuation, at eol.
 *
 * Continues with the next line when it is not lazy; otherwise (including at
 * eof) the HTML is done.
 *
 * ```markdown
 * > | <x>
 *        ^
 *   | asd
 * ```
 *
 * @type {State}
 */
function continuationStart(code) {
return effects.check(nonLazyContinuationStart, continuationStartNonLazy, continuationAfter)(code);
}
/**
 * In continuation, at eol, before non-lazy content.
 *
 * ```markdown
 * > | <x>
 *        ^
 *   | asd
 * ```
 *
 * @type {State}
 */
function continuationStartNonLazy(code) {
effects.enter("lineEnding");
effects.consume(code);
effects.exit("lineEnding");
return continuationBefore;
}
/**
 * In continuation, before non-lazy content.
 *
 * ```markdown
 *   | <x>
 * > | asd
 *     ^
 * ```
 *
 * @type {State}
 */
function continuationBefore(code) {
// Empty line (or eof): no data token is opened for it.
if (code === null || markdownLineEnding(code)) {
return continuationStart(code);
}
effects.enter("htmlFlowData");
return continuation(code);
}
/**
 * In comment continuation, after one `-`, expecting another.
 *
 * ```markdown
 * > | <!--xxx-->
 *             ^
 * ```
 *
 * @type {State}
 */
function continuationCommentInside(code) {
if (code === 45) {
effects.consume(code);
return continuationDeclarationInside;
}
return continuation(code);
}
/**
 * In raw continuation, after `<`, at `/`.
 *
 * ```markdown
 * > | <script>console.log(1)</script>
 *                            ^
 * ```
 *
 * @type {State}
 */
function continuationRawTagOpen(code) {
if (code === 47) {
effects.consume(code);
// Start collecting the end tag name from scratch.
buffer = '';
return continuationRawEndTag;
}
return continuation(code);
}
/**
 * In raw continuation, after `</`, in a raw tag name.
 *
 * ```markdown
 * > | <script>console.log(1)</script>
 *                             ^^^^^^
 * ```
 *
 * @type {State}
 */
function continuationRawEndTag(code) {
// `>`: the collected name decides whether this closes the raw HTML.
if (code === 62) {
const name = buffer.toLowerCase();
if (htmlRawNames.includes(name)) {
effects.consume(code);
return continuationClose;
}
return continuation(code);
}
// Collect at most 8 letters of the name.
if (asciiAlpha(code) && buffer.length < 8) {
// The code is a letter here, so it can be appended to the name.
effects.consume(code);
buffer += String.fromCharCode(code);
return continuationRawEndTag;
}
return continuation(code);
}
/**
 * In cdata continuation, after `]`, expecting `]>`.
 *
 * ```markdown
 * > | <![CDATA[>&<]]>
 *                  ^
 * ```
 *
 * @type {State}
 */
function continuationCdataInside(code) {
if (code === 93) {
effects.consume(code);
return continuationDeclarationInside;
}
return continuation(code);
}
/**
 * In declaration or instruction continuation, at `>`.
 *
 * ```markdown
 * > | <!-->
 *         ^
 * > | <?>
 *       ^
 * > | <!q>
 *        ^
 * > | <!--ab-->
 *             ^
 * > | <![CDATA[>&<]]>
 *                   ^
 * ```
 *
 * @type {State}
 */
function continuationDeclarationInside(code) {
if (code === 62) {
effects.consume(code);
return continuationClose;
}
// More dashes.
if (code === 45 && marker === 2) {
effects.consume(code);
return continuationDeclarationInside;
}
return continuation(code);
}
/**
 * In closed continuation: everything we get until the eol/eof is part of it.
 *
 * ```markdown
 * > | <!doctype>
 *               ^
 * ```
 *
 * @type {State}
 */
function continuationClose(code) {
if (code === null || markdownLineEnding(code)) {
effects.exit("htmlFlowData");
return continuationAfter(code);
}
effects.consume(code);
return continuationClose;
}
/**
 * Done.
 *
 * Closes the `htmlFlow` token and reports success.
 *
 * ```markdown
 * > | <!doctype>
 *               ^
 * ```
 *
 * @type {State}
 */
function continuationAfter(code) {
effects.exit("htmlFlow");
// // Feel free to interrupt.
// tokenizer.interrupt = false
// // No longer concrete.
// tokenizer.concrete = false
return ok(code);
}
}
/**
 * Tokenizer of the `nonLazyContinuationStart` partial construct.
 *
 * Succeeds when the current code is a line ending and the line after it is
 * not flagged in `parser.lazy`.
 *
 * @this {TokenizeContext}
 *   Context.
 * @type {Tokenizer}
 */
function tokenizeNonLazyContinuationStart(effects, ok, nok) {
  const context = this;

  /**
   * At eol, before continuation.
   *
   * ```markdown
   * > | <x>
   *        ^
   *   | b
   * ```
   *
   * @type {State}
   */
  const atLineEnding = (code) => {
    if (!markdownLineEnding(code)) {
      return nok(code);
    }

    effects.enter("lineEnding");
    effects.consume(code);
    effects.exit("lineEnding");
    return atNextLine;
  };

  /**
   * At the start of the next line: fine unless that line is lazy.
   *
   * ```markdown
   *   | <x>
   * > | b
   *     ^
   * ```
   *
   * @type {State}
   */
  const atNextLine = (code) => {
    const lazyLines = context.parser.lazy;
    const currentLine = context.now().line;

    if (lazyLines[currentLine]) {
      return nok(code);
    }

    return ok(code);
  };

  return atLineEnding;
}
/**
 * Tokenizer of the `blankLineBefore` partial construct.
 *
 * Consumes the current code as a line ending and then attempts `blankLine`,
 * so it succeeds when the following line is blank.
 *
 * @this {TokenizeContext}
 *   Context.
 * @type {Tokenizer}
 */
function tokenizeBlankLineBefore(effects, ok, nok) {
  /**
   * Before eol, expecting blank line.
   *
   * The caller only gets here at a line ending, so the code is consumed as
   * one without further checks.
   *
   * ```markdown
   * > | <div>
   *          ^
   *   |
   * ```
   *
   * @type {State}
   */
  return function atLineEnding(code) {
    effects.enter("lineEnding");
    effects.consume(code);
    effects.exit("lineEnding");

    const blankLineAttempt = effects.attempt(blankLine, ok, nok);
    return blankLineAttempt;
  };
}