knowledgebase_law/node_modules/micromark-core-commonmark/lib/html-text.js

678 lines
13 KiB
JavaScript
Raw Normal View History

2025-04-11 23:47:09 +08:00
/**
* @import {
* Code,
* Construct,
* State,
* TokenizeContext,
* Tokenizer
* } from 'micromark-util-types'
*/
import { factorySpace } from 'micromark-factory-space';
import { asciiAlphanumeric, asciiAlpha, markdownLineEndingOrSpace, markdownLineEnding, markdownSpace } from 'micromark-util-character';
/**
 * Construct descriptor for HTML (text): pairs the construct name with the
 * tokenizer that recognizes it.
 *
 * @type {Construct}
 */
export const htmlText = {
  name: "htmlText",
  tokenize: tokenizeHtmlText
};
/**
 * Tokenizer for HTML (text).
 *
 * Steps one character code at a time through an opening tag, closing tag,
 * comment, instruction, declaration, or CDATA section. Reaching the final `>`
 * closes the `htmlTextData` and `htmlText` tokens and yields `ok`; any
 * unexpected code is handed to `nok`. A line ending inside the construct is
 * emitted as a `lineEnding` token (optionally followed by a `linePrefix`),
 * after which the interrupted state continues.
 *
 * @this {TokenizeContext}
 *   Context.
 * @type {Tokenizer}
 */
function tokenizeHtmlText(effects, ok, nok) {
  const context = this;

  // Character codes the states below compare against.
  const exclamationMark = 33; // `!`
  const quotationMark = 34; // `"`
  const apostrophe = 39; // `'`
  const dash = 45; // `-`
  const dot = 46; // `.`
  const slash = 47; // `/`
  const colon = 58; // `:`
  const lessThan = 60; // `<`
  const equalsTo = 61; // `=`
  const greaterThan = 62; // `>`
  const questionMark = 63; // `?`
  const leftSquareBracket = 91; // `[`
  const rightSquareBracket = 93; // `]`
  const underscore = 95; // `_`
  const graveAccent = 96; // `` ` ``

  // Literal that has to follow `<![` for a CDATA section.
  const cdataOpening = "CDATA[";

  /**
   * Quote that opened the attribute value currently being read.
   *
   * @type {NonNullable<Code> | undefined}
   */
  let quote;
  /**
   * Position of the next expected character in `cdataOpening`.
   *
   * @type {number}
   */
  let cdataOffset;
  /**
   * State to continue in once a line ending (and its prefix) is handled.
   *
   * @type {State}
   */
  let resumeState;

  return start;

  /**
   * Remember which state was interrupted, then handle the line ending.
   *
   * @param {State} state
   *   State to resume afterwards.
   * @param {Code} code
   *   Line ending code.
   * @returns {State | undefined}
   *   Result of `lineEndingBefore`.
   */
  function resumeAfterLineEnding(state, code) {
    resumeState = state;
    return lineEndingBefore(code);
  }

  /**
   * Start of HTML (text).
   *
   * ```markdown
   * > | a <b> c
   *       ^
   * ```
   *
   * @type {State}
   */
  function start(code) {
    effects.enter("htmlText");
    effects.enter("htmlTextData");
    effects.consume(code);
    return open;
  }

  /**
   * After `<`, at tag name or other stuff.
   *
   * ```markdown
   * > | a <b> c
   *        ^
   * > | a <!doctype> c
   *        ^
   * > | a <!--b--> c
   *        ^
   * ```
   *
   * @type {State}
   */
  function open(code) {
    switch (code) {
      case exclamationMark: {
        effects.consume(code);
        return declarationOpen;
      }
      case slash: {
        effects.consume(code);
        return tagCloseStart;
      }
      case questionMark: {
        effects.consume(code);
        return instruction;
      }
      default: {
        // Only an ASCII letter may start a tag name.
        if (!asciiAlpha(code)) {
          return nok(code);
        }
        effects.consume(code);
        return tagOpen;
      }
    }
  }

  /**
   * After `<!`, at declaration, comment, or CDATA.
   *
   * ```markdown
   * > | a <!doctype> c
   *         ^
   * > | a <!--b--> c
   *         ^
   * > | a <![CDATA[>&<]]> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function declarationOpen(code) {
    switch (code) {
      case dash: {
        effects.consume(code);
        return commentOpenInside;
      }
      case leftSquareBracket: {
        effects.consume(code);
        cdataOffset = 0;
        return cdataOpenInside;
      }
      default: {
        if (!asciiAlpha(code)) {
          return nok(code);
        }
        effects.consume(code);
        return declaration;
      }
    }
  }

  /**
   * In a comment, after `<!-`, at another `-`.
   *
   * ```markdown
   * > | a <!--b--> c
   *          ^
   * ```
   *
   * @type {State}
   */
  function commentOpenInside(code) {
    if (code !== dash) {
      return nok(code);
    }
    effects.consume(code);
    return commentEnd;
  }

  /**
   * In comment.
   *
   * ```markdown
   * > | a <!--b--> c
   *           ^
   * ```
   *
   * @type {State}
   */
  function comment(code) {
    if (code === null) {
      return nok(code);
    }
    if (code !== dash && markdownLineEnding(code)) {
      return resumeAfterLineEnding(comment, code);
    }
    // A dash may be the start of the closing sequence; anything else is data.
    effects.consume(code);
    return code === dash ? commentClose : comment;
  }

  /**
   * In comment, after `-`.
   *
   * ```markdown
   * > | a <!--b--> c
   *             ^
   * ```
   *
   * @type {State}
   */
  function commentClose(code) {
    if (code !== dash) {
      return comment(code);
    }
    effects.consume(code);
    return commentEnd;
  }

  /**
   * In comment, after `--`.
   *
   * ```markdown
   * > | a <!--b--> c
   *              ^
   * ```
   *
   * @type {State}
   */
  function commentEnd(code) {
    if (code === greaterThan) {
      return end(code);
    }
    if (code === dash) {
      return commentClose(code);
    }
    return comment(code);
  }

  /**
   * After `<![`, in CDATA, expecting `CDATA[`.
   *
   * ```markdown
   * > | a <![CDATA[>&<]]> b
   *          ^^^^^^
   * ```
   *
   * @type {State}
   */
  function cdataOpenInside(code) {
    const expected = cdataOpening.charCodeAt(cdataOffset);
    cdataOffset += 1;
    if (code !== expected) {
      return nok(code);
    }
    effects.consume(code);
    return cdataOffset === cdataOpening.length ? cdata : cdataOpenInside;
  }

  /**
   * In CDATA.
   *
   * ```markdown
   * > | a <![CDATA[>&<]]> b
   *                ^^^
   * ```
   *
   * @type {State}
   */
  function cdata(code) {
    if (code === null) {
      return nok(code);
    }
    if (code !== rightSquareBracket && markdownLineEnding(code)) {
      return resumeAfterLineEnding(cdata, code);
    }
    // A `]` may be the start of the closing sequence; anything else is data.
    effects.consume(code);
    return code === rightSquareBracket ? cdataClose : cdata;
  }

  /**
   * In CDATA, after `]`, at another `]`.
   *
   * ```markdown
   * > | a <![CDATA[>&<]]> b
   *                    ^
   * ```
   *
   * @type {State}
   */
  function cdataClose(code) {
    if (code !== rightSquareBracket) {
      return cdata(code);
    }
    effects.consume(code);
    return cdataEnd;
  }

  /**
   * In CDATA, after `]]`, at `>`.
   *
   * ```markdown
   * > | a <![CDATA[>&<]]> b
   *                     ^
   * ```
   *
   * @type {State}
   */
  function cdataEnd(code) {
    switch (code) {
      case greaterThan: {
        return end(code);
      }
      case rightSquareBracket: {
        // Still at least two `]` behind us, so keep waiting for `>`.
        effects.consume(code);
        return cdataEnd;
      }
      default: {
        return cdata(code);
      }
    }
  }

  /**
   * In declaration.
   *
   * ```markdown
   * > | a <!b> c
   *          ^
   * ```
   *
   * @type {State}
   */
  function declaration(code) {
    // `end` accepts `>` and rejects everything else, including EOF.
    if (code === null || code === greaterThan) {
      return end(code);
    }
    if (markdownLineEnding(code)) {
      return resumeAfterLineEnding(declaration, code);
    }
    effects.consume(code);
    return declaration;
  }

  /**
   * In instruction.
   *
   * ```markdown
   * > | a <?b?> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function instruction(code) {
    if (code === null) {
      return nok(code);
    }
    if (code !== questionMark && markdownLineEnding(code)) {
      return resumeAfterLineEnding(instruction, code);
    }
    // A `?` may be the start of the closing sequence; anything else is data.
    effects.consume(code);
    return code === questionMark ? instructionClose : instruction;
  }

  /**
   * In instruction, after `?`, at `>`.
   *
   * ```markdown
   * > | a <?b?> c
   *           ^
   * ```
   *
   * @type {State}
   */
  function instructionClose(code) {
    if (code === greaterThan) {
      return end(code);
    }
    return instruction(code);
  }

  /**
   * After `</`, in closing tag, at tag name.
   *
   * ```markdown
   * > | a </b> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function tagCloseStart(code) {
    // Only an ASCII letter may start a tag name.
    if (!asciiAlpha(code)) {
      return nok(code);
    }
    effects.consume(code);
    return tagClose;
  }

  /**
   * After `</x`, in a tag name.
   *
   * ```markdown
   * > | a </b> c
   *          ^
   * ```
   *
   * @type {State}
   */
  function tagClose(code) {
    // ASCII alphanumerical and `-`.
    const inTagName = code === dash || asciiAlphanumeric(code);
    if (!inTagName) {
      return tagCloseBetween(code);
    }
    effects.consume(code);
    return tagClose;
  }

  /**
   * In closing tag, after tag name.
   *
   * ```markdown
   * > | a </b> c
   *          ^
   * ```
   *
   * @type {State}
   */
  function tagCloseBetween(code) {
    if (markdownLineEnding(code)) {
      return resumeAfterLineEnding(tagCloseBetween, code);
    }
    if (!markdownSpace(code)) {
      return end(code);
    }
    effects.consume(code);
    return tagCloseBetween;
  }

  /**
   * After `<x`, in opening tag name.
   *
   * ```markdown
   * > | a <b> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function tagOpen(code) {
    // ASCII alphanumerical and `-`.
    if (code === dash || asciiAlphanumeric(code)) {
      effects.consume(code);
      return tagOpen;
    }
    const nameIsDone =
      code === slash ||
      code === greaterThan ||
      markdownLineEndingOrSpace(code);
    return nameIsDone ? tagOpenBetween(code) : nok(code);
  }

  /**
   * In opening tag, after tag name.
   *
   * ```markdown
   * > | a <b> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function tagOpenBetween(code) {
    if (code === slash) {
      // Self-closing: only `>` may follow.
      effects.consume(code);
      return end;
    }
    // ASCII alphabetical and `:` and `_`.
    if (code === colon || code === underscore || asciiAlpha(code)) {
      effects.consume(code);
      return tagOpenAttributeName;
    }
    if (markdownLineEnding(code)) {
      return resumeAfterLineEnding(tagOpenBetween, code);
    }
    if (!markdownSpace(code)) {
      return end(code);
    }
    effects.consume(code);
    return tagOpenBetween;
  }

  /**
   * In attribute name.
   *
   * ```markdown
   * > | a <b c> d
   *           ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeName(code) {
    // ASCII alphanumerical and `-`, `.`, `:`, and `_`.
    const inAttributeName =
      code === dash ||
      code === dot ||
      code === colon ||
      code === underscore ||
      asciiAlphanumeric(code);
    if (!inAttributeName) {
      return tagOpenAttributeNameAfter(code);
    }
    effects.consume(code);
    return tagOpenAttributeName;
  }

  /**
   * After attribute name, before initializer, the end of the tag, or
   * whitespace.
   *
   * ```markdown
   * > | a <b c> d
   *           ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeNameAfter(code) {
    if (code === equalsTo) {
      effects.consume(code);
      return tagOpenAttributeValueBefore;
    }
    if (markdownLineEnding(code)) {
      return resumeAfterLineEnding(tagOpenAttributeNameAfter, code);
    }
    if (markdownSpace(code)) {
      effects.consume(code);
      return tagOpenAttributeNameAfter;
    }
    return tagOpenBetween(code);
  }

  /**
   * Before unquoted, double quoted, or single quoted attribute value, allowing
   * whitespace.
   *
   * ```markdown
   * > | a <b c=d> e
   *            ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeValueBefore(code) {
    const cannotStartValue =
      code === null ||
      code === lessThan ||
      code === equalsTo ||
      code === greaterThan ||
      code === graveAccent;
    if (cannotStartValue) {
      return nok(code);
    }
    if (code === quotationMark || code === apostrophe) {
      effects.consume(code);
      // Remember which quote has to close the value.
      quote = code;
      return tagOpenAttributeValueQuoted;
    }
    if (markdownLineEnding(code)) {
      return resumeAfterLineEnding(tagOpenAttributeValueBefore, code);
    }
    // Whitespace keeps us waiting; anything else starts an unquoted value.
    const next = markdownSpace(code)
      ? tagOpenAttributeValueBefore
      : tagOpenAttributeValueUnquoted;
    effects.consume(code);
    return next;
  }

  /**
   * In double or single quoted attribute value.
   *
   * ```markdown
   * > | a <b c="d"> e
   *             ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeValueQuoted(code) {
    if (code === quote) {
      effects.consume(code);
      quote = undefined;
      return tagOpenAttributeValueQuotedAfter;
    }
    if (code === null) {
      return nok(code);
    }
    if (markdownLineEnding(code)) {
      return resumeAfterLineEnding(tagOpenAttributeValueQuoted, code);
    }
    effects.consume(code);
    return tagOpenAttributeValueQuoted;
  }

  /**
   * In unquoted attribute value.
   *
   * ```markdown
   * > | a <b c=d> e
   *             ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeValueUnquoted(code) {
    switch (code) {
      case null:
      case quotationMark:
      case apostrophe:
      case lessThan:
      case equalsTo:
      case graveAccent: {
        return nok(code);
      }
      case slash:
      case greaterThan: {
        return tagOpenBetween(code);
      }
      default: {
        if (markdownLineEndingOrSpace(code)) {
          return tagOpenBetween(code);
        }
        effects.consume(code);
        return tagOpenAttributeValueUnquoted;
      }
    }
  }

  /**
   * After double or single quoted attribute value, before whitespace or the end
   * of the tag.
   *
   * ```markdown
   * > | a <b c="d"> e
   *               ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeValueQuotedAfter(code) {
    const tagContinues =
      code === slash ||
      code === greaterThan ||
      markdownLineEndingOrSpace(code);
    return tagContinues ? tagOpenBetween(code) : nok(code);
  }

  /**
   * In certain circumstances of a tag where only an `>` is allowed.
   *
   * ```markdown
   * > | a <b c="d"> e
   *               ^
   * ```
   *
   * @type {State}
   */
  function end(code) {
    if (code !== greaterThan) {
      return nok(code);
    }
    effects.consume(code);
    effects.exit("htmlTextData");
    effects.exit("htmlText");
    return ok;
  }

  /**
   * At eol.
   *
   * > 👉 **Note**: we can't have blank lines in text, so no need to worry about
   * > empty tokens.
   *
   * ```markdown
   * > | a <!--a
   *            ^
   *   | b-->
   * ```
   *
   * @type {State}
   */
  function lineEndingBefore(code) {
    // Close the running data token so the line ending gets a token of its own.
    effects.exit("htmlTextData");
    effects.enter("lineEnding");
    effects.consume(code);
    effects.exit("lineEnding");
    return lineEndingAfter;
  }

  /**
   * After eol, at optional whitespace.
   *
   * > 👉 **Note**: we can't have blank lines in text, so no need to worry about
   * > empty tokens.
   *
   * ```markdown
   *   | a <!--a
   * > | b-->
   *     ^
   * ```
   *
   * @type {State}
   */
  function lineEndingAfter(code) {
    if (!markdownSpace(code)) {
      return lineEndingAfterPrefix(code);
    }
    // `disable.null` is always populated by defaults.
    const indentedCodeDisabled =
      context.parser.constructs.disable.null.includes('codeIndented');
    // The prefix size is passed as 4 unless `codeIndented` is disabled, in
    // which case no size limit is given.
    const sizeLimit = indentedCodeDisabled ? undefined : 4;
    return factorySpace(
      effects,
      lineEndingAfterPrefix,
      "linePrefix",
      sizeLimit
    )(code);
  }

  /**
   * After eol, after optional whitespace.
   *
   * > 👉 **Note**: we can't have blank lines in text, so no need to worry about
   * > empty tokens.
   *
   * ```markdown
   *   | a <!--a
   * > | b-->
   *     ^
   * ```
   *
   * @type {State}
   */
  function lineEndingAfterPrefix(code) {
    // Reopen the data token and continue in the state that was interrupted.
    effects.enter("htmlTextData");
    return resumeState(code);
  }
}