mirror of
https://github.com/Funkoala14/knowledgebase_law.git
synced 2025-06-09 04:38:15 +08:00
678 lines
13 KiB
JavaScript
678 lines
13 KiB
JavaScript
|
/**
 * @import {
 *   Code,
 *   Construct,
 *   State,
 *   TokenizeContext,
 *   Tokenizer
 * } from 'micromark-util-types'
 */
|
|||
|
|
|||
|
import { factorySpace } from 'micromark-factory-space';
|
|||
|
import { asciiAlphanumeric, asciiAlpha, markdownLineEndingOrSpace, markdownLineEnding, markdownSpace } from 'micromark-util-character';
|
|||
|
/**
 * Construct named `htmlText`; its tokenizer is `tokenizeHtmlText`.
 *
 * @type {Construct}
 */
export const htmlText = {
  name: 'htmlText',
  tokenize: tokenizeHtmlText
};
|
|||
|
|
|||
|
/**
 * Tokenizer for HTML (text): tags, comments, CDATA sections, declarations,
 * and processing instructions that start at `<`.
 *
 * The whole construct is wrapped in an `htmlText` token whose content is
 * emitted as `htmlTextData` tokens, interrupted by `lineEnding` (and an
 * optional `linePrefix`) tokens whenever the construct spans several lines.
 * On the closing `>` the `ok` state is returned; on anything that is not
 * valid, `nok(code)` is called and its result returned.
 *
 * @this {TokenizeContext}
 *   Context.
 * @type {Tokenizer}
 */
function tokenizeHtmlText(effects, ok, nok) {
  const self = this;
  // Quote character (`"` or `'`) that opened the current quoted attribute
  // value; `undefined` outside of a quoted value.
  /** @type {NonNullable<Code> | undefined} */
  let marker;
  // Position inside the literal `CDATA[` that is matched next.
  /** @type {number} */
  let index;
  // State to resume in after a line ending (and its prefix) was handled.
  /** @type {State} */
  let returnState;
  return start;

  /**
   * Start of HTML (text).
   *
   * ```markdown
   * > | a <b> c
   *       ^
   * ```
   *
   * @type {State}
   */
  function start(code) {
    effects.enter("htmlText");
    effects.enter("htmlTextData");
    effects.consume(code);
    return open;
  }

  /**
   * After `<`, at tag name or other stuff.
   *
   * ```markdown
   * > | a <b> c
   *        ^
   * > | a <!doctype> c
   *        ^
   * > | a <!--b--> c
   *        ^
   * ```
   *
   * @type {State}
   */
  function open(code) {
    // `!`
    if (code === 33) {
      effects.consume(code);
      return declarationOpen;
    }

    // `/`
    if (code === 47) {
      effects.consume(code);
      return tagCloseStart;
    }

    // `?`
    if (code === 63) {
      effects.consume(code);
      return instruction;
    }

    // ASCII alphabetical.
    if (asciiAlpha(code)) {
      effects.consume(code);
      return tagOpen;
    }

    return nok(code);
  }

  /**
   * After `<!`, at declaration, comment, or CDATA.
   *
   * ```markdown
   * > | a <!doctype> c
   *         ^
   * > | a <!--b--> c
   *         ^
   * > | a <![CDATA[>&<]]> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function declarationOpen(code) {
    // `-`
    if (code === 45) {
      effects.consume(code);
      return commentOpenInside;
    }

    // `[`
    if (code === 91) {
      effects.consume(code);
      // Restart matching of `CDATA[` from its first character.
      index = 0;
      return cdataOpenInside;
    }

    if (asciiAlpha(code)) {
      effects.consume(code);
      return declaration;
    }

    return nok(code);
  }

  /**
   * In a comment, after `<!-`, at another `-`.
   *
   * ```markdown
   * > | a <!--b--> c
   *          ^
   * ```
   *
   * @type {State}
   */
  function commentOpenInside(code) {
    // `-`
    if (code === 45) {
      effects.consume(code);
      return commentEnd;
    }

    return nok(code);
  }

  /**
   * In comment.
   *
   * ```markdown
   * > | a <!--b--> c
   *           ^
   * ```
   *
   * @type {State}
   */
  function comment(code) {
    // End of input inside a comment: not HTML (text).
    if (code === null) {
      return nok(code);
    }

    // `-`
    if (code === 45) {
      effects.consume(code);
      return commentClose;
    }

    if (markdownLineEnding(code)) {
      returnState = comment;
      return lineEndingBefore(code);
    }

    effects.consume(code);
    return comment;
  }

  /**
   * In comment, after `-`.
   *
   * ```markdown
   * > | a <!--b--> c
   *             ^
   * ```
   *
   * @type {State}
   */
  function commentClose(code) {
    // `-`
    if (code === 45) {
      effects.consume(code);
      return commentEnd;
    }

    return comment(code);
  }

  /**
   * In comment, after `--`.
   *
   * ```markdown
   * > | a <!--b--> c
   *              ^
   * ```
   *
   * @type {State}
   */
  function commentEnd(code) {
    // `>` closes the comment, another `-` keeps us after a dash, anything
    // else is regular comment content.
    return code === 62 ? end(code) : code === 45 ? commentClose(code) : comment(code);
  }

  /**
   * After `<![`, in CDATA, expecting `CDATA[`.
   *
   * ```markdown
   * > | a <![CDATA[>&<]]> b
   *          ^^^^^^
   * ```
   *
   * @type {State}
   */
  function cdataOpenInside(code) {
    const value = "CDATA[";

    if (code === value.charCodeAt(index++)) {
      effects.consume(code);
      return index === value.length ? cdata : cdataOpenInside;
    }

    return nok(code);
  }

  /**
   * In CDATA.
   *
   * ```markdown
   * > | a <![CDATA[>&<]]> b
   *                ^^^
   * ```
   *
   * @type {State}
   */
  function cdata(code) {
    // End of input inside CDATA: not HTML (text).
    if (code === null) {
      return nok(code);
    }

    // `]`
    if (code === 93) {
      effects.consume(code);
      return cdataClose;
    }

    if (markdownLineEnding(code)) {
      returnState = cdata;
      return lineEndingBefore(code);
    }

    effects.consume(code);
    return cdata;
  }

  /**
   * In CDATA, after `]`, at another `]`.
   *
   * ```markdown
   * > | a <![CDATA[>&<]]> b
   *                    ^
   * ```
   *
   * @type {State}
   */
  function cdataClose(code) {
    // `]`
    if (code === 93) {
      effects.consume(code);
      return cdataEnd;
    }

    return cdata(code);
  }

  /**
   * In CDATA, after `]]`, at `>`.
   *
   * ```markdown
   * > | a <![CDATA[>&<]]> b
   *                     ^
   * ```
   *
   * @type {State}
   */
  function cdataEnd(code) {
    // `>`
    if (code === 62) {
      return end(code);
    }

    // `]`: stay here, so that `]]]>` still closes.
    if (code === 93) {
      effects.consume(code);
      return cdataEnd;
    }

    return cdata(code);
  }

  /**
   * In declaration.
   *
   * ```markdown
   * > | a <!b> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function declaration(code) {
    // `>` closes the declaration; for end of input, `end` calls `nok`.
    if (code === null || code === 62) {
      return end(code);
    }

    if (markdownLineEnding(code)) {
      returnState = declaration;
      return lineEndingBefore(code);
    }

    effects.consume(code);
    return declaration;
  }

  /**
   * In instruction.
   *
   * ```markdown
   * > | a <?b?> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function instruction(code) {
    // End of input inside an instruction: not HTML (text).
    if (code === null) {
      return nok(code);
    }

    // `?`
    if (code === 63) {
      effects.consume(code);
      return instructionClose;
    }

    if (markdownLineEnding(code)) {
      returnState = instruction;
      return lineEndingBefore(code);
    }

    effects.consume(code);
    return instruction;
  }

  /**
   * In instruction, after `?`, at `>`.
   *
   * ```markdown
   * > | a <?b?> c
   *           ^
   * ```
   *
   * @type {State}
   */
  function instructionClose(code) {
    // `>`
    return code === 62 ? end(code) : instruction(code);
  }

  /**
   * After `</`, in closing tag, at tag name.
   *
   * ```markdown
   * > | a </b> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function tagCloseStart(code) {
    // ASCII alphabetical.
    if (asciiAlpha(code)) {
      effects.consume(code);
      return tagClose;
    }

    return nok(code);
  }

  /**
   * After `</x`, in a tag name.
   *
   * ```markdown
   * > | a </b> c
   *          ^
   * ```
   *
   * @type {State}
   */
  function tagClose(code) {
    // ASCII alphanumerical and `-`.
    if (code === 45 || asciiAlphanumeric(code)) {
      effects.consume(code);
      return tagClose;
    }

    return tagCloseBetween(code);
  }

  /**
   * In closing tag, after tag name.
   *
   * ```markdown
   * > | a </b> c
   *          ^
   * ```
   *
   * @type {State}
   */
  function tagCloseBetween(code) {
    if (markdownLineEnding(code)) {
      returnState = tagCloseBetween;
      return lineEndingBefore(code);
    }

    if (markdownSpace(code)) {
      effects.consume(code);
      return tagCloseBetween;
    }

    return end(code);
  }

  /**
   * After `<x`, in opening tag name.
   *
   * ```markdown
   * > | a <b> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function tagOpen(code) {
    // ASCII alphanumerical and `-`.
    if (code === 45 || asciiAlphanumeric(code)) {
      effects.consume(code);
      return tagOpen;
    }

    // `/`, `>`, or whitespace.
    if (code === 47 || code === 62 || markdownLineEndingOrSpace(code)) {
      return tagOpenBetween(code);
    }

    return nok(code);
  }

  /**
   * In opening tag, after tag name.
   *
   * ```markdown
   * > | a <b> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function tagOpenBetween(code) {
    // `/`: self-closing, only `>` may follow.
    if (code === 47) {
      effects.consume(code);
      return end;
    }

    // ASCII alphabetical and `:` and `_`.
    if (code === 58 || code === 95 || asciiAlpha(code)) {
      effects.consume(code);
      return tagOpenAttributeName;
    }

    if (markdownLineEnding(code)) {
      returnState = tagOpenBetween;
      return lineEndingBefore(code);
    }

    if (markdownSpace(code)) {
      effects.consume(code);
      return tagOpenBetween;
    }

    return end(code);
  }

  /**
   * In attribute name.
   *
   * ```markdown
   * > | a <b c> d
   *           ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeName(code) {
    // ASCII alphanumerical and `-`, `.`, `:`, and `_`.
    if (code === 45 || code === 46 || code === 58 || code === 95 || asciiAlphanumeric(code)) {
      effects.consume(code);
      return tagOpenAttributeName;
    }

    return tagOpenAttributeNameAfter(code);
  }

  /**
   * After attribute name, before initializer, the end of the tag, or
   * whitespace.
   *
   * ```markdown
   * > | a <b c> d
   *           ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeNameAfter(code) {
    // `=`
    if (code === 61) {
      effects.consume(code);
      return tagOpenAttributeValueBefore;
    }

    if (markdownLineEnding(code)) {
      returnState = tagOpenAttributeNameAfter;
      return lineEndingBefore(code);
    }

    if (markdownSpace(code)) {
      effects.consume(code);
      return tagOpenAttributeNameAfter;
    }

    return tagOpenBetween(code);
  }

  /**
   * Before unquoted, double quoted, or single quoted attribute value, allowing
   * whitespace.
   *
   * ```markdown
   * > | a <b c=d> e
   *            ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeValueBefore(code) {
    // End of input, `<`, `=`, `>`, or a grave accent cannot start a value.
    if (code === null || code === 60 || code === 61 || code === 62 || code === 96) {
      return nok(code);
    }

    // `"` or `'`: remember which quote has to close the value.
    if (code === 34 || code === 39) {
      effects.consume(code);
      marker = code;
      return tagOpenAttributeValueQuoted;
    }

    if (markdownLineEnding(code)) {
      returnState = tagOpenAttributeValueBefore;
      return lineEndingBefore(code);
    }

    if (markdownSpace(code)) {
      effects.consume(code);
      return tagOpenAttributeValueBefore;
    }

    effects.consume(code);
    return tagOpenAttributeValueUnquoted;
  }

  /**
   * In double or single quoted attribute value.
   *
   * ```markdown
   * > | a <b c="d"> e
   *             ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeValueQuoted(code) {
    if (code === marker) {
      effects.consume(code);
      marker = undefined;
      return tagOpenAttributeValueQuotedAfter;
    }

    // End of input inside a quoted value: not HTML (text).
    if (code === null) {
      return nok(code);
    }

    if (markdownLineEnding(code)) {
      returnState = tagOpenAttributeValueQuoted;
      return lineEndingBefore(code);
    }

    effects.consume(code);
    return tagOpenAttributeValueQuoted;
  }

  /**
   * In unquoted attribute value.
   *
   * ```markdown
   * > | a <b c=d> e
   *             ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeValueUnquoted(code) {
    // End of input, `"`, `'`, `<`, `=`, or a grave accent are not allowed
    // in an unquoted value.
    if (code === null || code === 34 || code === 39 || code === 60 || code === 61 || code === 96) {
      return nok(code);
    }

    // `/`, `>`, or whitespace ends the value.
    if (code === 47 || code === 62 || markdownLineEndingOrSpace(code)) {
      return tagOpenBetween(code);
    }

    effects.consume(code);
    return tagOpenAttributeValueUnquoted;
  }

  /**
   * After double or single quoted attribute value, before whitespace or the end
   * of the tag.
   *
   * ```markdown
   * > | a <b c="d"> e
   *               ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeValueQuotedAfter(code) {
    // `/`, `>`, or whitespace.
    if (code === 47 || code === 62 || markdownLineEndingOrSpace(code)) {
      return tagOpenBetween(code);
    }

    return nok(code);
  }

  /**
   * In certain circumstances of a tag where only an `>` is allowed.
   *
   * ```markdown
   * > | a <b c="d"> e
   *               ^
   * ```
   *
   * @type {State}
   */
  function end(code) {
    // `>`
    if (code === 62) {
      effects.consume(code);
      effects.exit("htmlTextData");
      effects.exit("htmlText");
      return ok;
    }

    return nok(code);
  }

  /**
   * At eol.
   *
   * > 👉 **Note**: we can’t have blank lines in text, so no need to worry about
   * > empty tokens.
   *
   * ```markdown
   * > | a <!--a
   *            ^
   *   | b-->
   * ```
   *
   * @type {State}
   */
  function lineEndingBefore(code) {
    // Close the data token so the line ending gets a token of its own.
    effects.exit("htmlTextData");
    effects.enter("lineEnding");
    effects.consume(code);
    effects.exit("lineEnding");
    return lineEndingAfter;
  }

  /**
   * After eol, at optional whitespace.
   *
   * > 👉 **Note**: we can’t have blank lines in text, so no need to worry about
   * > empty tokens.
   *
   * ```markdown
   *   | a <!--a
   * > | b-->
   *     ^
   * ```
   *
   * @type {State}
   */
  function lineEndingAfter(code) {
    // `disable.null` is always populated by defaults, so it is read without
    // a guard. The prefix is passed a maximum of `4` unless `codeIndented`
    // is disabled, in which case no maximum (`undefined`) is passed.
    return markdownSpace(code) ? factorySpace(effects, lineEndingAfterPrefix, "linePrefix", self.parser.constructs.disable.null.includes('codeIndented') ? undefined : 4)(code) : lineEndingAfterPrefix(code);
  }

  /**
   * After eol, after optional whitespace.
   *
   * > 👉 **Note**: we can’t have blank lines in text, so no need to worry about
   * > empty tokens.
   *
   * ```markdown
   *   | a <!--a
   * > | b-->
   *     ^
   * ```
   *
   * @type {State}
   */
  function lineEndingAfterPrefix(code) {
    // Reopen the data token and continue where the line ending interrupted.
    effects.enter("htmlTextData");
    return returnState(code);
  }
}
|