/** * @import { * Code, * Construct, * State, * TokenizeContext, * Tokenizer * } from 'micromark-util-types' */ import { factorySpace } from 'micromark-factory-space'; import { asciiAlphanumeric, asciiAlpha, markdownLineEndingOrSpace, markdownLineEnding, markdownSpace } from 'micromark-util-character'; /** @type {Construct} */ export const htmlText = { name: 'htmlText', tokenize: tokenizeHtmlText }; /** * @this {TokenizeContext} * Context. * @type {Tokenizer} */ function tokenizeHtmlText(effects, ok, nok) { const self = this; /** @type {NonNullable | undefined} */ let marker; /** @type {number} */ let index; /** @type {State} */ let returnState; return start; /** * Start of HTML (text). * * ```markdown * > | a c * ^ * ``` * * @type {State} */ function start(code) { effects.enter("htmlText"); effects.enter("htmlTextData"); effects.consume(code); return open; } /** * After `<`, at tag name or other stuff. * * ```markdown * > | a c * ^ * > | a c * ^ * > | a c * ^ * ``` * * @type {State} */ function open(code) { if (code === 33) { effects.consume(code); return declarationOpen; } if (code === 47) { effects.consume(code); return tagCloseStart; } if (code === 63) { effects.consume(code); return instruction; } // ASCII alphabetical. if (asciiAlpha(code)) { effects.consume(code); return tagOpen; } return nok(code); } /** * After ` | a c * ^ * > | a c * ^ * > | a &<]]> c * ^ * ``` * * @type {State} */ function declarationOpen(code) { if (code === 45) { effects.consume(code); return commentOpenInside; } if (code === 91) { effects.consume(code); index = 0; return cdataOpenInside; } if (asciiAlpha(code)) { effects.consume(code); return declaration; } return nok(code); } /** * In a comment, after ` | a c * ^ * ``` * * @type {State} */ function commentOpenInside(code) { if (code === 45) { effects.consume(code); return commentEnd; } return nok(code); } /** * In comment. * * ```markdown * > | a c * ^ * ``` * * @type {State} */ function comment(code) { if (code === null) { return nok(code); } if (code === 45) { effects.consume(code); return commentClose; } if (markdownLineEnding(code)) { returnState = comment; return lineEndingBefore(code); } effects.consume(code); return comment; } /** * In comment, after `-`. * * ```markdown * > | a c * ^ * ``` * * @type {State} */ function commentClose(code) { if (code === 45) { effects.consume(code); return commentEnd; } return comment(code); } /** * In comment, after `--`. * * ```markdown * > | a c * ^ * ``` * * @type {State} */ function commentEnd(code) { return code === 62 ? end(code) : code === 45 ? commentClose(code) : comment(code); } /** * After ` | a &<]]> b * ^^^^^^ * ``` * * @type {State} */ function cdataOpenInside(code) { const value = "CDATA["; if (code === value.charCodeAt(index++)) { effects.consume(code); return index === value.length ? cdata : cdataOpenInside; } return nok(code); } /** * In CDATA. * * ```markdown * > | a &<]]> b * ^^^ * ``` * * @type {State} */ function cdata(code) { if (code === null) { return nok(code); } if (code === 93) { effects.consume(code); return cdataClose; } if (markdownLineEnding(code)) { returnState = cdata; return lineEndingBefore(code); } effects.consume(code); return cdata; } /** * In CDATA, after `]`, at another `]`. * * ```markdown * > | a &<]]> b * ^ * ``` * * @type {State} */ function cdataClose(code) { if (code === 93) { effects.consume(code); return cdataEnd; } return cdata(code); } /** * In CDATA, after `]]`, at `>`. * * ```markdown * > | a &<]]> b * ^ * ``` * * @type {State} */ function cdataEnd(code) { if (code === 62) { return end(code); } if (code === 93) { effects.consume(code); return cdataEnd; } return cdata(code); } /** * In declaration. * * ```markdown * > | a c * ^ * ``` * * @type {State} */ function declaration(code) { if (code === null || code === 62) { return end(code); } if (markdownLineEnding(code)) { returnState = declaration; return lineEndingBefore(code); } effects.consume(code); return declaration; } /** * In instruction. * * ```markdown * > | a c * ^ * ``` * * @type {State} */ function instruction(code) { if (code === null) { return nok(code); } if (code === 63) { effects.consume(code); return instructionClose; } if (markdownLineEnding(code)) { returnState = instruction; return lineEndingBefore(code); } effects.consume(code); return instruction; } /** * In instruction, after `?`, at `>`. * * ```markdown * > | a c * ^ * ``` * * @type {State} */ function instructionClose(code) { return code === 62 ? end(code) : instruction(code); } /** * After ` | a c * ^ * ``` * * @type {State} */ function tagCloseStart(code) { // ASCII alphabetical. if (asciiAlpha(code)) { effects.consume(code); return tagClose; } return nok(code); } /** * After ` | a c * ^ * ``` * * @type {State} */ function tagClose(code) { // ASCII alphanumerical and `-`. if (code === 45 || asciiAlphanumeric(code)) { effects.consume(code); return tagClose; } return tagCloseBetween(code); } /** * In closing tag, after tag name. * * ```markdown * > | a c * ^ * ``` * * @type {State} */ function tagCloseBetween(code) { if (markdownLineEnding(code)) { returnState = tagCloseBetween; return lineEndingBefore(code); } if (markdownSpace(code)) { effects.consume(code); return tagCloseBetween; } return end(code); } /** * After ` | a c * ^ * ``` * * @type {State} */ function tagOpen(code) { // ASCII alphanumerical and `-`. if (code === 45 || asciiAlphanumeric(code)) { effects.consume(code); return tagOpen; } if (code === 47 || code === 62 || markdownLineEndingOrSpace(code)) { return tagOpenBetween(code); } return nok(code); } /** * In opening tag, after tag name. * * ```markdown * > | a c * ^ * ``` * * @type {State} */ function tagOpenBetween(code) { if (code === 47) { effects.consume(code); return end; } // ASCII alphabetical and `:` and `_`. if (code === 58 || code === 95 || asciiAlpha(code)) { effects.consume(code); return tagOpenAttributeName; } if (markdownLineEnding(code)) { returnState = tagOpenBetween; return lineEndingBefore(code); } if (markdownSpace(code)) { effects.consume(code); return tagOpenBetween; } return end(code); } /** * In attribute name. * * ```markdown * > | a d * ^ * ``` * * @type {State} */ function tagOpenAttributeName(code) { // ASCII alphabetical and `-`, `.`, `:`, and `_`. if (code === 45 || code === 46 || code === 58 || code === 95 || asciiAlphanumeric(code)) { effects.consume(code); return tagOpenAttributeName; } return tagOpenAttributeNameAfter(code); } /** * After attribute name, before initializer, the end of the tag, or * whitespace. * * ```markdown * > | a d * ^ * ``` * * @type {State} */ function tagOpenAttributeNameAfter(code) { if (code === 61) { effects.consume(code); return tagOpenAttributeValueBefore; } if (markdownLineEnding(code)) { returnState = tagOpenAttributeNameAfter; return lineEndingBefore(code); } if (markdownSpace(code)) { effects.consume(code); return tagOpenAttributeNameAfter; } return tagOpenBetween(code); } /** * Before unquoted, double quoted, or single quoted attribute value, allowing * whitespace. * * ```markdown * > | a e * ^ * ``` * * @type {State} */ function tagOpenAttributeValueBefore(code) { if (code === null || code === 60 || code === 61 || code === 62 || code === 96) { return nok(code); } if (code === 34 || code === 39) { effects.consume(code); marker = code; return tagOpenAttributeValueQuoted; } if (markdownLineEnding(code)) { returnState = tagOpenAttributeValueBefore; return lineEndingBefore(code); } if (markdownSpace(code)) { effects.consume(code); return tagOpenAttributeValueBefore; } effects.consume(code); return tagOpenAttributeValueUnquoted; } /** * In double or single quoted attribute value. * * ```markdown * > | a e * ^ * ``` * * @type {State} */ function tagOpenAttributeValueQuoted(code) { if (code === marker) { effects.consume(code); marker = undefined; return tagOpenAttributeValueQuotedAfter; } if (code === null) { return nok(code); } if (markdownLineEnding(code)) { returnState = tagOpenAttributeValueQuoted; return lineEndingBefore(code); } effects.consume(code); return tagOpenAttributeValueQuoted; } /** * In unquoted attribute value. * * ```markdown * > | a e * ^ * ``` * * @type {State} */ function tagOpenAttributeValueUnquoted(code) { if (code === null || code === 34 || code === 39 || code === 60 || code === 61 || code === 96) { return nok(code); } if (code === 47 || code === 62 || markdownLineEndingOrSpace(code)) { return tagOpenBetween(code); } effects.consume(code); return tagOpenAttributeValueUnquoted; } /** * After double or single quoted attribute value, before whitespace or the end * of the tag. * * ```markdown * > | a e * ^ * ``` * * @type {State} */ function tagOpenAttributeValueQuotedAfter(code) { if (code === 47 || code === 62 || markdownLineEndingOrSpace(code)) { return tagOpenBetween(code); } return nok(code); } /** * In certain circumstances of a tag where only an `>` is allowed. * * ```markdown * > | a e * ^ * ``` * * @type {State} */ function end(code) { if (code === 62) { effects.consume(code); effects.exit("htmlTextData"); effects.exit("htmlText"); return ok; } return nok(code); } /** * At eol. * * > 👉 **Note**: we can’t have blank lines in text, so no need to worry about * > empty tokens. * * ```markdown * > | a * ``` * * @type {State} */ function lineEndingBefore(code) { effects.exit("htmlTextData"); effects.enter("lineEnding"); effects.consume(code); effects.exit("lineEnding"); return lineEndingAfter; } /** * After eol, at optional whitespace. * * > 👉 **Note**: we can’t have blank lines in text, so no need to worry about * > empty tokens. * * ```markdown * | a * ^ * ``` * * @type {State} */ function lineEndingAfter(code) { // Always populated by defaults. return markdownSpace(code) ? factorySpace(effects, lineEndingAfterPrefix, "linePrefix", self.parser.constructs.disable.null.includes('codeIndented') ? undefined : 4)(code) : lineEndingAfterPrefix(code); } /** * After eol, after optional whitespace. * * > 👉 **Note**: we can’t have blank lines in text, so no need to worry about * > empty tokens. * * ```markdown * | a * ^ * ``` * * @type {State} */ function lineEndingAfterPrefix(code) { effects.enter("htmlTextData"); return returnState(code); } }