/** * @import { * Code, * Construct, * State, * TokenizeContext, * Tokenizer * } from 'micromark-util-types' */ import { decodeNamedCharacterReference } from 'decode-named-character-reference'; import { asciiAlphanumeric, asciiDigit, asciiHexDigit } from 'micromark-util-character'; /** @type {Construct} */ export const characterReference = { name: 'characterReference', tokenize: tokenizeCharacterReference }; /** * @this {TokenizeContext} * Context. * @type {Tokenizer} */ function tokenizeCharacterReference(effects, ok, nok) { const self = this; let size = 0; /** @type {number} */ let max; /** @type {(code: Code) => boolean} */ let test; return start; /** * Start of character reference. * * ```markdown * > | a&b * ^ * > | a{b * ^ * > | a b * ^ * ``` * * @type {State} */ function start(code) { effects.enter("characterReference"); effects.enter("characterReferenceMarker"); effects.consume(code); effects.exit("characterReferenceMarker"); return open; } /** * After `&`, at `#` for numeric references or alphanumeric for named * references. * * ```markdown * > | a&b * ^ * > | a{b * ^ * > | a b * ^ * ``` * * @type {State} */ function open(code) { if (code === 35) { effects.enter("characterReferenceMarkerNumeric"); effects.consume(code); effects.exit("characterReferenceMarkerNumeric"); return numeric; } effects.enter("characterReferenceValue"); max = 31; test = asciiAlphanumeric; return value(code); } /** * After `#`, at `x` for hexadecimals or digit for decimals. * * ```markdown * > | a{b * ^ * > | a b * ^ * ``` * * @type {State} */ function numeric(code) { if (code === 88 || code === 120) { effects.enter("characterReferenceMarkerHexadecimal"); effects.consume(code); effects.exit("characterReferenceMarkerHexadecimal"); effects.enter("characterReferenceValue"); max = 6; test = asciiHexDigit; return value; } effects.enter("characterReferenceValue"); max = 7; test = asciiDigit; return value(code); } /** * After markers (`&#x`, `&#`, or `&`), in value, before `;`. * * The character reference kind defines what and how many characters are * allowed. * * ```markdown * > | a&b * ^^^ * > | a{b * ^^^ * > | a b * ^ * ``` * * @type {State} */ function value(code) { if (code === 59 && size) { const token = effects.exit("characterReferenceValue"); if (test === asciiAlphanumeric && !decodeNamedCharacterReference(self.sliceSerialize(token))) { return nok(code); } // To do: `markdown-rs` uses a different name: // `CharacterReferenceMarkerSemi`. effects.enter("characterReferenceMarker"); effects.consume(code); effects.exit("characterReferenceMarker"); effects.exit("characterReference"); return ok; } if (test(code) && size++ < max) { effects.consume(code); return value; } return nok(code); } }