knowledgebase_law/node_modules/micromark-core-commonmark/lib/character-reference.js

149 lines
3.2 KiB
JavaScript
Raw Normal View History

2025-04-11 23:47:09 +08:00
/**
* @import {
* Code,
* Construct,
* State,
* TokenizeContext,
* Tokenizer
* } from 'micromark-util-types'
*/
import { decodeNamedCharacterReference } from 'decode-named-character-reference';
import { asciiAlphanumeric, asciiDigit, asciiHexDigit } from 'micromark-util-character';
/**
 * Construct for character references (for example `&amp;`, `&#123;`, or
 * `&#x9;`).
 *
 * Exposes the construct under the name `characterReference` and delegates
 * tokenizing to `tokenizeCharacterReference`.
 *
 * @type {Construct}
 */
export const characterReference = {
name: 'characterReference',
tokenize: tokenizeCharacterReference
};
/**
 * Tokenize a character reference: named (`&amp;`), decimal (`&#123;`), or
 * hexadecimal (`&#x9;` / `&#X9;`).
 *
 * Returns the initial state; the state machine ends in `ok` after the closing
 * `;` was consumed, or calls `nok` with the offending code.
 *
 * @this {TokenizeContext}
 *   Context.
 * @type {Tokenizer}
 */
function tokenizeCharacterReference(effects, ok, nok) {
  const context = this;
  // Character codes this tokenizer looks at.
  const numberSign = 35;
  const semicolon = 59;
  const uppercaseX = 88;
  const lowercaseX = 120;
  // How many value characters were offered to the value so far.
  let seen = 0;
  /** @type {number} */
  let limit;
  /** @type {(code: Code) => boolean} */
  let accepts;
  return start;

  /**
   * Emit a one-character token: enter `type`, consume `code`, exit `type`.
   *
   * @param {string} type
   *   Token type to wrap the character in.
   * @param {Code} code
   *   Character code to consume.
   * @returns {undefined}
   *   Nothing.
   */
  function marker(type, code) {
    effects.enter(type);
    effects.consume(code);
    effects.exit(type);
  }

  /**
   * Open the value token and remember which characters, and how many of
   * them, the current kind of reference allows.
   *
   * @param {number} sizeMax
   *   Maximum number of value characters.
   * @param {(code: Code) => boolean} predicate
   *   Check for a single value character.
   * @returns {undefined}
   *   Nothing.
   */
  function beginValue(sizeMax, predicate) {
    effects.enter("characterReferenceValue");
    limit = sizeMax;
    accepts = predicate;
  }

  /**
   * Start of character reference.
   *
   * The given code is consumed as the opening marker without being checked
   * here; the examples show the intended position, at `&`.
   *
   * ```markdown
   * > | a&amp;b
   *      ^
   * > | a&#123;b
   *      ^
   * > | a&#x9;b
   *      ^
   * ```
   *
   * @type {State}
   */
  function start(code) {
    effects.enter("characterReference");
    marker("characterReferenceMarker", code);
    return open;
  }

  /**
   * After `&`, at `#` for numeric references or alphanumeric for named
   * references.
   *
   * ```markdown
   * > | a&amp;b
   *       ^
   * > | a&#123;b
   *       ^
   * > | a&#x9;b
   *       ^
   * ```
   *
   * @type {State}
   */
  function open(code) {
    if (code !== numberSign) {
      // Named reference: at most 31 ASCII alphanumerics; the current code is
      // already part of the value, so pass it straight on.
      beginValue(31, asciiAlphanumeric);
      return value(code);
    }
    marker("characterReferenceMarkerNumeric", code);
    return numeric;
  }

  /**
   * After `#`, at `x` for hexadecimals or digit for decimals.
   *
   * ```markdown
   * > | a&#123;b
   *        ^
   * > | a&#x9;b
   *        ^
   * ```
   *
   * @type {State}
   */
  function numeric(code) {
    const isHexadecimal = code === uppercaseX || code === lowercaseX;
    if (!isHexadecimal) {
      // Decimal reference: at most 7 digits; the current code is already
      // part of the value.
      beginValue(7, asciiDigit);
      return value(code);
    }
    // Hexadecimal reference: `x`/`X` is its own marker, followed by at most
    // 6 hexadecimal digits.
    marker("characterReferenceMarkerHexadecimal", code);
    beginValue(6, asciiHexDigit);
    return value;
  }

  /**
   * After markers (`&#x`, `&#`, or `&`), in value, before `;`.
   *
   * The character reference kind defines what and how many characters are
   * allowed.
   *
   * ```markdown
   * > | a&amp;b
   *       ^^^
   * > | a&#123;b
   *        ^^^
   * > | a&#x9;b
   *         ^
   * ```
   *
   * @type {State}
   */
  function value(code) {
    if (code !== semicolon || seen === 0) {
      // Not a closing `;` after a non-empty value: the code has to be a
      // valid value character that still fits within the limit.
      if (!accepts(code)) {
        return nok(code);
      }
      const fits = seen < limit;
      seen += 1;
      if (!fits) {
        return nok(code);
      }
      effects.consume(code);
      return value;
    }
    const token = effects.exit("characterReferenceValue");
    // Only named references are looked up; numeric values are accepted on
    // shape alone.
    const isNamed = accepts === asciiAlphanumeric;
    if (isNamed && !decodeNamedCharacterReference(context.sliceSerialize(token))) {
      return nok(code);
    }
    // To do: `markdown-rs` uses a different name:
    // `CharacterReferenceMarkerSemi`.
    marker("characterReferenceMarker", code);
    effects.exit("characterReference");
    return ok;
  }
}