mirror of
https://github.com/Funkoala14/knowledgebase_law.git
synced 2025-06-09 04:38:15 +08:00
149 lines
3.2 KiB
JavaScript
149 lines
3.2 KiB
JavaScript
|
/**
 * @import {
 *   Code,
 *   Construct,
 *   State,
 *   TokenizeContext,
 *   Tokenizer
 * } from 'micromark-util-types'
 */
|
||
|
|
||
|
import { decodeNamedCharacterReference } from 'decode-named-character-reference';
|
||
|
import { asciiAlphanumeric, asciiDigit, asciiHexDigit } from 'micromark-util-character';
|
||
|
/**
 * Construct for character references: pairs the construct name with the
 * tokenizer function that recognizes them.
 *
 * @type {Construct}
 */
export const characterReference = {
  name: 'characterReference',
  tokenize: tokenizeCharacterReference
};
|
||
|
|
||
|
/**
 * Tokenizer for character references.
 *
 * Builds a small state machine (`start` → `open` → optional `numeric` →
 * `value`) and returns its first state. The machine ends by returning `ok`
 * after the closing `;`, or by returning `nok(code)` when the input cannot be
 * a character reference.
 *
 * @this {TokenizeContext}
 *   Context.
 * @type {Tokenizer}
 */
function tokenizeCharacterReference(effects, ok, nok) {
  const self = this;
  // Number of value characters accepted so far.
  let size = 0;
  // Upper bound on value characters; set once the reference kind is known.
  /** @type {number} */
  let max;
  // Predicate deciding which codes are valid in the value; set together with
  // `max`.
  /** @type {(code: Code) => boolean} */
  let test;
  return start;

  /**
   * Start of character reference.
   *
   * ```markdown
   * > | a&amp;b
   *      ^
   * > | a&#123;b
   *      ^
   * > | a&#x9;b
   *      ^
   * ```
   *
   * @type {State}
   */
  function start(code) {
    effects.enter("characterReference");
    effects.enter("characterReferenceMarker");
    effects.consume(code);
    effects.exit("characterReferenceMarker");
    return open;
  }

  /**
   * After `&`, at `#` for numeric references or alphanumeric for named
   * references.
   *
   * ```markdown
   * > | a&amp;b
   *       ^
   * > | a&#123;b
   *       ^
   * > | a&#x9;b
   *       ^
   * ```
   *
   * @type {State}
   */
  function open(code) {
    // 35 is `#`: a numeric reference follows.
    if (code === 35) {
      effects.enter("characterReferenceMarkerNumeric");
      effects.consume(code);
      effects.exit("characterReferenceMarkerNumeric");
      return numeric;
    }

    // Named reference: at most 31 alphanumeric characters. The current code
    // is not consumed here; it is handed straight to `value`.
    effects.enter("characterReferenceValue");
    max = 31;
    test = asciiAlphanumeric;
    return value(code);
  }

  /**
   * After `#`, at `x` for hexadecimals or digit for decimals.
   *
   * ```markdown
   * > | a&#123;b
   *        ^
   * > | a&#x9;b
   *        ^
   * ```
   *
   * @type {State}
   */
  function numeric(code) {
    // 88 is `X`, 120 is `x`: a hexadecimal reference follows.
    if (code === 88 || code === 120) {
      effects.enter("characterReferenceMarkerHexadecimal");
      effects.consume(code);
      effects.exit("characterReferenceMarkerHexadecimal");
      // Hexadecimal reference: at most 6 hexadecimal digits.
      effects.enter("characterReferenceValue");
      max = 6;
      test = asciiHexDigit;
      return value;
    }

    // Decimal reference: at most 7 digits. The current code is not consumed
    // here; it is handed straight to `value`.
    effects.enter("characterReferenceValue");
    max = 7;
    test = asciiDigit;
    return value(code);
  }

  /**
   * After markers (`&#x`, `&#`, or `&`), in value, before `;`.
   *
   * The character reference kind defines what and how many characters are
   * allowed.
   *
   * ```markdown
   * > | a&amp;b
   *       ^^^
   * > | a&#123;b
   *        ^^^
   * > | a&#x9;b
   *         ^
   * ```
   *
   * @type {State}
   */
  function value(code) {
    // 59 is `;`: it only closes the reference when the value is non-empty.
    if (code === 59 && size) {
      const token = effects.exit("characterReferenceValue");

      // Named references must additionally decode to a known name.
      if (test === asciiAlphanumeric && !decodeNamedCharacterReference(self.sliceSerialize(token))) {
        return nok(code);
      }

      // To do: `markdown-rs` uses a different name:
      // `CharacterReferenceMarkerSemi`.
      effects.enter("characterReferenceMarker");
      effects.consume(code);
      effects.exit("characterReferenceMarker");
      effects.exit("characterReference");
      return ok;
    }

    // Accept another value character while it is of the allowed kind and the
    // length limit has not been reached.
    if (test(code) && size++ < max) {
      effects.consume(code);
      return value;
    }

    return nok(code);
  }
}
|