// Mirror of https://github.com/Funkoala14/knowledgebase_law.git
// Synced 2025-06-09 16:28:15 +08:00
// 212 lines, 5.4 KiB, JavaScript
|
/**
 * @import {
 *   Code,
 *   InitialConstruct,
 *   Initializer,
 *   Resolver,
 *   State,
 *   TokenizeContext
 * } from 'micromark-util-types'
 */
|
|||
|
|
|||
|
/**
 * Construct that only carries a `resolveAll`: the resolver built by
 * `createResolver()` with no extra resolver, i.e. one that just merges
 * adjacent `data` events.
 */
export const resolver = {
  resolveAll: createResolver()
};

/** Initial construct built by `initializeFactory` for the `string` field. */
export const string = initializeFactory('string');

/** Initial construct built by `initializeFactory` for the `text` field. */
export const text = initializeFactory('text');
|
|||
|
|
|||
|
/**
 * Create the initial construct for one of the two inline content types.
 *
 * The returned construct tokenizes everything that is not claimed by one of
 * the constructs registered under `parser.constructs[field]` as `data`, and
 * resolves by merging adjacent `data` events (for `text`, trailing line
 * whitespace is additionally split off by `resolveAllLineSuffixes`).
 *
 * @param {'string' | 'text'} field
 *   Field.
 * @returns {InitialConstruct}
 *   Construct.
 */
function initializeFactory(field) {
  return {
    // Only `text` gets the extra line-suffix resolver.
    resolveAll: createResolver(field === 'text' ? resolveAllLineSuffixes : undefined),
    tokenize: initializeText
  };

  /**
   * @this {TokenizeContext}
   *   Context.
   * @type {Initializer}
   */
  function initializeText(effects) {
    const self = this;
    const constructs = this.parser.constructs[field];
    // Try the registered constructs; continue at `start` when one matched and
    // fall back to plain data (`notText`) when none did.
    const text = effects.attempt(constructs, start, notText);

    return start;

    /** @type {State} */
    function start(code) {
      return atBreak(code) ? text(code) : notText(code);
    }

    /** @type {State} */
    function notText(code) {
      // `null` is the end of the content: consume it and stop (no next state).
      if (code === null) {
        effects.consume(code);
        return;
      }

      effects.enter("data");
      effects.consume(code);
      return data;
    }

    /** @type {State} */
    function data(code) {
      // A construct could start here: close the data and let `text` try.
      if (atBreak(code)) {
        effects.exit("data");
        return text(code);
      }

      // Data.
      effects.consume(code);
      return data;
    }

    /**
     * Check whether some registered construct could start at `code`.
     *
     * @param {Code} code
     *   Code.
     * @returns {boolean}
     *   Whether the code is a break.
     */
    function atBreak(code) {
      if (code === null) {
        return true;
      }

      const list = constructs[code];
      let index = -1;

      if (list) {
        // Always populated by defaults.
        while (++index < list.length) {
          const item = list[index];

          // A construct without a `previous` guard can always start; one with
          // a guard can start only if it accepts the preceding code.
          if (!item.previous || item.previous.call(self, self.previous)) {
            return true;
          }
        }
      }

      return false;
    }
  }
}
|
|||
|
|
|||
|
/**
 * Create a resolver that merges every run of adjacent `data` events into a
 * single `data` enter/exit pair, and then optionally passes the result on to
 * another resolver.
 *
 * @param {Resolver | undefined} [extraResolver]
 *   Resolver to call with the merged events; when omitted, the merged events
 *   are returned directly.
 * @returns {Resolver}
 *   Resolver.
 */
function createResolver(extraResolver) {
  return resolveAllText;

  /** @type {Resolver} */
  function resolveAllText(events, context) {
    let index = -1;
    /** @type {number | undefined} */
    let enter;

    // A rather boring computation (to merge adjacent `data` events) which
    // improves mm performance by 29%.
    // The loop intentionally runs one step past the last event so that a run
    // of `data` reaching the end of `events` is flushed as well.
    while (++index <= events.length) {
      if (enter === undefined) {
        if (events[index] && events[index][1].type === "data") {
          enter = index;
          // Step over the next event (taken to be this token's exit).
          index++;
        }
      } else if (!events[index] || events[index][1].type !== "data") {
        // Don’t do anything if there is one data token.
        if (index !== enter + 2) {
          // Stretch the first token to the end of the last one in the run and
          // drop every event after the first enter/exit pair.
          events[enter][1].end = events[index - 1][1].end;
          events.splice(enter + 2, index - enter - 2);
          index = enter + 2;
        }

        enter = undefined;
      }
    }

    return extraResolver ? extraResolver(events, context) : events;
  }
}
|
|||
|
|
|||
|
/**
 * Split trailing whitespace off `data` tokens that sit directly before a
 * `lineEnding` or at the very end of the events, turning it into a
 * `lineSuffix` or `hardBreakTrailing` token.
 *
 * A rather ugly set of instructions which again looks at chunks in the input
 * stream.
 * The reason to do this here is that it is *much* faster to parse in reverse.
 * And that we can’t hook into `null` to split the line suffix before an EOF.
 * To do: figure out if we can make this into a clean utility, or even in core.
 * As it will be useful for GFM’s literal autolink extension (and maybe even
 * tables?)
 *
 * @type {Resolver}
 */
function resolveAllLineSuffixes(events, context) {
  let eventIndex = 0; // Skip first.

  while (++eventIndex <= events.length) {
    if (
      (eventIndex === events.length || events[eventIndex][1].type === "lineEnding") &&
      events[eventIndex - 1][1].type === "data"
    ) {
      const data = events[eventIndex - 1][1];
      const chunks = context.sliceStream(data);
      let index = chunks.length;
      let bufferIndex = -1;
      let size = 0;
      /** @type {boolean | undefined} */
      let tabs;

      // Walk the chunks backwards, counting trailing whitespace in `size`.
      while (index--) {
        const chunk = chunks[index];

        if (typeof chunk === 'string') {
          bufferIndex = chunk.length;

          // 32 is the character code of a space.
          while (chunk.charCodeAt(bufferIndex - 1) === 32) {
            size++;
            bufferIndex--;
          }

          // Something other than spaces is left in this chunk: done.
          if (bufferIndex) break;
          bufferIndex = -1;
        }
        // Number
        // NOTE(review): -2 appears to be micromark's horizontal-tab code.
        else if (chunk === -2) {
          tabs = true;
          size++;
        }
        // NOTE(review): -1 appears to be micromark's virtual-space code; it
        // adds nothing to `size`.
        else if (chunk === -1) {
          // Empty
        } else {
          // Replacement character, exit.
          index++;
          break;
        }
      }

      // Allow final trailing whitespace.
      if (context._contentTypeTextTrailing && eventIndex === events.length) {
        size = 0;
      }

      if (size) {
        const token = {
          // Only two or more spaces (no tabs) before a line ending count as a
          // trailing hard break; anything else is a plain line suffix.
          type:
            eventIndex === events.length || tabs || size < 2
              ? "lineSuffix"
              : "hardBreakTrailing",
          start: {
            _bufferIndex: index ? bufferIndex : data.start._bufferIndex + bufferIndex,
            _index: data.start._index + index,
            line: data.end.line,
            column: data.end.column - size,
            offset: data.end.offset - size
          },
          end: {
            ...data.end
          }
        };

        data.end = {
          ...token.start
        };

        if (data.start.offset === data.end.offset) {
          // The data was whitespace only: turn it into the new token in place.
          Object.assign(data, token);
        } else {
          events.splice(eventIndex, 0, ['enter', token, context], ['exit', token, context]);
          eventIndex += 2;
        }
      }

      eventIndex++;
    }
  }

  return events;
}
|