mirror of
https://github.com/Funkoala14/knowledgebase_law.git
synced 2025-06-08 16:08:16 +08:00
228 lines
6.7 KiB
JavaScript
228 lines
6.7 KiB
JavaScript
/**
|
||
* @import {Chunk, Event, Token} from 'micromark-util-types'
|
||
*/
|
||
|
||
import { splice } from 'micromark-util-chunked';
|
||
import { SpliceBuffer } from './lib/splice-buffer.js';
|
||
|
||
// Hidden API exposed for testing.
|
||
export { SpliceBuffer } from './lib/splice-buffer.js';
|
||
|
||
/**
 * Tokenize subcontent.
 *
 * Makes a single pass over the events and, along the way:
 *
 * - flags `chunkText` tokens that sit in the first `content` of a list item
 *   (a hook for the GFM tasklist extension);
 * - expands every `enter` event whose token has a `contentType` by handing it
 *   to `subcontent`, which splices the nested events into place;
 * - moves the `exit` of a `_container` token in front of the line endings
 *   (and line prefixes / list item indents) that directly precede it.
 *
 * `eventsArray` is modified in place: the reworked events are copied back into
 * it before returning.
 *
 * @param {Array<Event>} eventsArray
 *   List of events.
 * @returns {boolean}
 *   `true` when this pass did not expand any token with a `contentType`
 *   (nothing was left to subtokenize); `false` when at least one token was
 *   expanded, in which case the spliced-in events have not been inspected yet
 *   (presumably callers repeat the call until it yields `true`).
 */
// eslint-disable-next-line complexity
export function subtokenize(eventsArray) {
  /**
   * Maps an event index to the index that the walk should continue from,
   * as reported by `subcontent`.
   *
   * @type {Record<string, number>}
   */
  const jumps = {};
  /** @type {boolean | undefined} */
  let more;
  let index = -1;
  const events = new SpliceBuffer(eventsArray);

  while (++index < events.length) {
    // Follow recorded jumps so ranges that were already spliced in are not
    // walked again during this pass.
    while (index in jumps) {
      index = jumps[index];
    }

    const event = events.get(index);

    // Add a hook for the GFM tasklist extension, which needs to know if text
    // is in the first content of a list item.
    if (
      index &&
      event[1].type === 'chunkFlow' &&
      events.get(index - 1)[1].type === 'listItemPrefix'
    ) {
      const subevents = event[1]._tokenizer.events;
      let otherIndex = 0;

      // Step over a leading blank line ending (two events; presumably its
      // enter and exit).
      if (
        otherIndex < subevents.length &&
        subevents[otherIndex][1].type === 'lineEndingBlank'
      ) {
        otherIndex += 2;
      }

      if (
        otherIndex < subevents.length &&
        subevents[otherIndex][1].type === 'content'
      ) {
        // Scan until the next `content` event, flagging each text chunk.
        while (++otherIndex < subevents.length) {
          if (subevents[otherIndex][1].type === 'content') {
            break;
          }

          if (subevents[otherIndex][1].type === 'chunkText') {
            subevents[otherIndex][1]._isInFirstContentOfListItem = true;
            // Together with the loop increment this steps over the event that
            // follows the chunk (presumably its matching exit).
            otherIndex++;
          }
        }
      }
    }

    if (event[0] === 'enter') {
      // Enter.
      if (event[1].contentType) {
        Object.assign(jumps, subcontent(events, index));
        index = jumps[index];
        more = true;
      }
    } else if (event[1]._container) {
      // Exit of a container.
      let otherIndex = index;
      /**
       * Index of the earliest line ending `enter` found while walking back.
       *
       * @type {number | undefined}
       */
      let lineIndex;

      while (otherIndex--) {
        const otherEvent = events.get(otherIndex);
        const otherType = otherEvent[1].type;

        if (otherType === 'lineEnding' || otherType === 'lineEndingBlank') {
          if (otherEvent[0] === 'enter') {
            // Only the earliest line ending stays a plain `lineEnding`; the
            // one found before it (later in the document) becomes blank.
            if (lineIndex) {
              events.get(lineIndex)[1].type = 'lineEndingBlank';
            }

            otherEvent[1].type = 'lineEnding';
            lineIndex = otherIndex;
          }
        } else if (
          otherType !== 'linePrefix' &&
          otherType !== 'listItemIndent'
        ) {
          // Anything other than line endings, line prefixes and list item
          // indents ends the walk; those are simply moved past.
          break;
        }
      }

      if (lineIndex) {
        // Fix position.
        event[1].end = { ...events.get(lineIndex)[1].start };

        // Switch container exit w/ line endings.
        const parameters = events.slice(lineIndex, index);
        parameters.unshift(event);
        events.splice(lineIndex, index - lineIndex + 1, parameters);
      }
    }
  }

  // The changes to the `events` buffer must be copied back into the eventsArray
  splice(eventsArray, 0, Number.POSITIVE_INFINITY, events.slice(0));
  return !more;
}
|
||
|
||
/**
 * Tokenize embedded tokens.
 *
 * Feeds the token at `eventIndex`, and every token linked to it through
 * `next`, to a single tokenizer, then replaces each linked token's two events
 * in `events` with the part of the tokenizer's output that belongs to it.
 *
 * Side effects: `events` is spliced in place; `_tokenizer` and `previous` are
 * cleared on the linked tokens that are visited while matching breaks; the
 * tokenizer's `events` are reset to an empty list.
 *
 * @param {SpliceBuffer<Event>} events
 *   Events.
 * @param {number} eventIndex
 *   Index of the `enter` event of the first linked token.
 * @returns {Record<string, number>}
 *   Gaps: for every spliced-in slice, the index of its first event mapped to
 *   the index of its last event, both expressed as indices into `events` after
 *   all splices have been applied.
 */
function subcontent(events, eventIndex) {
  const token = events.get(eventIndex)[1];
  const context = events.get(eventIndex)[2];
  let startPosition = eventIndex - 1;
  /**
   * Index in `events` of each linked token, in link order.
   *
   * @type {Array<number>}
   */
  const startPositions = [];

  // Reuse the tokenizer attached to the token, or create one for its content
  // type starting at the token's start point.
  let tokenizer = token._tokenizer;

  if (!tokenizer) {
    tokenizer = context.parser[token.contentType](token.start);

    if (token._contentTypeTextTrailing) {
      tokenizer._contentTypeTextTrailing = true;
    }
  }

  // Captured before anything is written: the rest of this function reads the
  // tokenizer's output through this reference.
  const childEvents = tokenizer.events;
  /** @type {Array<[number, number]>} */
  const jumps = [];
  /** @type {Record<string, number>} */
  const gaps = {};
  /** @type {Array<Chunk>} */
  let stream;
  /** @type {Token | undefined} */
  let previous;
  let index = -1;
  /** @type {Token | undefined} */
  let current = token;
  let adjust = 0;
  let start = 0;
  // Offsets into `childEvents` at which the events of each linked token begin.
  const breaks = [start];

  // Loop forward through the linked tokens to pass them in order to the
  // subtokenizer.
  while (current) {
    // Find the position of the event for this token.
    while (events.get(++startPosition)[1] !== current) {
      // Empty.
    }

    startPositions.push(startPosition);

    if (!current._tokenizer) {
      stream = context.sliceStream(current);

      // The last linked token closes the stream with a `null` chunk.
      if (!current.next) {
        stream.push(null);
      }

      // Every token after the first tells the tokenizer where it starts.
      if (previous) {
        tokenizer.defineSkip(current.start);
      }

      // Expose the GFM tasklist flag on the tokenizer only while this chunk
      // is being written.
      if (current._isInFirstContentOfListItem) {
        tokenizer._gfmTasklistFirstContentOfListItem = true;
      }

      tokenizer.write(stream);

      if (current._isInFirstContentOfListItem) {
        tokenizer._gfmTasklistFirstContentOfListItem = undefined;
      }
    }

    // Unravel the next token.
    previous = current;
    current = current.next;
  }

  // Now, loop back through all events (and linked tokens), to figure out which
  // parts belong where.
  current = token;

  while (++index < childEvents.length) {
    if (
      // Find a void token that includes a break.
      childEvents[index][0] === 'exit' &&
      childEvents[index - 1][0] === 'enter' &&
      childEvents[index][1].type === childEvents[index - 1][1].type &&
      childEvents[index][1].start.line !== childEvents[index][1].end.line
    ) {
      start = index + 1;
      breaks.push(start);
      // Help GC.
      current._tokenizer = undefined;
      current.previous = undefined;
      current = current.next;
    }
  }

  // Help GC.
  tokenizer.events = [];

  // If there’s one more token (which is the case for lines that end in an
  // EOF), that’s perfect: the last point we found starts it.
  // If there isn’t then make sure any remaining content is added to it.
  if (current) {
    // Help GC.
    current._tokenizer = undefined;
    current.previous = undefined;
  } else {
    breaks.pop();
  }

  // Now splice the events from the subtokenizer into the current events,
  // moving back to front so that splice indices aren’t affected.
  index = breaks.length;

  while (index--) {
    // For the last break `breaks[index + 1]` is `undefined`, so the slice
    // runs to the end of `childEvents`.
    const slice = childEvents.slice(breaks[index], breaks[index + 1]);
    const spliceStart = startPositions.pop();
    jumps.push([spliceStart, spliceStart + slice.length - 1]);
    // Each linked token occupies two events, which the slice replaces.
    events.splice(spliceStart, 2, slice);
  }

  // The jumps were collected back to front; put them in document order and
  // shift each one by the net number of events inserted before it.
  jumps.reverse();
  index = -1;

  while (++index < jumps.length) {
    gaps[adjust + jumps[index][0]] = adjust + jumps[index][1];
    adjust += jumps[index][1] - jumps[index][0] - 1;
  }

  return gaps;
}