knowledgebase_law/node_modules/micromark-extension-gfm-table/lib/syntax.js

819 lines
20 KiB
JavaScript
Raw Normal View History

2025-04-11 23:47:09 +08:00
/**
* @import {Event, Extension, Point, Resolver, State, Token, TokenizeContext, Tokenizer} from 'micromark-util-types'
*/
/**
* @typedef {[number, number, number, number]} Range
* Cell info.
*
* @typedef {0 | 1 | 2 | 3} RowKind
* Where we are: `0` when not in a row, `1` for head row, `2` for delimiter
* row, `3` for body row.
*/
import { factorySpace } from 'micromark-factory-space';
import { markdownLineEnding, markdownLineEndingOrSpace, markdownSpace } from 'micromark-util-character';
import { EditMap } from './edit-map.js';
import { gfmTableAlign } from './infer.js';
/**
 * Create a syntax extension for `micromark` that enables GFM table syntax.
 *
 * The extension registers a single `table` construct under `flow`, keyed by
 * `null`, with `tokenizeTable` as its tokenizer and `resolveTable` as its
 * `resolveAll` resolver.
 *
 * @returns {Extension}
 *   Extension for `micromark` that can be passed in `extensions` to enable GFM
 *   table syntax.
 */
export function gfmTable() {
  const table = {
    name: 'table',
    tokenize: tokenizeTable,
    resolveAll: resolveTable
  };
  return { flow: { null: table } };
}
/**
* Tokenizer for GFM tables, used as the `tokenize` function of the `table`
* construct returned by `gfmTable`.
*
* Depending on the events that came before, one call parses either a head row
* plus its delimiter row, or a single body row.
*
* Character codes compared against below: `45` is `-`, `58` is `:`, `92` is
* a backslash, and `124` is `|`.
*
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeTable(effects, ok, nok) {
const self = this;
// Number of cells counted in the head row.
let size = 0;
// In the head row: number of non-whitespace items seen (pipes and data runs,
// with a leading non-pipe counted double). It is reset before the delimiter
// row, where it counts the delimiter cells instead.
let sizeB = 0;
// In the head row: whether the previous item was a pipe (or the row started
// without one), so that the next item opens a new cell.
// In the delimiter row: whether a `:` or `|` was seen.
/** @type {boolean | undefined} */
let seen;
return start;
/**
* Start of a GFM table.
*
* If there is a valid table row or table head before, then we try to parse
* another row.
* Otherwise, we try to parse a head.
*
* ```markdown
* > | | a |
* ^
* | | - |
* > | | b |
* ^
* ```
* @type {State}
*/
function start(code) {
// Walk back over trailing line endings and line prefixes to find the last
// meaningful event.
let index = self.events.length - 1;
while (index > -1) {
const type = self.events[index][1].type;
if (type === "lineEnding" ||
// Note: markdown-rs uses `whitespace` instead of `linePrefix`
type === "linePrefix") index--;else break;
}
const tail = index > -1 ? self.events[index][1].type : null;
const next = tail === 'tableHead' || tail === 'tableRow' ? bodyRowStart : headRowBefore;
// Don't allow lazy body rows.
if (next === bodyRowStart && self.parser.lazy[self.now().line]) {
return nok(code);
}
return next(code);
}
/**
* Before table head row.
*
* Opens both the `tableHead` and the first `tableRow` token.
*
* ```markdown
* > | | a |
* ^
* | | - |
* | | b |
* ```
*
* @type {State}
*/
function headRowBefore(code) {
effects.enter('tableHead');
effects.enter('tableRow');
return headRowStart(code);
}
/**
* Before table head row, after whitespace.
*
* ```markdown
* > | | a |
* ^
* | | - |
* | | b |
* ```
*
* @type {State}
*/
function headRowStart(code) {
// A leading `|`: handled as a regular break.
if (code === 124) {
return headRowBreak(code);
}
// To do: micromark-js should let us parse our own whitespace in extensions,
// like `markdown-rs`:
//
// ```js
// // 4+ spaces.
// if (markdownSpace(code)) {
// return nok(code)
// }
// ```
// No leading pipe: mark `seen` so that the first item still opens a cell.
seen = true;
// Count the first character, that isn't a pipe, double.
sizeB += 1;
return headRowBreak(code);
}
/**
* At break in table head row.
*
* ```markdown
* > | | a |
* ^
* ^
* ^
* | | - |
* | | b |
* ```
*
* @type {State}
*/
function headRowBreak(code) {
if (code === null) {
// Note: in `markdown-rs`, we need to reset, in `micromark-js` we don't.
return nok(code);
}
if (markdownLineEnding(code)) {
// If anything other than one pipe (ignoring whitespace) was used, it's fine.
if (sizeB > 1) {
// Reuse the counter for the delimiter row.
sizeB = 0;
// To do: check if this works.
// Feel free to interrupt:
self.interrupt = true;
effects.exit('tableRow');
effects.enter("lineEnding");
effects.consume(code);
effects.exit("lineEnding");
return headDelimiterStart;
}
// Note: in `markdown-rs`, we need to reset, in `micromark-js` we don't.
return nok(code);
}
if (markdownSpace(code)) {
// To do: check if this is fine.
// effects.attempt(State::Next(StateName::GfmTableHeadRowBreak), State::Nok)
// State::Retry(space_or_tab(tokenizer))
return factorySpace(effects, headRowBreak, "whitespace")(code);
}
// A pipe or the start of a data run: count it.
sizeB += 1;
if (seen) {
seen = false;
// Header cell count.
size += 1;
}
if (code === 124) {
effects.enter('tableCellDivider');
effects.consume(code);
effects.exit('tableCellDivider');
// Whether a delimiter was seen.
seen = true;
return headRowBreak;
}
// Anything else is cell data.
effects.enter("data");
return headRowData(code);
}
/**
* In table head row data.
*
* ```markdown
* > | | a |
* ^
* | | - |
* | | b |
* ```
*
* @type {State}
*/
function headRowData(code) {
// The data run ends at `null`, a pipe, a line ending, or whitespace.
if (code === null || code === 124 || markdownLineEndingOrSpace(code)) {
effects.exit("data");
return headRowBreak(code);
}
effects.consume(code);
// After a backslash, the next character may be escaped.
return code === 92 ? headRowEscape : headRowData;
}
/**
* In table head row escape.
*
* ```markdown
* > | | a\-b |
* ^
* | | ---- |
* | | c |
* ```
*
* @type {State}
*/
function headRowEscape(code) {
// An escaped backslash or pipe is consumed as part of the data.
if (code === 92 || code === 124) {
effects.consume(code);
return headRowData;
}
return headRowData(code);
}
/**
* Before delimiter row.
*
* ```markdown
* | | a |
* > | | - |
* ^
* | | b |
* ```
*
* @type {State}
*/
function headDelimiterStart(code) {
// Reset `interrupt`.
self.interrupt = false;
// Note: in `markdown-rs`, we need to handle piercing here too.
// A lazy line cannot be the delimiter row.
if (self.parser.lazy[self.now().line]) {
return nok(code);
}
effects.enter('tableDelimiterRow');
// Track if we've seen a `:` or `|`.
seen = false;
if (markdownSpace(code)) {
// Pass a limit of `4` unless the `codeIndented` construct is disabled.
return factorySpace(effects, headDelimiterBefore, "linePrefix", self.parser.constructs.disable.null.includes('codeIndented') ? undefined : 4)(code);
}
return headDelimiterBefore(code);
}
/**
* Before delimiter row, after optional whitespace.
*
* Reused when a `|` is found later, to parse another cell.
*
* ```markdown
* | | a |
* > | | - |
* ^
* | | b |
* ```
*
* @type {State}
*/
function headDelimiterBefore(code) {
// `-` or `:` starts a delimiter cell value directly.
if (code === 45 || code === 58) {
return headDelimiterValueBefore(code);
}
if (code === 124) {
seen = true;
// If we start with a pipe, we open a cell marker.
effects.enter('tableCellDivider');
effects.consume(code);
effects.exit('tableCellDivider');
return headDelimiterCellBefore;
}
// More whitespace / empty row not allowed at start.
return headDelimiterNok(code);
}
/**
* After `|`, before delimiter cell.
*
* ```markdown
* | | a |
* > | | - |
* ^
* ```
*
* @type {State}
*/
function headDelimiterCellBefore(code) {
if (markdownSpace(code)) {
return factorySpace(effects, headDelimiterValueBefore, "whitespace")(code);
}
return headDelimiterValueBefore(code);
}
/**
* Before delimiter cell value.
*
* ```markdown
* | | a |
* > | | - |
* ^
* ```
*
* @type {State}
*/
function headDelimiterValueBefore(code) {
// Align: left.
if (code === 58) {
// Count this delimiter cell.
sizeB += 1;
seen = true;
effects.enter('tableDelimiterMarker');
effects.consume(code);
effects.exit('tableDelimiterMarker');
return headDelimiterLeftAlignmentAfter;
}
// Align: none.
if (code === 45) {
// Count this delimiter cell.
sizeB += 1;
// To do: seems weird that this *isn't* left aligned, but that state is used?
return headDelimiterLeftAlignmentAfter(code);
}
// Nothing after the last pipe: let the cell-after state decide.
if (code === null || markdownLineEnding(code)) {
return headDelimiterCellAfter(code);
}
return headDelimiterNok(code);
}
/**
* After delimiter cell left alignment marker.
*
* ```markdown
* | | a |
* > | | :- |
* ^
* ```
*
* @type {State}
*/
function headDelimiterLeftAlignmentAfter(code) {
if (code === 45) {
effects.enter('tableDelimiterFiller');
return headDelimiterFiller(code);
}
// Anything else is not ok after the left-align colon.
return headDelimiterNok(code);
}
/**
* In delimiter cell filler.
*
* ```markdown
* | | a |
* > | | - |
* ^
* ```
*
* @type {State}
*/
function headDelimiterFiller(code) {
// Keep consuming dashes.
if (code === 45) {
effects.consume(code);
return headDelimiterFiller;
}
// Align is `center` if it was `left`, `right` otherwise.
if (code === 58) {
seen = true;
effects.exit('tableDelimiterFiller');
effects.enter('tableDelimiterMarker');
effects.consume(code);
effects.exit('tableDelimiterMarker');
return headDelimiterRightAlignmentAfter;
}
effects.exit('tableDelimiterFiller');
return headDelimiterRightAlignmentAfter(code);
}
/**
* After delimiter cell right alignment marker.
*
* ```markdown
* | | a |
* > | | -: |
* ^
* ```
*
* @type {State}
*/
function headDelimiterRightAlignmentAfter(code) {
if (markdownSpace(code)) {
return factorySpace(effects, headDelimiterCellAfter, "whitespace")(code);
}
return headDelimiterCellAfter(code);
}
/**
* After delimiter cell.
*
* ```markdown
* | | a |
* > | | -: |
* ^
* ```
*
* @type {State}
*/
function headDelimiterCellAfter(code) {
// Another pipe: parse the next delimiter cell.
if (code === 124) {
return headDelimiterBefore(code);
}
if (code === null || markdownLineEnding(code)) {
// Exit when:
// * there was no `:` or `|` at all (it's a thematic break or setext
// underline instead)
// * the header cell count is not the delimiter cell count
if (!seen || size !== sizeB) {
return headDelimiterNok(code);
}
// Note: in `markdown-rs`, a reset is needed here.
effects.exit('tableDelimiterRow');
effects.exit('tableHead');
// To do: in `markdown-rs`, resolvers need to be registered manually.
// effects.register_resolver(ResolveName::GfmTable)
return ok(code);
}
return headDelimiterNok(code);
}
/**
* In delimiter row, at a disallowed byte.
*
* ```markdown
* | | a |
* > | | x |
* ^
* ```
*
* @type {State}
*/
function headDelimiterNok(code) {
// Note: in `markdown-rs`, we need to reset, in `micromark-js` we don't.
return nok(code);
}
/**
* Before table body row.
*
* ```markdown
* | | a |
* | | - |
* > | | b |
* ^
* ```
*
* @type {State}
*/
function bodyRowStart(code) {
// Note: in `markdown-rs` we need to manually take care of a prefix,
// but in `micromark-js` that is done for us, so if we're here, we're
// never at whitespace.
effects.enter('tableRow');
return bodyRowBreak(code);
}
/**
* At break in table body row.
*
* ```markdown
* | | a |
* | | - |
* > | | b |
* ^
* ^
* ^
* ```
*
* @type {State}
*/
function bodyRowBreak(code) {
if (code === 124) {
effects.enter('tableCellDivider');
effects.consume(code);
effects.exit('tableCellDivider');
return bodyRowBreak;
}
// The row is done at `null` or a line ending.
if (code === null || markdownLineEnding(code)) {
effects.exit('tableRow');
return ok(code);
}
if (markdownSpace(code)) {
return factorySpace(effects, bodyRowBreak, "whitespace")(code);
}
// Anything else is cell content.
effects.enter("data");
return bodyRowData(code);
}
/**
* In table body row data.
*
* ```markdown
* | | a |
* | | - |
* > | | b |
* ^
* ```
*
* @type {State}
*/
function bodyRowData(code) {
// The data run ends at `null`, a pipe, a line ending, or whitespace.
if (code === null || code === 124 || markdownLineEndingOrSpace(code)) {
effects.exit("data");
return bodyRowBreak(code);
}
effects.consume(code);
// After a backslash, the next character may be escaped.
return code === 92 ? bodyRowEscape : bodyRowData;
}
/**
* In table body row escape.
*
* ```markdown
* | | a |
* | | ---- |
* > | | b\-c |
* ^
* ```
*
* @type {State}
*/
function bodyRowEscape(code) {
// An escaped backslash or pipe is consumed as part of the data.
if (code === 92 || code === 124) {
effects.consume(code);
return bodyRowData;
}
return bodyRowData(code);
}
}
/**
 * Resolver that turns the flat table events into a nested structure.
 *
 * It walks all events once, queues `table`, `tableBody`, and cell
 * enter/exit events in an `EditMap`, applies those edits to `context.events`,
 * and finally sets `_align` on every `table` enter token.
 *
 * @type {Resolver}
 */
function resolveTable(events, context) {
  const edits = new EditMap();
  /** @type {RowKind} */
  let rowKind = 0;
  // Whether the next pipe in the current row is the optional leading one.
  let awaitingLeadingPipe = true;
  // Whether a head was closed and the next row opens the table body.
  let awaitingFirstBodyRow = false;
  // Index of the last event known to belong to the current table (`0`: none).
  let tableEndIndex = 0;
  /** @type {Range} */
  let previousRange = [0, 0, 0, 0];
  /** @type {Range} */
  let range = [0, 0, 0, 0];
  /** @type {Token | undefined} */
  let table;
  /** @type {Token | undefined} */
  let body;
  /** @type {Token | undefined} */
  let openCell;

  /**
   * Flush the previous cell range, if there is one.
   *
   * @param {number | undefined} rowEnd
   * @returns {boolean}
   *   Whether a range was flushed.
   */
  function flushPrevious(rowEnd) {
    if (previousRange[1] === 0) {
      return false;
    }
    range[0] = range[1];
    openCell = flushCell(edits, context, previousRange, rowKind, rowEnd, openCell);
    return true;
  }

  for (let index = 0; index < events.length; index++) {
    const event = events[index];
    const token = event[1];
    const type = token.type;
    const isRow = type === 'tableRow' || type === 'tableDelimiterRow';
    const isValue =
      type === 'data' ||
      type === 'tableDelimiterMarker' ||
      type === 'tableDelimiterFiller';

    if (event[0] !== 'enter') {
      // Exit events.
      if (type === 'tableHead') {
        awaitingFirstBodyRow = true;
        tableEndIndex = index;
      } else if (isRow) {
        tableEndIndex = index;
        // Close whichever cell is still pending at the end of the row.
        if (!flushPrevious(index) && range[1] !== 0) {
          openCell = flushCell(edits, context, range, rowKind, index, openCell);
        }
        rowKind = 0;
      } else if (rowKind && isValue) {
        range[3] = index;
      }
      continue;
    }

    // Enter events.
    if (type === 'tableHead') {
      awaitingFirstBodyRow = false;
      // A new head starts a new table: close the previous one first.
      if (tableEndIndex !== 0) {
        flushTableEnd(edits, context, tableEndIndex, table, body);
        body = undefined;
        tableEndIndex = 0;
      }
      table = {
        type: 'table',
        start: Object.assign({}, token.start),
        // Note: correct end is set later.
        end: Object.assign({}, token.end)
      };
      edits.add(index, 0, [['enter', table, context]]);
      continue;
    }

    if (isRow) {
      awaitingLeadingPipe = true;
      openCell = undefined;
      previousRange = [0, 0, 0, 0];
      range = [0, index + 1, 0, 0];
      // The first row after the head opens the body.
      if (awaitingFirstBodyRow) {
        awaitingFirstBodyRow = false;
        body = {
          type: 'tableBody',
          start: Object.assign({}, token.start),
          // Note: correct end is set later.
          end: Object.assign({}, token.end)
        };
        edits.add(index, 0, [['enter', body, context]]);
      }
      if (type === 'tableDelimiterRow') {
        rowKind = 2;
      } else {
        rowKind = body ? 3 : 1;
      }
      continue;
    }

    if (rowKind && isValue) {
      awaitingLeadingPipe = false;
      // Only the first value in a cell marks the value start.
      if (range[2] === 0) {
        if (flushPrevious(undefined)) {
          previousRange = [0, 0, 0, 0];
        }
        range[2] = index;
      }
      continue;
    }

    if (type === 'tableCellDivider') {
      if (awaitingLeadingPipe) {
        awaitingLeadingPipe = false;
      } else {
        flushPrevious(undefined);
        previousRange = range;
        range = [previousRange[1], index, 0, 0];
      }
    }
  }

  if (tableEndIndex !== 0) {
    flushTableEnd(edits, context, tableEndIndex, table, body);
  }
  edits.consume(context.events);

  // To do: move this into `html`, when events are exposed there.
  // That's what `markdown-rs` does.
  // That needs updates to `mdast-util-gfm-table`.
  for (let index = 0; index < context.events.length; index++) {
    const event = context.events[index];
    if (event[0] === 'enter' && event[1].type === 'table') {
      event[1]._align = gfmTableAlign(context.events, index);
    }
  }
  return events;
}
/**
 * Queue the edits for one table cell.
 *
 * `range` holds four event indices: where the previous cell exits (`0` for
 * none), where this cell enters, and where its first value starts and its
 * last value ends (`0` for the start when the cell has no value).
 *
 * @param {EditMap} map
 *   Edit map that collects the edits.
 * @param {Readonly<TokenizeContext>} context
 *   Context whose `events` are read, and which is attached to new events.
 * @param {Readonly<Range>} range
 *   Cell info.
 * @param {RowKind} rowKind
 *   Kind of row: `1` yields `tableHeader`, `2` yields `tableDelimiter`,
 *   anything else yields `tableData`.
 * @param {number | undefined} rowEnd
 *   Index of the row exit when this is the last cell in the row.
 * @param {Token | undefined} previousCell
 *   Cell that is still open, closed here when `range[0]` is not `0`.
 * @returns {Token | undefined}
 *   The new cell when it is still open, `undefined` when it was closed at
 *   `rowEnd`.
 */
// eslint-disable-next-line max-params
function flushCell(map, context, range, rowKind, rowEnd, previousCell) {
  const [exitAt, enterAt, valueStart, valueEnd] = range;
  const events = context.events;

  // `markdown-rs` uses:
  // rowKind === 2 ? 'tableDelimiterCell' : 'tableCell'
  let cellType = 'tableData';
  if (rowKind === 1) {
    cellType = 'tableHeader';
  } else if (rowKind === 2) {
    cellType = 'tableDelimiter';
  }

  // Close the previous cell, if there is one.
  //
  // ```markdown
  // > | | aa | bb | cc |
  //          ^-- exit
  //           ^^^^-- this cell
  // ```
  if (exitAt !== 0) {
    previousCell.end = Object.assign({}, getPoint(events, exitAt));
    map.add(exitAt, 0, [['exit', previousCell, context]]);
  }

  // Open this cell.
  //
  // ```markdown
  // > | | aa | bb | cc |
  //           ^-- enter
  //           ^^^^-- this cell
  // ```
  const enterPoint = getPoint(events, enterAt);
  const cellToken = {
    type: cellType,
    start: Object.assign({}, enterPoint),
    // Note: correct end is set later.
    end: Object.assign({}, enterPoint)
  };
  map.add(enterAt, 0, [['enter', cellToken, context]]);

  // Wrap the values in a content token, from the first value start to the
  // last value end.
  //
  // ```markdown
  // > | | aa | bb | cc |
  //            ^-- enter
  //             ^-- exit
  //           ^^^^-- this cell
  // ```
  if (valueStart !== 0) {
    // `markdown-rs` uses:
    // rowKind === 2 ? 'tableDelimiterCellValue' : 'tableCellText'
    /** @type {Token} */
    const contentToken = {
      type: 'tableContent',
      start: Object.assign({}, getPoint(events, valueStart)),
      end: Object.assign({}, getPoint(events, valueEnd))
    };
    map.add(valueStart, 0, [['enter', contentToken, context]]);

    if (rowKind !== 2) {
      // Stretch the first value token over the whole content and mark it as
      // a text chunk.
      const firstToken = events[valueStart][1];
      const lastToken = events[valueEnd][1];
      firstToken.end = Object.assign({}, lastToken.end);
      firstToken.type = 'chunkText';
      firstToken.contentType = 'text';

      // Drop the events between the first enter and the last exit.
      const surplus = valueEnd - valueStart - 1;
      if (surplus > 0) {
        map.add(valueStart + 1, surplus, []);
      }
    }
    map.add(valueEnd + 1, 0, [['exit', contentToken, context]]);
  }

  // Not at the row end: the cell stays open for the next call.
  if (rowEnd === undefined) {
    return cellToken;
  }

  // Close the last cell at the row end.
  //
  // ```markdown
  // > | | aa | bb | cc |
  //                    ^-- exit
  //               ^^^^^^-- this cell (the last one contains two "between" parts)
  // ```
  cellToken.end = Object.assign({}, getPoint(events, rowEnd));
  map.add(rowEnd, 0, [['exit', cellToken, context]]);
  return undefined;
}
/**
 * Queue the exit of a table, preceded by the exit of its body if there is one.
 *
 * Both tokens get their `end` set to a copy of the point of the event at
 * `index`, and the exits are queued right after that event.
 *
 * @param {Readonly<EditMap>} map
 *   Edit map that collects the edits.
 * @param {Readonly<TokenizeContext>} context
 *   Context whose `events` are read, and which is attached to new events.
 * @param {number} index
 *   Index of the last event in the table.
 * @param {Token} table
 *   Table token to close.
 * @param {Token | undefined} tableBody
 *   Table body token to close, if any.
 */
// eslint-disable-next-line max-params
function flushTableEnd(map, context, index, table, tableBody) {
  const endPoint = getPoint(context.events, index);
  /** @type {Array<Event>} */
  const closing = [];

  // The body, when present, has to exit before the table does.
  if (tableBody) {
    tableBody.end = Object.assign({}, endPoint);
    closing.push(['exit', tableBody, context]);
  }

  table.end = Object.assign({}, endPoint);
  closing.push(['exit', table, context]);

  map.add(index + 1, 0, closing);
}
/**
 * Get the point that belongs to an event: the token's `start` for an `enter`
 * event, its `end` for any other event.
 *
 * @param {Readonly<Array<Event>>} events
 *   List of events.
 * @param {number} index
 *   Index of the event to look at.
 * @returns {Readonly<Point>}
 *   The point itself (not a copy).
 */
function getPoint(events, index) {
  const entry = events[index];
  const token = entry[1];
  if (entry[0] === 'enter') {
    return token.start;
  }
  return token.end;
}