/** * @import { * Break, * Blockquote, * Code, * Definition, * Emphasis, * Heading, * Html, * Image, * InlineCode, * Link, * ListItem, * List, * Nodes, * Paragraph, * PhrasingContent, * ReferenceType, * Root, * Strong, * Text, * ThematicBreak * } from 'mdast' * @import { * Encoding, * Event, * Token, * Value * } from 'micromark-util-types' * @import {Point} from 'unist' * @import { * CompileContext, * CompileData, * Config, * Extension, * Handle, * OnEnterError, * Options * } from './types.js' */ import { toString } from 'mdast-util-to-string'; import { parse, postprocess, preprocess } from 'micromark'; import { decodeNumericCharacterReference } from 'micromark-util-decode-numeric-character-reference'; import { decodeString } from 'micromark-util-decode-string'; import { normalizeIdentifier } from 'micromark-util-normalize-identifier'; import { decodeNamedCharacterReference } from 'decode-named-character-reference'; import { stringifyPosition } from 'unist-util-stringify-position'; const own = {}.hasOwnProperty; /** * Turn markdown into a syntax tree. * * @overload * @param {Value} value * @param {Encoding | null | undefined} [encoding] * @param {Options | null | undefined} [options] * @returns {Root} * * @overload * @param {Value} value * @param {Options | null | undefined} [options] * @returns {Root} * * @param {Value} value * Markdown to parse. * @param {Encoding | Options | null | undefined} [encoding] * Character encoding for when `value` is `Buffer`. * @param {Options | null | undefined} [options] * Configuration. * @returns {Root} * mdast tree. */ export function fromMarkdown(value, encoding, options) { if (typeof encoding !== 'string') { options = encoding; encoding = undefined; } return compiler(options)(postprocess(parse(options).document().write(preprocess()(value, encoding, true)))); } /** * Note this compiler only understand complete buffering, not streaming. 
 *
 * Builds the handler configuration (defaults merged with
 * `options.mdastExtensions`) and returns the `compile` function that turns
 * a list of events into a tree.
 *
 * @param {Options | null | undefined} [options]
 *   Configuration; only `mdastExtensions` is read here.
 */
function compiler(options) {
  /** @type {Config} */
  const config = {
    transforms: [],
    // Node types in which a line ending is kept as text (see
    // `onexitlineending`).
    canContainEols: ['emphasis', 'fragment', 'heading', 'paragraph', 'strong'],
    // Handlers keyed by token type, called for `enter` events.
    enter: {
      autolink: opener(link),
      autolinkProtocol: onenterdata,
      autolinkEmail: onenterdata,
      atxHeading: opener(heading),
      blockQuote: opener(blockQuote),
      characterEscape: onenterdata,
      characterReference: onenterdata,
      codeFenced: opener(codeFlow),
      codeFencedFenceInfo: buffer,
      codeFencedFenceMeta: buffer,
      codeIndented: opener(codeFlow, buffer),
      codeText: opener(codeText, buffer),
      codeTextData: onenterdata,
      data: onenterdata,
      codeFlowValue: onenterdata,
      definition: opener(definition),
      definitionDestinationString: buffer,
      definitionLabelString: buffer,
      definitionTitleString: buffer,
      emphasis: opener(emphasis),
      hardBreakEscape: opener(hardBreak),
      hardBreakTrailing: opener(hardBreak),
      htmlFlow: opener(html, buffer),
      htmlFlowData: onenterdata,
      htmlText: opener(html, buffer),
      htmlTextData: onenterdata,
      image: opener(image),
      label: buffer,
      link: opener(link),
      listItem: opener(listItem),
      listItemValue: onenterlistitemvalue,
      listOrdered: opener(list, onenterlistordered),
      listUnordered: opener(list),
      paragraph: opener(paragraph),
      reference: onenterreference,
      referenceString: buffer,
      resourceDestinationString: buffer,
      resourceTitleString: buffer,
      setextHeading: opener(heading),
      strong: opener(strong),
      thematicBreak: opener(thematicBreak)
    },
    // Handlers keyed by token type, called for `exit` events.
    exit: {
      atxHeading: closer(),
      atxHeadingSequence: onexitatxheadingsequence,
      autolink: closer(),
      autolinkEmail: onexitautolinkemail,
      autolinkProtocol: onexitautolinkprotocol,
      blockQuote: closer(),
      characterEscapeValue: onexitdata,
      characterReferenceMarkerHexadecimal: onexitcharacterreferencemarker,
      characterReferenceMarkerNumeric: onexitcharacterreferencemarker,
      characterReferenceValue: onexitcharacterreferencevalue,
      characterReference: onexitcharacterreference,
      codeFenced: closer(onexitcodefenced),
      codeFencedFence: onexitcodefencedfence,
      codeFencedFenceInfo: onexitcodefencedfenceinfo,
      codeFencedFenceMeta: onexitcodefencedfencemeta,
      codeFlowValue: onexitdata,
      codeIndented: closer(onexitcodeindented),
      codeText: closer(onexitcodetext),
      codeTextData: onexitdata,
      data: onexitdata,
      definition: closer(),
      definitionDestinationString: onexitdefinitiondestinationstring,
      definitionLabelString: onexitdefinitionlabelstring,
      definitionTitleString: onexitdefinitiontitlestring,
      emphasis: closer(),
      hardBreakEscape: closer(onexithardbreak),
      hardBreakTrailing: closer(onexithardbreak),
      htmlFlow: closer(onexithtmlflow),
      htmlFlowData: onexitdata,
      htmlText: closer(onexithtmltext),
      htmlTextData: onexitdata,
      image: closer(onexitimage),
      label: onexitlabel,
      labelText: onexitlabeltext,
      lineEnding: onexitlineending,
      link: closer(onexitlink),
      listItem: closer(),
      listOrdered: closer(),
      listUnordered: closer(),
      paragraph: closer(),
      referenceString: onexitreferencestring,
      resourceDestinationString: onexitresourcedestinationstring,
      resourceTitleString: onexitresourcetitlestring,
      resource: onexitresource,
      setextHeading: closer(onexitsetextheading),
      setextHeadingLineSequence: onexitsetextheadinglinesequence,
      setextHeadingText: onexitsetextheadingtext,
      strong: closer(),
      thematicBreak: closer()
    }
  };

  // Merge user extensions into the defaults (mutates `config`).
  configure(config, (options || {}).mdastExtensions || []);

  // Scratch state shared by the handlers through `this.data`.
  /** @type {CompileData} */
  const data = {};

  return compile;

  /**
   * Turn micromark events into an mdast tree.
   *
   * Runs in two passes: the first rewrites `events` in place so that every
   * list item gets its own `listItem` enter/exit pair, the second dispatches
   * each event to the matching `enter`/`exit` handler in `config`.
   *
   * @param {Array<Event>} events
   *   Events.
   * @returns {Root}
   *   mdast tree.
   */
  function compile(events) {
    /** @type {Root} */
    let tree = { type: 'root', children: [] };
    // `sliceSerialize` is not part of the shared context: it is taken from
    // each event and merged in per handler call below.
    /** @type {Omit<CompileContext, 'sliceSerialize'>} */
    const context = {
      stack: [tree],
      tokenStack: [],
      config,
      enter,
      exit,
      buffer,
      resume,
      data
    };
    // Indices of the `enter` events of the lists that are currently open.
    /** @type {Array<number>} */
    const listStack = [];
    let index = -1;

    while (++index < events.length) {
      // We preprocess lists to add `listItem` tokens, and to infer whether
      // items or the list itself are spread out.
      if (events[index][1].type === "listOrdered" || events[index][1].type === "listUnordered") {
        if (events[index][0] === 'enter') {
          listStack.push(index);
        } else {
          const tail = listStack.pop();
          // `prepareList` splices events in, so continue from the list exit's
          // new index.
          index = prepareList(events, tail, index);
        }
      }
    }

    index = -1;

    while (++index < events.length) {
      // `events[index][0]` is `'enter'` or `'exit'`, selecting the handler map.
      const handler = config[events[index][0]];

      if (own.call(handler, events[index][1].type)) {
        handler[events[index][1].type].call(
          Object.assign({ sliceSerialize: events[index][2].sliceSerialize }, context),
          events[index][1]
        );
      }
    }

    // Handle tokens still being open.
    if (context.tokenStack.length > 0) {
      const tail = context.tokenStack[context.tokenStack.length - 1];
      // Prefer the error handler registered when the token was entered.
      const handler = tail[1] || defaultOnError;
      handler.call(context, undefined, tail[0]);
    }

    // Figure out `root` position.
    // The end is read from the token of the second-to-last event.
    tree.position = {
      start: point(events.length > 0 ? events[0][1].start : { line: 1, column: 1, offset: 0 }),
      end: point(events.length > 0 ? events[events.length - 2][1].end : { line: 1, column: 1, offset: 0 })
    };

    // Call transforms.
    // A transform may return a replacement tree; otherwise the current one is
    // kept.
    index = -1;
    while (++index < config.transforms.length) {
      tree = config.transforms[index](tree) || tree;
    }

    return tree;
  }

  /**
   * Prepare one list: splice synthetic `listItem` enter/exit events around
   * each item and set `_spread` on the item tokens and on the list token.
   *
   * Mutates `events` in place.
   *
   * @param {Array<Event>} events
   *   All events.
   * @param {number} start
   *   Index of the list's `enter` event.
   * @param {number} length
   *   Index of the list's `exit` event.
   * @returns {number}
   *   Index of the list's `exit` event after the inserted events.
   */
  function prepareList(events, start, length) {
    let index = start - 1;
    // Starts at -1 so that it is 0 while directly inside this list; nested
    // lists and block quotes raise it.
    let containerBalance = -1;
    let listSpread = false;
    /** @type {Token | undefined} */
    let listItem;
    /** @type {number | undefined} */
    let lineIndex;
    /** @type {number | undefined} */
    let firstBlankLineIndex;
    /** @type {boolean | undefined} */
    let atMarker;

    while (++index <= length) {
      const event = events[index];

      switch (event[1].type) {
        case "listUnordered":
        case "listOrdered":
        case "blockQuote": {
          if (event[0] === 'enter') {
            containerBalance++;
          } else {
            containerBalance--;
          }

          atMarker = undefined;
          break;
        }

        case "lineEndingBlank": {
          if (event[0] === 'enter') {
            // Remember the first blank line directly inside the current item,
            // unless it comes right after the item's marker.
            if (listItem && !atMarker && !containerBalance && !firstBlankLineIndex) {
              firstBlankLineIndex = index;
            }

            atMarker = undefined;
          }

          break;
        }

        case "linePrefix":
        case "listItemValue":
        case "listItemMarker":
        case "listItemPrefix":
        case "listItemPrefixWhitespace": {
          // Empty.
          // These tokens leave `atMarker` untouched.
          break;
        }

        default: {
          atMarker = undefined;
        }
      }

      // Either a new item starts directly in this list, or this list itself
      // ends: in both cases the item that is open (if any) must be closed.
      if (!containerBalance && event[0] === 'enter' && event[1].type === "listItemPrefix" || containerBalance === -1 && event[0] === 'exit' && (event[1].type === "listUnordered" || event[1].type === "listOrdered")) {
        if (listItem) {
          let tailIndex = index;
          lineIndex = undefined;

          // Scan backward over trailing line endings and prefix tokens.
          // `lineIndex` ends up at the earliest line ending of that run, which
          // is typed `lineEnding`; every later one is retyped to
          // `lineEndingBlank` and marks the list as spread.
          while (tailIndex--) {
            const tailEvent = events[tailIndex];

            if (tailEvent[1].type === "lineEnding" || tailEvent[1].type === "lineEndingBlank") {
              if (tailEvent[0] === 'exit') continue;

              if (lineIndex) {
                events[lineIndex][1].type = "lineEndingBlank";
                listSpread = true;
              }

              tailEvent[1].type = "lineEnding";
              lineIndex = tailIndex;
            } else if (tailEvent[1].type === "linePrefix" || tailEvent[1].type === "blockQuotePrefix" || tailEvent[1].type === "blockQuotePrefixWhitespace" || tailEvent[1].type === "blockQuoteMarker" || tailEvent[1].type === "listItemIndent") {
              // Empty
            } else {
              break;
            }
          }

          // A blank line before the trailing run means the item is spread.
          if (firstBlankLineIndex && (!lineIndex || firstBlankLineIndex < lineIndex)) {
            listItem._spread = true;
          }

          // Fix position.
          listItem.end = Object.assign({}, lineIndex ? events[lineIndex][1].start : event[1].end);

          // Close the item before its trailing line endings, or right here.
          events.splice(lineIndex || index, 0, ['exit', listItem, event[2]]);
          // One event was inserted at or before `index`: shift both cursors.
          index++;
          length++;
        }

        // Create a new list item.
        if (event[1].type === "listItemPrefix") {
          /** @type {Token} */
          const item = {
            type: 'listItem',
            _spread: false,
            start: Object.assign({}, event[1].start),
            // @ts-expect-error: we’ll add `end` in a second.
            end: undefined
          };
          listItem = item;
          events.splice(index, 0, ['enter', item, event[2]]);
          index++;
          length++;
          firstBlankLineIndex = undefined;
          atMarker = true;
        }
      }
    }

    // Store the result on the list's token; `list()` reads it later.
    events[start][1]._spread = listSpread;
    return length;
  }

  /**
   * Create an opener handle.
   *
   * The returned handle creates a node from the token, enters it, and then
   * calls `and` (when given) with the same `this` and token.
   *
   * @param {(token: Token) => Nodes} create
   *   Create a node.
   * @param {Handle | undefined} [and]
   *   Optional function to also run.
   * @returns {Handle}
   *   Handle.
   */
  function opener(create, and) {
    return open;

    /**
     * @this {CompileContext}
     * @param {Token} token
     * @returns {undefined}
     */
    function open(token) {
      enter.call(this, create(token), token);
      if (and) and.call(this, token);
    }
  }

  /**
   * Push an empty `fragment` node onto the stack. It is not added to any
   * parent; `resume` pops it again and serializes what was collected.
   *
   * @type {CompileContext['buffer']}
   */
  function buffer() {
    this.stack.push({ type: 'fragment', children: [] });
  }

  /**
   * Append `node` to the children of the node on top of the stack, push it
   * onto the stack, record the token (with its optional error handler) on
   * the token stack, and set the node's start position.
   *
   * @type {CompileContext['enter']}
   */
  function enter(node, token, errorHandler) {
    const parent = this.stack[this.stack.length - 1];
    /** @type {Array<Nodes>} */
    const siblings = parent.children;
    siblings.push(node);
    this.stack.push(node);
    this.tokenStack.push([token, errorHandler || undefined]);
    node.position = {
      start: point(token.start),
      // @ts-expect-error: `end` will be patched later.
      end: undefined
    };
  }

  /**
   * Create a closer handle.
   *
   * The returned handle first calls `and` (when given), while the node is
   * still on the stack, and then exits the node.
   *
   * @param {Handle | undefined} [and]
   *   Optional function to also run.
   * @returns {Handle}
   *   Handle.
   */
  function closer(and) {
    return close;

    /**
     * @this {CompileContext}
     * @param {Token} token
     * @returns {undefined}
     */
    function close(token) {
      if (and) and.call(this, token);
      exit.call(this, token);
    }
  }

  /**
   * Pop the current node and its token, and set the node's end position.
   *
   * Throws when no token is open. When the open token has a different type
   * than `token`, calls `onExitError`, else the handler stored on enter,
   * else `defaultOnError`.
   *
   * @type {CompileContext['exit']}
   */
  function exit(token, onExitError) {
    const node = this.stack.pop();
    const open = this.tokenStack.pop();

    if (!open) {
      throw new Error('Cannot close `' + token.type + '` (' + stringifyPosition({ start: token.start, end: token.end }) + '): it’s not open');
    } else if (open[0].type !== token.type) {
      if (onExitError) {
        onExitError.call(this, token, open[0]);
      } else {
        const handler = open[1] || defaultOnError;
        handler.call(this, token, open[0]);
      }
    }

    node.position.end = point(token.end);
  }

  /**
   * Pop the node on top of the stack and return it serialized with
   * `toString`.
   *
   * @type {CompileContext['resume']}
   */
  function resume() {
    return toString(this.stack.pop());
  }

  //
  // Handlers.
  //

  /**
   * Flag that the next list item value is the first one of an ordered list.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onenterlistordered() {
    this.data.expectingFirstListItemValue = true;
  }

  /**
   * For the first item value after an ordered list opened, parse it as a
   * base-10 integer and store it as `start` on the node second from the top
   * of the stack (presumably the list, with the list item on top).
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onenterlistitemvalue(token) {
    if (this.data.expectingFirstListItemValue) {
      const ancestor = this.stack[this.stack.length - 2];
      ancestor.start = Number.parseInt(this.sliceSerialize(token), 10);
      this.data.expectingFirstListItemValue = undefined;
    }
  }

  /**
   * Store the buffered fence info as `lang` on the current node.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitcodefencedfenceinfo() {
    const data = this.resume();
    const node = this.stack[this.stack.length - 1];
    node.lang = data;
  }

  /**
   * Store the buffered fence meta as `meta` on the current node.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitcodefencedfencemeta() {
    const data = this.resume();
    const node = this.stack[this.stack.length - 1];
    node.meta = data;
  }

  /**
   * After the first fence of a fenced code, start buffering its content.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitcodefencedfence() {
    // Exit if this is the closing fence.
    if (this.data.flowCodeInside) return;
    this.buffer();
    this.data.flowCodeInside = true;
  }

  /**
   * Store the buffered content as `value`, without one leading and one
   * trailing line ending, and reset the fence flag.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitcodefenced() {
    const data = this.resume();
    const node = this.stack[this.stack.length - 1];
    node.value = data.replace(/^(\r?\n|\r)|(\r?\n|\r)$/g, '');
    this.data.flowCodeInside = undefined;
  }

  /**
   * Store the buffered content as `value`, without one trailing line ending.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitcodeindented() {
    const data = this.resume();
    const node = this.stack[this.stack.length - 1];
    node.value = data.replace(/(\r?\n|\r)$/g, '');
  }

  /**
   * Set `label` from the buffered content, and `identifier` from the
   * normalized, lowercased source text of the token.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitdefinitionlabelstring(token) {
    const label = this.resume();
    const node = this.stack[this.stack.length - 1];
    node.label = label;
    node.identifier = normalizeIdentifier(this.sliceSerialize(token)).toLowerCase();
  }

  /**
   * Store the buffered content as `title` on the current node.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitdefinitiontitlestring() {
    const data = this.resume();
    const node = this.stack[this.stack.length - 1];
    node.title = data;
  }

  /**
   * Store the buffered content as `url` on the current node.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitdefinitiondestinationstring() {
    const data = this.resume();
    const node = this.stack[this.stack.length - 1];
    node.url = data;
  }

  /**
   * Set `depth` to the length of the sequence, but only while `depth` is
   * still falsy, so a later sequence in the same heading does not overwrite
   * it.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitatxheadingsequence(token) {
    const node = this.stack[this.stack.length - 1];

    if (!node.depth) {
      const depth = this.sliceSerialize(token).length;
      node.depth = depth;
    }
  }

  /**
   * Flag that the next line ending must not be added as text (see
   * `onexitlineending`).
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitsetextheadingtext() {
    this.data.setextHeadingSlurpLineEnding = true;
  }

  /**
   * Set `depth` from the underline: 1 when it starts with `=` (code point
   * 61), 2 otherwise.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitsetextheadinglinesequence(token) {
    const node = this.stack[this.stack.length - 1];
    node.depth = this.sliceSerialize(token).codePointAt(0) === 61 ? 1 : 2;
  }

  /**
   * Reset the flag set by `onexitsetextheadingtext`.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitsetextheading() {
    this.data.setextHeadingSlurpLineEnding = undefined;
  }

  /**
   * Push a text node onto the stack: the last child of the current node when
   * that is already a text node, otherwise a new one appended to it.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onenterdata(token) {
    const node = this.stack[this.stack.length - 1];
    /** @type {Array<Nodes>} */
    const siblings = node.children;
    let tail = siblings[siblings.length - 1];

    if (!tail || tail.type !== 'text') {
      // Add a new text node.
      tail = text();
      tail.position = {
        start: point(token.start),
        // @ts-expect-error: we’ll add `end` later.
        end: undefined
      };
      siblings.push(tail);
    }

    this.stack.push(tail);
  }

  /**
   * Pop the node on top of the stack, append the token's source text to its
   * `value`, and move its end position to the token's end.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitdata(token) {
    const tail = this.stack.pop();
    tail.value += this.sliceSerialize(token);
    tail.position.end = point(token.end);
  }

  /**
   * Handle a line ending: extend a preceding hard break over it, or add it as
   * text when the current node type is listed in `config.canContainEols`.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitlineending(token) {
    const context = this.stack[this.stack.length - 1];

    // If we’re at a hard break, include the line ending in there.
    if (this.data.atHardBreak) {
      const tail = context.children[context.children.length - 1];
      tail.position.end = point(token.end);
      this.data.atHardBreak = undefined;
      return;
    }

    if (!this.data.setextHeadingSlurpLineEnding && config.canContainEols.includes(context.type)) {
      onenterdata.call(this, token);
      onexitdata.call(this, token);
    }
  }

  /**
   * Flag that a hard break was just seen (see `onexitlineending`).
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexithardbreak() {
    this.data.atHardBreak = true;
  }

  /**
   * Store the buffered content as `value` on the current node.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexithtmlflow() {
    const data = this.resume();
    const node = this.stack[this.stack.length - 1];
    node.value = data;
  }

  /**
   * Store the buffered content as `value` on the current node.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexithtmltext() {
    const data = this.resume();
    const node = this.stack[this.stack.length - 1];
    node.value = data;
  }

  /**
   * Store the buffered content as `value` on the current node.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitcodetext() {
    const data = this.resume();
    const node = this.stack[this.stack.length - 1];
    node.value = data;
  }

  /**
   * Finish a link: turn it into a reference node (type + `'Reference'`)
   * when `inReference` is still set, otherwise drop the stashed reference
   * fields.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitlink() {
    const node = this.stack[this.stack.length - 1];

    // Note: there are also `identifier` and `label` fields on this link node!
    // These are used / cleaned here.
    // To do: clean.
    if (this.data.inReference) {
      /** @type {ReferenceType} */
      const referenceType = this.data.referenceType || 'shortcut';
      node.type += 'Reference';
      // @ts-expect-error: mutate.
      node.referenceType = referenceType;
      // @ts-expect-error: mutate.
      delete node.url;
      delete node.title;
    } else {
      // @ts-expect-error: mutate.
      delete node.identifier;
      // @ts-expect-error: mutate.
      delete node.label;
    }

    this.data.referenceType = undefined;
  }

  /**
   * Finish an image: turn it into a reference node (type + `'Reference'`)
   * when `inReference` is still set, otherwise drop the stashed reference
   * fields.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitimage() {
    const node = this.stack[this.stack.length - 1];

    // Note: there are also `identifier` and `label` fields on this image node!
    // These are used / cleaned here.
    // To do: clean.
    if (this.data.inReference) {
      /** @type {ReferenceType} */
      const referenceType = this.data.referenceType || 'shortcut';
      node.type += 'Reference';
      // @ts-expect-error: mutate.
      node.referenceType = referenceType;
      // @ts-expect-error: mutate.
      delete node.url;
      delete node.title;
    } else {
      // @ts-expect-error: mutate.
      delete node.identifier;
      // @ts-expect-error: mutate.
      delete node.label;
    }

    this.data.referenceType = undefined;
  }

  /**
   * Stash `label` and `identifier`, derived from the label's source text, on
   * the node second from the top of the stack (presumably the link or image,
   * with the label's buffer on top).
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitlabeltext(token) {
    const string = this.sliceSerialize(token);
    const ancestor = this.stack[this.stack.length - 2];
    // @ts-expect-error: stash this on the node, as it might become a reference
    // later.
    ancestor.label = decodeString(string);
    // @ts-expect-error: same as above.
    ancestor.identifier = normalizeIdentifier(string).toLowerCase();
  }

  /**
   * Close the label's buffer: a link takes the buffered children, any other
   * node takes the serialized text as `alt`.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitlabel() {
    // Keep a handle on the fragment before `resume` pops it.
    const fragment = this.stack[this.stack.length - 1];
    const value = this.resume();
    const node = this.stack[this.stack.length - 1];
    // Assume a reference.
    // `onexitresource` clears this again when a resource follows.
    this.data.inReference = true;

    if (node.type === 'link') {
      /** @type {Array<PhrasingContent>} */
      const children = fragment.children;
      node.children = children;
    } else {
      node.alt = value;
    }
  }

  /**
   * Store the buffered content as `url` on the current node.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitresourcedestinationstring() {
    const data = this.resume();
    const node = this.stack[this.stack.length - 1];
    node.url = data;
  }

  /**
   * Store the buffered content as `title` on the current node.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitresourcetitlestring() {
    const data = this.resume();
    const node = this.stack[this.stack.length - 1];
    node.title = data;
  }

  /**
   * A resource was found, so the current link or image is not a reference.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitresource() {
    this.data.inReference = undefined;
  }

  /**
   * Start with reference type `collapsed`; `onexitreferencestring` changes
   * it to `full` when a reference string is present.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onenterreference() {
    this.data.referenceType = 'collapsed';
  }

  /**
   * Stash `label` (buffered content) and `identifier` (normalized, lowercased
   * source text) on the current node and mark the reference as `full`.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitreferencestring(token) {
    const label = this.resume();
    const node = this.stack[this.stack.length - 1];
    // @ts-expect-error: stash this on the node, as it might become a reference
    // later.
    node.label = label;
    // @ts-expect-error: same as above.
    node.identifier = normalizeIdentifier(this.sliceSerialize(token)).toLowerCase();
    this.data.referenceType = 'full';
  }

  /**
   * Remember which numeric marker (decimal or hexadecimal) was seen, for
   * `onexitcharacterreferencevalue`.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitcharacterreferencemarker(token) {
    this.data.characterReferenceType = token.type;
  }

  /**
   * Decode the reference and append the result to the node on top of the
   * stack: numeric (base 10 or 16) when a marker was recorded, named
   * otherwise.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitcharacterreferencevalue(token) {
    const data = this.sliceSerialize(token);
    const type = this.data.characterReferenceType;
    /** @type {string} */
    let value;

    if (type) {
      value = decodeNumericCharacterReference(data, type === "characterReferenceMarkerNumeric" ? 10 : 16);
      this.data.characterReferenceType = undefined;
    } else {
      const result = decodeNamedCharacterReference(data);
      value = result;
    }

    const tail = this.stack[this.stack.length - 1];
    tail.value += value;
  }

  /**
   * Pop the node on top of the stack and move its end position to the end of
   * the whole character reference.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitcharacterreference(token) {
    const tail = this.stack.pop();
    tail.position.end = point(token.end);
  }

  /**
   * Add the token's text as data and use the same text as `url` of the
   * current node.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitautolinkprotocol(token) {
    onexitdata.call(this, token);
    const node = this.stack[this.stack.length - 1];
    node.url = this.sliceSerialize(token);
  }

  /**
   * Add the token's text as data and use it, prefixed with `mailto:`, as
   * `url` of the current node.
   *
   * @this {CompileContext}
   * @type {Handle}
   */
  function onexitautolinkemail(token) {
    onexitdata.call(this, token);
    const node = this.stack[this.stack.length - 1];
    node.url = 'mailto:' + this.sliceSerialize(token);
  }

  //
  // Creators.
  //

  /** @returns {Blockquote} */
  function blockQuote() {
    return { type: 'blockquote', children: [] };
  }

  /** @returns {Code} */
  function codeFlow() {
    return { type: 'code', lang: null, meta: null, value: '' };
  }

  /** @returns {InlineCode} */
  function codeText() {
    return { type: 'inlineCode', value: '' };
  }

  /** @returns {Definition} */
  function definition() {
    return {
      type: 'definition',
      identifier: '',
      label: null,
      title: null,
      url: ''
    };
  }

  /** @returns {Emphasis} */
  function emphasis() {
    return { type: 'emphasis', children: [] };
  }

  /** @returns {Heading} */
  function heading() {
    return {
      type: 'heading',
      // @ts-expect-error `depth` will be set later.
      depth: 0,
      children: []
    };
  }

  /** @returns {Break} */
  function hardBreak() {
    return { type: 'break' };
  }

  /** @returns {Html} */
  function html() {
    return { type: 'html', value: '' };
  }

  /** @returns {Image} */
  function image() {
    return { type: 'image', title: null, url: '', alt: null };
  }

  /** @returns {Link} */
  function link() {
    return { type: 'link', title: null, url: '', children: [] };
  }

  /**
   * Create a list; `spread` comes from the `_spread` field that
   * `prepareList` set on the token.
   *
   * @param {Token} token
   * @returns {List}
   */
  function list(token) {
    return {
      type: 'list',
      ordered: token.type === 'listOrdered',
      start: null,
      spread: token._spread,
      children: []
    };
  }

  /**
   * Create a list item; `spread` comes from the token's `_spread` field.
   *
   * @param {Token} token
   * @returns {ListItem}
   */
  function listItem(token) {
    return {
      type: 'listItem',
      spread: token._spread,
      checked: null,
      children: []
    };
  }

  /** @returns {Paragraph} */
  function paragraph() {
    return { type: 'paragraph', children: [] };
  }

  /** @returns {Strong} */
  function strong() {
    return { type: 'strong', children: [] };
  }

  /** @returns {Text} */
  function text() {
    return { type: 'text', value: '' };
  }

  /** @returns {ThematicBreak} */
  function thematicBreak() {
    return { type: 'thematicBreak' };
  }
}

/**
 * Copy a point-like value.
 *
 * @param {Point} d
 *   Point-like value.
 * @returns {Point}
 *   unist point.
*/ function point(d) { return { line: d.line, column: d.column, offset: d.offset }; } /** * @param {Config} combined * @param {Array | Extension>} extensions * @returns {undefined} */ function configure(combined, extensions) { let index = -1; while (++index < extensions.length) { const value = extensions[index]; if (Array.isArray(value)) { configure(combined, value); } else { extension(combined, value); } } } /** * @param {Config} combined * @param {Extension} extension * @returns {undefined} */ function extension(combined, extension) { /** @type {keyof Extension} */ let key; for (key in extension) { if (own.call(extension, key)) { switch (key) { case 'canContainEols': { const right = extension[key]; if (right) { combined[key].push(...right); } break; } case 'transforms': { const right = extension[key]; if (right) { combined[key].push(...right); } break; } case 'enter': case 'exit': { const right = extension[key]; if (right) { Object.assign(combined[key], right); } break; } // No default } } } } /** @type {OnEnterError} */ function defaultOnError(left, right) { if (left) { throw new Error('Cannot close `' + left.type + '` (' + stringifyPosition({ start: left.start, end: left.end }) + '): a different token (`' + right.type + '`, ' + stringifyPosition({ start: right.start, end: right.end }) + ') is open'); } else { throw new Error('Cannot close document, a token (`' + right.type + '`, ' + stringifyPosition({ start: right.start, end: right.end }) + ') is still open'); } }