/**
 * @import {Chunk, Event, Token} from 'micromark-util-types'
 */

import {ok as assert} from 'devlop'
import {splice} from 'micromark-util-chunked'
import {codes, types} from 'micromark-util-symbol'
import {SpliceBuffer} from './lib/splice-buffer.js'

// Hidden API exposed for testing.
export {SpliceBuffer} from './lib/splice-buffer.js'

/**
 * Tokenize subcontent.
 *
 * Walks the flat event list, re-tokenizing every token that carries a
 * `contentType` with its own subtokenizer (via `subcontent`), and moves
 * container exits that trail line endings to before those line endings.
 *
 * @param {Array<Event>} eventsArray
 *   List of events.
 * @returns {boolean}
 *   Whether *no* subtokens were found in this pass (the function returns
 *   `!more`, so `true` means nothing was left to subtokenize).
 */
// eslint-disable-next-line complexity
export function subtokenize(eventsArray) {
  // Map of indices into `events` to jump over: regions already replaced by a
  // subtokenizer’s events (filled in by `subcontent`).
  /** @type {Record<string, number>} */
  const jumps = {}
  let index = -1
  /** @type {Event} */
  let event
  /** @type {number | undefined} */
  let lineIndex
  /** @type {number} */
  let otherIndex
  /** @type {Event} */
  let otherEvent
  /** @type {Array<Event>} */
  let parameters
  /** @type {Array<Event>} */
  let subevents
  /** @type {boolean | undefined} */
  let more
  // Wrap the array so repeated splicing stays cheap.
  const events = new SpliceBuffer(eventsArray)

  while (++index < events.length) {
    // Skip past regions already handled by a subtokenizer.
    while (index in jumps) {
      index = jumps[index]
    }

    event = events.get(index)

    // Add a hook for the GFM tasklist extension, which needs to know if text
    // is in the first content of a list item.
    if (
      index &&
      event[1].type === types.chunkFlow &&
      events.get(index - 1)[1].type === types.listItemPrefix
    ) {
      assert(event[1]._tokenizer, 'expected `_tokenizer` on subtokens')
      subevents = event[1]._tokenizer.events
      otherIndex = 0

      // Skip over a leading blank line ending (its enter + exit pair).
      if (
        otherIndex < subevents.length &&
        subevents[otherIndex][1].type === types.lineEndingBlank
      ) {
        otherIndex += 2
      }

      // Mark every `chunkText` inside the first `content` token.
      if (
        otherIndex < subevents.length &&
        subevents[otherIndex][1].type === types.content
      ) {
        while (++otherIndex < subevents.length) {
          if (subevents[otherIndex][1].type === types.content) {
            break
          }

          if (subevents[otherIndex][1].type === types.chunkText) {
            subevents[otherIndex][1]._isInFirstContentOfListItem = true
            otherIndex++
          }
        }
      }
    }

    // Enter.
    if (event[0] === 'enter') {
      // A token with a `contentType` must be re-tokenized by a subtokenizer;
      // record the resulting jump map and hop over the spliced-in region.
      if (event[1].contentType) {
        Object.assign(jumps, subcontent(events, index))
        index = jumps[index]
        more = true
      }
    }
    // Exit.
    else if (event[1]._container) {
      otherIndex = index
      lineIndex = undefined

      // Walk back over line endings (and the whitespace around them)
      // preceding this container exit, to find the first line ending the
      // exit should be moved before.
      while (otherIndex--) {
        otherEvent = events.get(otherIndex)

        if (
          otherEvent[1].type === types.lineEnding ||
          otherEvent[1].type === types.lineEndingBlank
        ) {
          if (otherEvent[0] === 'enter') {
            // Every earlier-found line ending becomes blank relative to the
            // container; the current one becomes a plain line ending.
            if (lineIndex) {
              events.get(lineIndex)[1].type = types.lineEndingBlank
            }

            otherEvent[1].type = types.lineEnding
            lineIndex = otherIndex
          }
        } else if (
          otherEvent[1].type === types.linePrefix ||
          otherEvent[1].type === types.listItemIndent
        ) {
          // Move past.
        } else {
          break
        }
      }

      if (lineIndex) {
        // Fix position.
        event[1].end = {...events.get(lineIndex)[1].start}

        // Switch container exit w/ line endings.
        parameters = events.slice(lineIndex, index)
        parameters.unshift(event)
        events.splice(lineIndex, index - lineIndex + 1, parameters)
      }
    }
  }

  // The changes to the `events` buffer must be copied back into the
  // `eventsArray`.
  splice(eventsArray, 0, Number.POSITIVE_INFINITY, events.slice(0))

  return !more
}

/**
 * Tokenize embedded tokens.
 *
 * Feeds the chunks of the linked tokens starting at `events[eventIndex]`
 * into a subtokenizer, then splices the resulting child events back into
 * `events`, one slice per linked token.
 *
 * @param {SpliceBuffer<Event>} events
 *   Events.
 * @param {number} eventIndex
 *   Index.
 * @returns {Record<string, number>}
 *   Gaps: map of start indices (into the updated events) to end indices,
 *   describing regions the caller can jump over.
 */
function subcontent(events, eventIndex) {
  const token = events.get(eventIndex)[1]
  const context = events.get(eventIndex)[2]
  let startPosition = eventIndex - 1
  // Event index of each linked token, pushed front to back, popped back to
  // front while splicing below.
  /** @type {Array<number>} */
  const startPositions = []
  assert(token.contentType, 'expected `contentType` on subtokens')
  // Reuse the tokenizer already attached to this token if there is one;
  // otherwise create a fresh subtokenizer for this content type.
  let tokenizer = token._tokenizer
  if (!tokenizer) {
    tokenizer = context.parser[token.contentType](token.start)
    if (token._contentTypeTextTrailing) {
      tokenizer._contentTypeTextTrailing = true
    }
  }
  const childEvents = tokenizer.events
  /** @type {Array<[number, number]>} */
  const jumps = []
  /** @type {Record<string, number>} */
  const gaps = {}
  /** @type {Array<Chunk>} */
  let stream
  /** @type {Token | undefined} */
  let previous
  let index = -1
  /** @type {Token | undefined} */
  let current = token
  // Accumulated length difference from earlier splices, used when
  // translating jump pairs into `gaps` at the end.
  let adjust = 0
  let start = 0
  const breaks = [start]

  // Loop forward through the linked tokens to pass them in order to the
  // subtokenizer.
  while (current) {
    // Find the position of the event for this token.
    while (events.get(++startPosition)[1] !== current) {
      // Empty.
    }

    assert(
      !previous || current.previous === previous,
      'expected previous to match'
    )
    assert(!previous || previous.next === current, 'expected next to match')

    startPositions.push(startPosition)

    // Only feed tokens that were not already written to a tokenizer.
    if (!current._tokenizer) {
      stream = context.sliceStream(current)

      if (!current.next) {
        stream.push(codes.eof)
      }

      if (previous) {
        tokenizer.defineSkip(current.start)
      }

      if (current._isInFirstContentOfListItem) {
        tokenizer._gfmTasklistFirstContentOfListItem = true
      }

      tokenizer.write(stream)

      if (current._isInFirstContentOfListItem) {
        tokenizer._gfmTasklistFirstContentOfListItem = undefined
      }
    }

    // Unravel the next token.
    previous = current
    current = current.next
  }

  // Now, loop back through all events (and linked tokens), to figure out which
  // parts belong where.
  current = token

  while (++index < childEvents.length) {
    if (
      // Find a void token that includes a break.
      childEvents[index][0] === 'exit' &&
      childEvents[index - 1][0] === 'enter' &&
      childEvents[index][1].type === childEvents[index - 1][1].type &&
      childEvents[index][1].start.line !== childEvents[index][1].end.line
    ) {
      assert(current, 'expected a current token')
      start = index + 1
      breaks.push(start)
      // Help GC.
      current._tokenizer = undefined
      current.previous = undefined
      current = current.next
    }
  }

  // Help GC.
  tokenizer.events = []

  // If there’s one more token (which is the cases for lines that end in an
  // EOF), that’s perfect: the last point we found starts it.
  // If there isn’t then make sure any remaining content is added to it.
  if (current) {
    // Help GC.
    current._tokenizer = undefined
    current.previous = undefined
    assert(!current.next, 'expected no next token')
  } else {
    breaks.pop()
  }

  // Now splice the events from the subtokenizer into the current events,
  // moving back to front so that splice indices aren’t affected.
  index = breaks.length

  while (index--) {
    const slice = childEvents.slice(breaks[index], breaks[index + 1])
    const start = startPositions.pop()
    assert(start !== undefined, 'expected a start position when splicing')
    jumps.push([start, start + slice.length - 1])
    // Replace the two events of the original token with its subevents.
    events.splice(start, 2, slice)
  }

  jumps.reverse()
  index = -1

  // Translate the jump pairs (recorded back to front, hence the reverse
  // above) into `gaps`, adjusting for the length change each earlier splice
  // introduced (`slice.length - 2` per splice).
  while (++index < jumps.length) {
    gaps[adjust + jumps[index][0]] = adjust + jumps[index][1]
    adjust += jumps[index][1] - jumps[index][0] - 1
  }

  return gaps
}