// Mirror of https://github.com/Funkoala14/knowledgebase_law.git
// Synced 2025-06-09 08:58:16 +08:00 (854 lines, 20 KiB, JavaScript).

/**
 * @import {Code, ConstructRecord, Event, Extension, Previous, State, TokenizeContext, Tokenizer} from 'micromark-util-types'
 */

import { asciiAlpha, asciiAlphanumeric, asciiControl, markdownLineEndingOrSpace, unicodePunctuation, unicodeWhitespace } from 'micromark-util-character';
|
|||
|
// Constructs marked `partial`: the autolink tokenizers below run these
// through `effects.check` / `effects.attempt`.
const wwwPrefix = {
  tokenize: tokenizeWwwPrefix,
  partial: true
};
const domain = {
  tokenize: tokenizeDomain,
  partial: true
};
const path = {
  tokenize: tokenizePath,
  partial: true
};
const trail = {
  tokenize: tokenizeTrail,
  partial: true
};
const emailDomainDotTrail = {
  tokenize: tokenizeEmailDomainDotTrail,
  partial: true
};

// Named constructs: each pairs a tokenizer with a `previous` guard on the
// character code that precedes the potential literal.
const wwwAutolink = {
  name: 'wwwAutolink',
  tokenize: tokenizeWwwAutolink,
  previous: previousWww
};
const protocolAutolink = {
  name: 'protocolAutolink',
  tokenize: tokenizeProtocolAutolink,
  previous: previousProtocol
};
const emailAutolink = {
  name: 'emailAutolink',
  tokenize: tokenizeEmailAutolink,
  previous: previousEmail
};

/**
 * Constructs keyed by the character code on which they may start.
 *
 * @type {ConstructRecord}
 */
const text = {};

/**
 * Create an extension for `micromark` to support GitHub autolink literal
 * syntax.
 *
 * @returns {Extension}
 *   Extension for `micromark` that can be passed in `extensions` to enable GFM
 *   autolink literal syntax.
 */
export function gfmAutolinkLiteral() {
  return {
    text
  };
}

// Add alphanumerics: an email autolink can start at `0`-`9` (48-57),
// `A`-`Z` (65-90), or `a`-`z` (97-122).
for (const [first, last] of [[48, 57], [65, 90], [97, 122]]) {
  for (let code = first; code <= last; code++) {
    text[code] = emailAutolink;
  }
}

// The remaining characters accepted by `gfmAtext`: `+`, `-`, `.`, and `_`.
text[43] = emailAutolink;
text[45] = emailAutolink;
text[46] = emailAutolink;
text[95] = emailAutolink;

// `H` / `h` can also start a protocol (`http`, `https`) autolink.
text[72] = [emailAutolink, protocolAutolink];
text[104] = [emailAutolink, protocolAutolink];

// `W` / `w` can also start a `www.` autolink.
text[87] = [emailAutolink, wwwAutolink];
text[119] = [emailAutolink, wwwAutolink];

// To do: perform email autolink literals on events, afterwards.
// That’s where `markdown-rs` and `cmark-gfm` perform it.
// It should look for `@`, then for atext backwards, and then for a label
// forwards.
// To do: `mailto:`, `xmpp:` protocol as prefix.

/**
 * Email autolink literal.
 *
 * ```markdown
 * > | a contact@example.org b
 *       ^^^^^^^^^^^^^^^^^^^
 * ```
 *
 * Calls `ok` after exiting the `literalAutolinkEmail` and `literalAutolink`
 * tokens, or `nok` when the text is not an email autolink literal.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeEmailAutolink(effects, ok, nok) {
  const self = this;
  /**
   * Set once a non-trailing `.` was consumed in the domain.
   *
   * @type {boolean | undefined}
   */
  let dot;
  /**
   * Set once an alphanumeric, `-`, or `_` was consumed in the domain.
   *
   * @type {boolean | undefined}
   */
  let data;
  return start;

  /**
   * Start of email autolink literal.
   *
   * ```markdown
   * > | a contact@example.org b
   *       ^
   * ```
   *
   * @type {State}
   */
  function start(code) {
    // Must start on atext, after an allowed previous character, and not
    // after an unbalanced link/image label start.
    if (!gfmAtext(code) || !previousEmail.call(self, self.previous) || previousUnbalanced(self.events)) {
      return nok(code);
    }
    effects.enter('literalAutolink');
    effects.enter('literalAutolinkEmail');
    return atext(code);
  }

  /**
   * In email atext.
   *
   * ```markdown
   * > | a contact@example.org b
   *       ^
   * ```
   *
   * @type {State}
   */
  function atext(code) {
    if (gfmAtext(code)) {
      effects.consume(code);
      return atext;
    }

    // `@` ends the atext and starts the domain.
    if (code === 64) {
      effects.consume(code);
      return emailDomain;
    }
    return nok(code);
  }

  /**
   * In email domain.
   *
   * The reference code is a bit overly complex as it handles the `@`, of which
   * there may be just one.
   * Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L318>
   *
   * ```markdown
   * > | a contact@example.org b
   *               ^
   * ```
   *
   * @type {State}
   */
  function emailDomain(code) {
    // Dot followed by alphanumerical (not `-` or `_`).
    if (code === 46) {
      return effects.check(emailDomainDotTrail, emailDomainAfter, emailDomainDot)(code);
    }

    // Alphanumerical, `-`, and `_`.
    if (code === 45 || code === 95 || asciiAlphanumeric(code)) {
      data = true;
      effects.consume(code);
      return emailDomain;
    }

    // To do: `/` if xmpp.

    // Note: normally we’d truncate trailing punctuation from the link.
    // However, email autolink literals cannot contain any of those markers,
    // except for `.`, but that can only occur if it isn’t trailing.
    // So we can ignore truncating!
    return emailDomainAfter(code);
  }

  /**
   * In email domain, on dot that is not a trail.
   *
   * ```markdown
   * > | a contact@example.org b
   *                      ^
   * ```
   *
   * @type {State}
   */
  function emailDomainDot(code) {
    effects.consume(code);
    dot = true;
    return emailDomain;
  }

  /**
   * After email domain.
   *
   * ```markdown
   * > | a contact@example.org b
   *                          ^
   * ```
   *
   * @type {State}
   */
  function emailDomainAfter(code) {
    // Domain must not be empty, must include a dot, and must end in alphabetical.
    // Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L332>.
    if (data && dot && asciiAlpha(self.previous)) {
      effects.exit('literalAutolinkEmail');
      effects.exit('literalAutolink');
      return ok(code);
    }
    return nok(code);
  }
}

/**
 * `www` autolink literal.
 *
 * ```markdown
 * > | a www.example.org b
 *       ^^^^^^^^^^^^^^^
 * ```
 *
 * Calls `ok` after exiting the `literalAutolinkWww` and `literalAutolink`
 * tokens, or `nok` when the text is not a `www` autolink literal.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeWwwAutolink(effects, ok, nok) {
  const self = this;
  return wwwStart;

  /**
   * Start of www autolink literal.
   *
   * ```markdown
   * > | www.example.com/a?b#c
   *     ^
   * ```
   *
   * @type {State}
   */
  function wwwStart(code) {
    // Must start on `W` or `w`, after an allowed previous character, and not
    // after an unbalanced link/image label start.
    if (code !== 87 && code !== 119 || !previousWww.call(self, self.previous) || previousUnbalanced(self.events)) {
      return nok(code);
    }
    effects.enter('literalAutolink');
    effects.enter('literalAutolinkWww');
    // Note: we *check*, so we can discard the `www.` we parsed.
    // If it worked, we consider it as a part of the domain.
    return effects.check(wwwPrefix, effects.attempt(domain, effects.attempt(path, wwwAfter), nok), nok)(code);
  }

  /**
   * After a www autolink literal.
   *
   * ```markdown
   * > | www.example.com/a?b#c
   *                          ^
   * ```
   *
   * @type {State}
   */
  function wwwAfter(code) {
    effects.exit('literalAutolinkWww');
    effects.exit('literalAutolink');
    return ok(code);
  }
}

/**
 * Protocol autolink literal.
 *
 * ```markdown
 * > | a https://example.org b
 *       ^^^^^^^^^^^^^^^^^^^
 * ```
 *
 * Calls `ok` after exiting the `literalAutolinkHttp` and `literalAutolink`
 * tokens, or `nok` when the text is not a protocol autolink literal.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeProtocolAutolink(effects, ok, nok) {
  const self = this;
  // Characters of the protocol consumed so far (before the `:`).
  let buffer = '';
  // Whether the first `/` after the `:` was already consumed.
  let seen = false;
  return protocolStart;

  /**
   * Start of protocol autolink literal.
   *
   * ```markdown
   * > | https://example.com/a?b#c
   *     ^
   * ```
   *
   * @type {State}
   */
  function protocolStart(code) {
    // Must start on `H` or `h`, after an allowed previous character, and not
    // after an unbalanced link/image label start.
    if ((code === 72 || code === 104) && previousProtocol.call(self, self.previous) && !previousUnbalanced(self.events)) {
      effects.enter('literalAutolink');
      effects.enter('literalAutolinkHttp');
      buffer += String.fromCodePoint(code);
      effects.consume(code);
      return protocolPrefixInside;
    }
    return nok(code);
  }

  /**
   * In protocol.
   *
   * ```markdown
   * > | https://example.com/a?b#c
   *     ^^^^^
   * ```
   *
   * @type {State}
   */
  function protocolPrefixInside(code) {
    // `5` is size of `https`
    if (asciiAlpha(code) && buffer.length < 5) {
      // @ts-expect-error: definitely number.
      buffer += String.fromCodePoint(code);
      effects.consume(code);
      return protocolPrefixInside;
    }

    // `:`: only `http` and `https` (in any casing) are accepted.
    if (code === 58) {
      const protocol = buffer.toLowerCase();
      if (protocol === 'http' || protocol === 'https') {
        effects.consume(code);
        return protocolSlashesInside;
      }
    }
    return nok(code);
  }

  /**
   * In slashes.
   *
   * ```markdown
   * > | https://example.com/a?b#c
   *           ^^
   * ```
   *
   * @type {State}
   */
  function protocolSlashesInside(code) {
    if (code === 47) {
      effects.consume(code);

      // Second slash: the protocol is done.
      if (seen) {
        return afterProtocol;
      }
      seen = true;
      return protocolSlashesInside;
    }
    return nok(code);
  }

  /**
   * After protocol, before domain.
   *
   * ```markdown
   * > | https://example.com/a?b#c
   *             ^
   * ```
   *
   * @type {State}
   */
  function afterProtocol(code) {
    // To do: this is different from `markdown-rs`:
    // https://github.com/wooorm/markdown-rs/blob/b3a921c761309ae00a51fe348d8a43adbc54b518/src/construct/gfm_autolink_literal.rs#L172-L182
    return code === null || asciiControl(code) || markdownLineEndingOrSpace(code) || unicodeWhitespace(code) || unicodePunctuation(code) ? nok(code) : effects.attempt(domain, effects.attempt(path, protocolAfter), nok)(code);
  }

  /**
   * After a protocol autolink literal.
   *
   * ```markdown
   * > | https://example.com/a?b#c
   *                              ^
   * ```
   *
   * @type {State}
   */
  function protocolAfter(code) {
    effects.exit('literalAutolinkHttp');
    effects.exit('literalAutolink');
    return ok(code);
  }
}

/**
 * `www` prefix.
 *
 * ```markdown
 * > | a www.example.org b
 *       ^^^^
 * ```
 *
 * Calls `ok` when three `w`/`W` characters and a `.` are followed by
 * something other than the end of input, and `nok` otherwise.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeWwwPrefix(effects, ok, nok) {
  // Number of `w` / `W` characters consumed so far.
  let size = 0;
  return wwwPrefixInside;

  /**
   * In www prefix.
   *
   * ```markdown
   * > | www.example.com
   *     ^^^^
   * ```
   *
   * @type {State}
   */
  function wwwPrefixInside(code) {
    if ((code === 87 || code === 119) && size < 3) {
      size++;
      effects.consume(code);
      return wwwPrefixInside;
    }

    // The dot is only accepted directly after exactly three `w`s.
    if (code === 46 && size === 3) {
      effects.consume(code);
      return wwwPrefixAfter;
    }
    return nok(code);
  }

  /**
   * After www prefix.
   *
   * ```markdown
   * > | www.example.com
   *         ^
   * ```
   *
   * @type {State}
   */
  function wwwPrefixAfter(code) {
    // If there is *anything*, we can link.
    return code === null ? nok(code) : ok(code);
  }
}

/**
 * Domain.
 *
 * ```markdown
 * > | a https://example.org b
 *               ^^^^^^^^^^^
 * ```
 *
 * Calls `ok` when at least one domain character was consumed and neither of
 * the last two dot-separated segments contains an underscore; `nok`
 * otherwise.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeDomain(effects, ok, nok) {
  /**
   * Whether the current (last) segment contains `_`.
   *
   * @type {boolean | undefined}
   */
  let underscoreInLastSegment;
  /**
   * Whether the segment before the current one contains `_`.
   *
   * @type {boolean | undefined}
   */
  let underscoreInLastLastSegment;
  /**
   * Whether a domain character other than `.` or `_` was consumed.
   *
   * @type {boolean | undefined}
   */
  let seen;
  return domainInside;

  /**
   * In domain.
   *
   * ```markdown
   * > | https://example.com/a
   *             ^^^^^^^^^^^
   * ```
   *
   * @type {State}
   */
  function domainInside(code) {
    // Check whether this marker, which is a trailing punctuation
    // marker, optionally followed by more trailing markers, and then
    // followed by an end.
    if (code === 46 || code === 95) {
      return effects.check(trail, domainAfter, domainAtPunctuation)(code);
    }

    // GH documents that only alphanumerics (other than `-`, `.`, and `_`) can
    // occur, which sounds like ASCII only, but they also support `www.點看.com`,
    // so that’s Unicode.
    // Instead of some new production for Unicode alphanumerics, markdown
    // already has that for Unicode punctuation and whitespace, so use those.
    // Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L12>.
    if (code === null || markdownLineEndingOrSpace(code) || unicodeWhitespace(code) || code !== 45 && unicodePunctuation(code)) {
      return domainAfter(code);
    }
    seen = true;
    effects.consume(code);
    return domainInside;
  }

  /**
   * In domain, at potential trailing punctuation, that was not trailing.
   *
   * ```markdown
   * > | https://example.com
   *                    ^
   * ```
   *
   * @type {State}
   */
  function domainAtPunctuation(code) {
    // There is an underscore in the last segment of the domain
    if (code === 95) {
      underscoreInLastSegment = true;
    }
    // Otherwise, it’s a `.`: save the last segment underscore in the
    // penultimate segment slot.
    else {
      underscoreInLastLastSegment = underscoreInLastSegment;
      underscoreInLastSegment = undefined;
    }
    effects.consume(code);
    return domainInside;
  }

  /**
   * After domain.
   *
   * ```markdown
   * > | https://example.com/a
   *                        ^
   * ```
   *
   * @type {State}
   */
  function domainAfter(code) {
    // Note: GH says a dot is needed, but that’s not true:
    // <https://github.com/github/cmark-gfm/issues/279>
    if (underscoreInLastLastSegment || underscoreInLastSegment || !seen) {
      return nok(code);
    }
    return ok(code);
  }
}

/**
 * Path.
 *
 * ```markdown
 * > | a https://example.org/stuff b
 *                          ^^^^^^
 * ```
 *
 * Always ends in `ok` (there is no `nok` parameter): the path may be empty.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizePath(effects, ok) {
  // Number of `(` seen so far.
  let sizeOpen = 0;
  // Number of `)` consumed as part of the path so far.
  let sizeClose = 0;
  return pathInside;

  /**
   * In path.
   *
   * ```markdown
   * > | https://example.com/a
   *                        ^^
   * ```
   *
   * @type {State}
   */
  function pathInside(code) {
    if (code === 40) {
      sizeOpen++;
      effects.consume(code);
      return pathInside;
    }

    // To do: `markdown-rs` also needs this.
    // If this is a paren, and there are less closings than openings,
    // we don’t check for a trail.
    if (code === 41 && sizeClose < sizeOpen) {
      return pathAtPunctuation(code);
    }

    // Check whether this trailing punctuation marker is optionally
    // followed by more trailing markers, and then followed
    // by an end.
    if (code === 33 || code === 34 || code === 38 || code === 39 || code === 41 || code === 42 || code === 44 || code === 46 || code === 58 || code === 59 || code === 60 || code === 63 || code === 93 || code === 95 || code === 126) {
      return effects.check(trail, ok, pathAtPunctuation)(code);
    }

    // End of input or whitespace ends the path.
    if (code === null || markdownLineEndingOrSpace(code) || unicodeWhitespace(code)) {
      return ok(code);
    }
    effects.consume(code);
    return pathInside;
  }

  /**
   * In path, at potential trailing punctuation, that was not trailing.
   *
   * ```markdown
   * > | https://example.com/a"b
   *                          ^
   * ```
   *
   * @type {State}
   */
  function pathAtPunctuation(code) {
    // Count closing parens.
    if (code === 41) {
      sizeClose++;
    }
    effects.consume(code);
    return pathInside;
  }
}

/**
 * Trail.
 *
 * This calls `ok` if this *is* the trail, followed by an end, which means
 * the entire trail is not part of the link.
 * It calls `nok` if this *is* part of the link.
 *
 * ```markdown
 * > | https://example.com").
 *                        ^^^
 * ```
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeTrail(effects, ok, nok) {
  return trail;

  /**
   * In trail of domain or path.
   *
   * ```markdown
   * > | https://example.com").
   *                        ^
   * ```
   *
   * @type {State}
   */
  function trail(code) {
    // Regular trailing punctuation.
    if (code === 33 || code === 34 || code === 39 || code === 41 || code === 42 || code === 44 || code === 46 || code === 58 || code === 59 || code === 63 || code === 95 || code === 126) {
      effects.consume(code);
      return trail;
    }

    // `&` followed by one or more alphabeticals and then a `;`, is
    // as a whole considered as trailing punctuation.
    // In all other cases, it is considered as continuation of the URL.
    if (code === 38) {
      effects.consume(code);
      return trailCharacterReferenceStart;
    }

    // Needed because we allow literals after `[`, as we fix:
    // <https://github.com/github/cmark-gfm/issues/278>.
    // Check that it is not followed by `(` or `[`.
    if (code === 93) {
      effects.consume(code);
      return trailBracketAfter;
    }
    if (
    // `<` is an end.
    code === 60 ||
    // So is whitespace.
    code === null || markdownLineEndingOrSpace(code) || unicodeWhitespace(code)) {
      return ok(code);
    }
    return nok(code);
  }

  /**
   * In trail, after `]`.
   *
   * > 👉 **Note**: this deviates from `cmark-gfm` to fix a bug.
   * > See end of <https://github.com/github/cmark-gfm/issues/278> for more.
   *
   * ```markdown
   * > | https://example.com](
   *                         ^
   * ```
   *
   * @type {State}
   */
  function trailBracketAfter(code) {
    // Whitespace or something that could start a resource or reference is the end.
    // Switch back to trail otherwise.
    if (code === null || code === 40 || code === 91 || markdownLineEndingOrSpace(code) || unicodeWhitespace(code)) {
      return ok(code);
    }
    return trail(code);
  }

  /**
   * In character-reference like trail, after `&`.
   *
   * ```markdown
   * > | https://example.com&amp;).
   *                         ^
   * ```
   *
   * @type {State}
   */
  function trailCharacterReferenceStart(code) {
    // When non-alpha, it’s not a trail.
    return asciiAlpha(code) ? trailCharacterReferenceInside(code) : nok(code);
  }

  /**
   * In character-reference like trail.
   *
   * ```markdown
   * > | https://example.com&amp;).
   *                         ^
   * ```
   *
   * @type {State}
   */
  function trailCharacterReferenceInside(code) {
    // Switch back to trail if this is well-formed.
    if (code === 59) {
      effects.consume(code);
      return trail;
    }
    if (asciiAlpha(code)) {
      effects.consume(code);
      return trailCharacterReferenceInside;
    }

    // It’s not a trail.
    return nok(code);
  }
}

/**
 * Dot in email domain trail.
 *
 * This calls `ok` if this *is* the trail, followed by an end, which means
 * the trail is not part of the link.
 * It calls `nok` if this *is* part of the link.
 *
 * ```markdown
 * > | contact@example.org.
 *                        ^
 * ```
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeEmailDomainDotTrail(effects, ok, nok) {
  return start;

  /**
   * Dot.
   *
   * ```markdown
   * > | contact@example.org.
   *                    ^   ^
   * ```
   *
   * @type {State}
   */
  function start(code) {
    // Must be dot.
    effects.consume(code);
    return after;
  }

  /**
   * After dot.
   *
   * ```markdown
   * > | contact@example.org.
   *                     ^   ^
   * ```
   *
   * @type {State}
   */
  function after(code) {
    // Not a trail if alphanumeric.
    return asciiAlphanumeric(code) ? nok(code) : ok(code);
  }
}

/**
 * Check whether a `www` autolink literal may start after `code`.
 *
 * Allowed: start of input (`null`), `(`, `*`, `_`, `[`, `]`, `~`, or
 * whatever `markdownLineEndingOrSpace` accepts.
 *
 * See:
 * <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L156>.
 *
 * @type {Previous}
 */
function previousWww(code) {
  return code === null || code === 40 || code === 42 || code === 95 || code === 91 || code === 93 || code === 126 || markdownLineEndingOrSpace(code);
}

/**
 * Check whether a protocol autolink literal may start after `code`:
 * anything that is not an ASCII letter is allowed.
 *
 * See:
 * <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L214>.
 *
 * @type {Previous}
 */
function previousProtocol(code) {
  return !asciiAlpha(code);
}

/**
 * Check whether an email autolink literal may start after `code`:
 * anything except `/` and atext characters is allowed.
 *
 * @this {TokenizeContext}
 * @type {Previous}
 */
function previousEmail(code) {
  // Do not allow a slash “inside” atext.
  // The reference code is a bit weird, but that’s what it results in.
  // Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L307>.
  // Other than slash, every preceding character is allowed.
  return !(code === 47 || gfmAtext(code));
}

/**
 * Check whether `code` may occur in the part of an email autolink literal
 * before the `@`: `+`, `-`, `.`, `_`, or an ASCII alphanumeric.
 *
 * @param {Code} code
 *   Character code to check.
 * @returns {boolean}
 *   Whether `code` is atext.
 */
function gfmAtext(code) {
  return code === 43 || code === 45 || code === 46 || code === 95 || asciiAlphanumeric(code);
}

/**
 * Check whether there is a `labelLink` or `labelImage` token without a truthy
 * `_balanced` field somewhere before the current position.
 *
 * Walks `events` backwards. When nothing is found, the token of the last
 * event is marked with `_gfmAutolinkLiteralWalkedInto`, so that later walks
 * can stop as soon as they reach it.
 *
 * @param {Array<Event>} events
 *   Events so far; the token of the last event may be mutated.
 * @returns {boolean}
 *   Whether an unbalanced label start was found.
 */
function previousUnbalanced(events) {
  let index = events.length;
  let result = false;
  while (index--) {
    const token = events[index][1];
    if ((token.type === 'labelLink' || token.type === 'labelImage') && !token._balanced) {
      result = true;
      break;
    }

    // If we’ve seen this token, and it was marked as not having any unbalanced
    // bracket before it, we can exit.
    if (token._gfmAutolinkLiteralWalkedInto) {
      result = false;
      break;
    }
  }
  if (events.length > 0 && !result) {
    // Mark the last token as “walked into” w/o finding
    // anything.
    events[events.length - 1][1]._gfmAutolinkLiteralWalkedInto = true;
  }
  return result;
}