knowledgebase_law/node_modules/mdast-util-gfm-autolink-literal/lib/index.js
2025-04-11 11:47:09 -04:00

281 lines
6.6 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* @import {RegExpMatchObject, ReplaceFunction} from 'mdast-util-find-and-replace'
* @import {CompileContext, Extension as FromMarkdownExtension, Handle as FromMarkdownHandle, Transform as FromMarkdownTransform} from 'mdast-util-from-markdown'
* @import {ConstructName, Options as ToMarkdownExtension} from 'mdast-util-to-markdown'
* @import {Link, PhrasingContent} from 'mdast'
*/
import {ccount} from 'ccount'
import {ok as assert} from 'devlop'
import {unicodePunctuation, unicodeWhitespace} from 'micromark-util-character'
import {findAndReplace} from 'mdast-util-find-and-replace'
/**
 * Value used as the `inConstruct` field of every `unsafe` pattern returned by
 * `gfmAutolinkLiteralToMarkdown`.
 *
 * @type {ConstructName}
 */
const inConstruct = 'phrasing'

/**
 * Value used as the `notInConstruct` field of every `unsafe` pattern returned
 * by `gfmAutolinkLiteralToMarkdown`.
 *
 * @type {Array<ConstructName>}
 */
const notInConstruct = [
  'autolink',
  'link',
  'image',
  'label'
]
/**
 * Create an extension for `mdast-util-from-markdown` to enable GFM autolink
 * literals in markdown.
 *
 * @returns {FromMarkdownExtension}
 *   Extension for `mdast-util-from-markdown` to enable GFM autolink literals.
 */
export function gfmAutolinkLiteralFromMarkdown() {
  // Handlers called when a literal autolink token, or one of its typed parts,
  // is entered.
  const enter = {
    literalAutolink: enterLiteralAutolink,
    literalAutolinkEmail: enterLiteralAutolinkValue,
    literalAutolinkHttp: enterLiteralAutolinkValue,
    literalAutolinkWww: enterLiteralAutolinkValue
  }

  // Handlers called when those same tokens are exited; each kind of value
  // has its own exit handler.
  const exit = {
    literalAutolink: exitLiteralAutolink,
    literalAutolinkEmail: exitLiteralAutolinkEmail,
    literalAutolinkHttp: exitLiteralAutolinkHttp,
    literalAutolinkWww: exitLiteralAutolinkWww
  }

  return {transforms: [transformGfmAutolinkLiterals], enter, exit}
}
/**
 * Create an extension for `mdast-util-to-markdown` to enable GFM autolink
 * literals in markdown.
 *
 * @returns {ToMarkdownExtension}
 *   Extension for `mdast-util-to-markdown` to enable GFM autolink literals.
 */
export function gfmAutolinkLiteralToMarkdown() {
  // One row per unsafe pattern: `[character, before, after]`.
  // `before` and `after` are regex sources (as strings) for the text around
  // `character`.
  const rows = [
    // `@` with address-like characters on both sides.
    ['@', '[+\\-.\\w]', '[\\-.\\w]'],
    // `.` directly after a `w` (such as in `www.`).
    ['.', '[Ww]', '[\\-.\\w]'],
    // `:` after `p` or `s` and before `/` (such as in `http:/`, `https:/`).
    [':', '[ps]', '\\/']
  ]

  return {
    unsafe: rows.map(([character, before, after]) => ({
      character,
      before,
      after,
      inConstruct,
      notInConstruct
    }))
  }
}
/**
 * Open a new, still empty, `link` node for a literal autolink token.
 *
 * The `url` and `children` are left blank here; the visible code fills `url`
 * in from the exit handlers of the token's parts.
 *
 * @this {CompileContext}
 * @type {FromMarkdownHandle}
 */
function enterLiteralAutolink(token) {
  /** @type {Link} */
  const link = {type: 'link', title: null, url: '', children: []}
  this.enter(link, token)
}
/**
 * Enter the value part of a literal autolink by delegating to the
 * `autolinkProtocol` enter handler found on `this.config`.
 *
 * @this {CompileContext}
 * @type {FromMarkdownHandle}
 */
function enterLiteralAutolinkValue(token) {
  const {autolinkProtocol} = this.config.enter
  autolinkProtocol.call(this, token)
}
/**
 * Exit the `http`/`https` value of a literal autolink by delegating to the
 * `autolinkProtocol` exit handler found on `this.config`.
 *
 * @this {CompileContext}
 * @type {FromMarkdownHandle}
 */
function exitLiteralAutolinkHttp(token) {
  const {autolinkProtocol} = this.config.exit
  autolinkProtocol.call(this, token)
}
/**
 * Exit the `www.` value of a literal autolink.
 *
 * First runs the `data` exit handler from `this.config`, then sets the `url`
 * of the node on top of the stack to the serialized token prefixed with
 * `http://`.
 *
 * @this {CompileContext}
 * @type {FromMarkdownHandle}
 */
function exitLiteralAutolinkWww(token) {
  const {data} = this.config.exit
  data.call(this, token)

  const {stack} = this
  const link = stack[stack.length - 1]
  // The node on top of the stack is expected to be the enclosing link.
  assert(link.type === 'link')
  link.url = 'http://' + this.sliceSerialize(token)
}
/**
 * Exit the email value of a literal autolink by delegating to the
 * `autolinkEmail` exit handler found on `this.config`.
 *
 * @this {CompileContext}
 * @type {FromMarkdownHandle}
 */
function exitLiteralAutolinkEmail(token) {
  const {autolinkEmail} = this.config.exit
  autolinkEmail.call(this, token)
}
/**
 * Exit a literal autolink token by handing it to the compile context's `exit`
 * method.
 *
 * @this {CompileContext}
 * @type {FromMarkdownHandle}
 */
function exitLiteralAutolink(token) {
  const context = this
  context.exit(token)
}
/**
 * Search the text of `tree` for URL-like and email-like literals and let
 * `findUrl` / `findEmail` decide what each match is replaced with.
 *
 * Nodes of type `link` and `linkReference` are passed as `ignore` to
 * `findAndReplace`.
 *
 * @type {FromMarkdownTransform}
 */
function transformGfmAutolinkLiterals(tree) {
  // Groups: protocol (`http://`, `https://`, or `www` when followed by a
  // dot), domain, and the remaining characters up to the next space, tab, or
  // line ending.
  const urlExpression = /(https?:\/\/|www(?=\.))([-.\w]+)([^ \t\r\n]*)/gi
  // Groups: the part before `@` and the dotted label after it; the lookbehind
  // requires start of input, whitespace, punctuation, or a symbol before it.
  const emailExpression =
    /(?<=^|\s|\p{P}|\p{S})([-.\w+]+)@([-\w]+(?:\.[-\w]+)+)/gu

  const urlPair = [urlExpression, findUrl]
  const emailPair = [emailExpression, findEmail]

  findAndReplace(tree, [urlPair, emailPair], {
    ignore: ['link', 'linkReference']
  })
}
/**
 * Turn a URL-like match into a link, optionally followed by trailing text.
 *
 * @type {ReplaceFunction}
 * @param {string} _
 *   Whole match (unused).
 * @param {string} protocol
 *   First capture: `http://`, `https://`, or `www`.
 * @param {string} domain
 *   Second capture: domain part.
 * @param {string} path
 *   Third capture: everything after the domain.
 * @param {RegExpMatchObject} match
 *   Match info, used to inspect the character before the match.
 * @returns {Array<PhrasingContent> | Link | false}
 *   A link, a link followed by a text node holding the split-off trail, or
 *   `false` when the match is rejected.
 */
// eslint-disable-next-line max-params
function findUrl(_, protocol, domain, path, match) {
  // Not an expected previous character.
  if (!previous(match)) return false

  // A `www` match has no real protocol: `www` belongs to the domain, nothing
  // is shown in front of it, and `http://` is only added to the URL.
  const startsWithWww = /^w/i.test(protocol)
  const host = startsWithWww ? protocol + domain : domain
  const shownProtocol = startsWithWww ? '' : protocol
  const urlPrefix = startsWithWww ? 'http://' : ''

  if (!isCorrectDomain(host)) return false

  const [address, trail] = splitUrl(host + path)

  // Nothing is left once the trail is split off.
  if (!address) return false

  const label = shownProtocol + address
  /** @type {Link} */
  const link = {
    type: 'link',
    title: null,
    url: urlPrefix + label,
    children: [{type: 'text', value: label}]
  }

  // A non-empty trail stays in the document as plain text after the link.
  return trail ? [link, {type: 'text', value: trail}] : link
}
/**
 * Turn an email-like match into a `mailto:` link.
 *
 * @type {ReplaceFunction}
 * @param {string} _
 *   Whole match (unused).
 * @param {string} atext
 *   First capture: the part before `@`.
 * @param {string} label
 *   Second capture: the part after `@`.
 * @param {RegExpMatchObject} match
 *   Match info, used to inspect the character before the match.
 * @returns {Link | false}
 *   A link, or `false` when the match is rejected.
 */
function findEmail(_, atext, label, match) {
  // Not an expected previous character.
  if (!previous(match, true)) return false

  // The label may not end in a dash, a digit, or an underscore.
  if (/[-\d_]$/.test(label)) return false

  const address = atext + '@' + label

  return {
    type: 'link',
    title: null,
    url: 'mailto:' + address,
    children: [{type: 'text', value: address}]
  }
}
/**
 * Check whether `domain` is acceptable for an autolink.
 *
 * The domain must contain at least one dot. Of its dot-separated labels only
 * the last two are inspected: each of them, when not empty, must contain no
 * underscore and at least one ASCII letter or digit.
 *
 * @param {string} domain
 *   Domain to check.
 * @returns {boolean}
 *   Whether `domain` is acceptable.
 */
function isCorrectDomain(domain) {
  const labels = domain.split('.')

  if (labels.length < 2) return false

  for (const label of labels.slice(-2)) {
    // Empty labels (from a leading, trailing, or doubled dot) are skipped.
    if (label === '') continue

    if (label.includes('_') || !/[a-zA-Z\d]/.test(label)) return false
  }

  return true
}
/**
 * Split trailing punctuation off a URL.
 *
 * A run of `!"&'),.:;<>?]}` characters at the end of `url` is treated as a
 * trail. Closing parentheses in that trail, together with whatever precedes
 * them in the trail, are moved back into the URL for as long as the URL has
 * more `(` than `)`.
 *
 * @param {string} url
 *   URL-like text.
 * @returns {[string, string | undefined]}
 *   The URL and its trail; the trail is `undefined` when `url` does not end
 *   in one of the trailing characters.
 */
function splitUrl(url) {
  const trailing = /[!"&'),.:;<>?\]}]+$/.exec(url)

  if (trailing === null) return [url, undefined]

  let head = url.slice(0, trailing.index)
  let tail = trailing[0]
  // Number of `(` in the head that have no matching `)` yet.
  let unbalanced = ccount(head, '(') - ccount(head, ')')

  for (
    let closing = tail.indexOf(')');
    closing !== -1 && unbalanced > 0;
    closing = tail.indexOf(')')
  ) {
    head += tail.slice(0, closing + 1)
    tail = tail.slice(closing + 1)
    unbalanced--
  }

  return [head, tail]
}
/**
 * Check whether the character before a match allows an autolink there.
 *
 * That is the case at the start of the input, or after Unicode whitespace or
 * Unicode punctuation. For emails, a slash directly before the match is
 * additionally rejected.
 *
 * @param {RegExpMatchObject} match
 *   Match info; `input` and `index` are read.
 * @param {boolean | null | undefined} [email=false]
 *   Whether the match is an email.
 * @returns {boolean}
 *   Whether the previous character is acceptable.
 */
function previous(match, email) {
  const {index, input} = match
  // `NaN` at the start of the input; that case is settled by the `index`
  // check below before the code is classified.
  const code = input.charCodeAt(index - 1)

  const atBoundary =
    index === 0 || unicodeWhitespace(code) || unicodePunctuation(code)

  // If it's an email, the previous character should not be a slash (47).
  const slashBeforeEmail = Boolean(email) && code === 47

  return atBoundary && !slashBeforeEmail
}