knowledgebase_law/node_modules/stringify-entities/lib/core.js
2025-04-11 11:47:09 -04:00

118 lines
3.0 KiB
JavaScript

/**
* @typedef CoreOptions
* @property {ReadonlyArray<string>} [subset=[]]
* Whether to only escape the given subset of characters.
* @property {boolean} [escapeOnly=false]
* Whether to only escape possibly dangerous characters.
* Those characters are `"`, `&`, `'`, `<`, `>`, and `` ` ``.
*
* @typedef FormatOptions
* @property {(code: number, next: number, options: CoreWithFormatOptions) => string} format
* Format strategy.
*
* @typedef {CoreOptions & FormatOptions & import('./util/format-smart.js').FormatSmartOptions} CoreWithFormatOptions
*/
const defaultSubsetRegex = /["&'<>`]/g
const surrogatePairsRegex = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g
const controlCharactersRegex =
// eslint-disable-next-line no-control-regex, unicorn/no-hex-escape
/[\x01-\t\v\f\x0E-\x1F\x7F\x81\x8D\x8F\x90\x9D\xA0-\uFFFF]/g
const regexEscapeRegex = /[|\\{}()[\]^$+*?.]/g
/** @type {WeakMap<ReadonlyArray<string>, RegExp>} */
const subsetToRegexCache = new WeakMap()
/**
* Encode certain characters in `value`.
*
* @param {string} value
* @param {CoreWithFormatOptions} options
* @returns {string}
*/
export function core(value, options) {
value = value.replace(
options.subset
? charactersToExpressionCached(options.subset)
: defaultSubsetRegex,
basic
)
if (options.subset || options.escapeOnly) {
return value
}
return (
value
// Surrogate pairs.
.replace(surrogatePairsRegex, surrogate)
// BMP control characters (C0 except for LF, CR, SP; DEL; and some more
// non-ASCII ones).
.replace(controlCharactersRegex, basic)
)
/**
* @param {string} pair
* @param {number} index
* @param {string} all
*/
function surrogate(pair, index, all) {
return options.format(
(pair.charCodeAt(0) - 0xd800) * 0x400 +
pair.charCodeAt(1) -
0xdc00 +
0x10000,
all.charCodeAt(index + 2),
options
)
}
/**
* @param {string} character
* @param {number} index
* @param {string} all
*/
function basic(character, index, all) {
return options.format(
character.charCodeAt(0),
all.charCodeAt(index + 1),
options
)
}
}
/**
* A wrapper function that caches the result of `charactersToExpression` with a WeakMap.
* This can improve performance when tooling calls `charactersToExpression` repeatedly
* with the same subset.
*
* @param {ReadonlyArray<string>} subset
* @returns {RegExp}
*/
function charactersToExpressionCached(subset) {
let cached = subsetToRegexCache.get(subset)
if (!cached) {
cached = charactersToExpression(subset)
subsetToRegexCache.set(subset, cached)
}
return cached
}
/**
* @param {ReadonlyArray<string>} subset
* @returns {RegExp}
*/
function charactersToExpression(subset) {
/** @type {Array<string>} */
const groups = []
let index = -1
while (++index < subset.length) {
groups.push(subset[index].replace(regexEscapeRegex, '\\$&'))
}
return new RegExp('(?:' + groups.join('|') + ')', 'g')
}