mirror of
https://github.com/Funkoala14/knowledgebase_law.git
synced 2025-06-09 00:28:15 +08:00
118 lines
3.0 KiB
JavaScript
118 lines
3.0 KiB
JavaScript
|
/**
 * @typedef CoreOptions
 * @property {ReadonlyArray<string>} [subset=[]]
 *   Whether to only escape the given subset of characters.
 * @property {boolean} [escapeOnly=false]
 *   Whether to only escape possibly dangerous characters.
 *   Those characters are `"`, `&`, `'`, `<`, `>`, and `` ` ``.
 *
 * @typedef FormatOptions
 * @property {(code: number, next: number, options: CoreWithFormatOptions) => string} format
 *   Format strategy.
 *
 * @typedef {CoreOptions & FormatOptions & import('./util/format-smart.js').FormatSmartOptions} CoreWithFormatOptions
 */
|
||
|
|
||
|
// Characters encoded when no explicit subset is given: `"`, `&`, `'`, `<`,
// `>`, and `` ` ``.
const defaultSubsetRegex = /["&'<>`]/g

// A high surrogate immediately followed by a low surrogate.
const surrogatePairsRegex = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g

// Single UTF-16 code units that are encoded in the final pass: U+0001 through
// tab, VT, FF, U+000E–U+001F, DEL, a handful of individual code units in the
// U+0081–U+009D range, and everything from U+00A0 up to U+FFFF.
// NUL, LF, CR, and printable ASCII are not part of the class.
const controlCharactersRegex =
  // eslint-disable-next-line no-control-regex, unicorn/no-hex-escape
  /[\x01-\t\v\f\x0E-\x1F\x7F\x81\x8D\x8F\x90\x9D\xA0-\uFFFF]/g

// Characters that must be backslash-escaped to be matched literally when a
// pattern is built from arbitrary strings.
const regexEscapeRegex = /[|\\{}()[\]^$+*?.]/g

/**
 * Expressions already compiled for a given subset array, keyed by the array
 * object itself.
 *
 * @type {WeakMap<ReadonlyArray<string>, RegExp>}
 */
const subsetToRegexCache = new WeakMap()
|
||
|
|
||
|
/**
 * Encode certain characters in `value`.
 *
 * First every match of the subset expression (or of the default set of
 * dangerous characters when `options.subset` is not given) is replaced by the
 * result of `options.format`.
 * When `options.subset` or `options.escapeOnly` is set, that is all that
 * happens; otherwise surrogate pairs and the code units matched by
 * `controlCharactersRegex` are encoded as well.
 *
 * @param {string} value
 *   Text to encode.
 * @param {CoreWithFormatOptions} options
 *   Configuration; `options.format` is called once per encoded character with
 *   the character's code, the code unit that follows it in the string being
 *   processed (`NaN` at the end), and `options` itself.
 * @returns {string}
 *   Encoded text.
 */
export function core(value, options) {
  /**
   * Encode a single UTF-16 code unit.
   *
   * @param {string} character
   * @param {number} index
   * @param {string} all
   * @returns {string}
   */
  const encodeUnit = (character, index, all) =>
    options.format(character.charCodeAt(0), all.charCodeAt(index + 1), options)

  /**
   * Encode a surrogate pair as one code point.
   *
   * @param {string} pair
   * @param {number} index
   * @param {string} all
   * @returns {string}
   */
  const encodePair = (pair, index, all) =>
    // `pair` is always a high surrogate followed by a low one, so
    // `codePointAt(0)` yields the combined astral code point.
    options.format(pair.codePointAt(0), all.charCodeAt(index + 2), options)

  const subset = options.subset
  const firstPass = subset
    ? charactersToExpressionCached(subset)
    : defaultSubsetRegex
  const escaped = value.replace(firstPass, encodeUnit)

  if (subset || options.escapeOnly) {
    return escaped
  }

  // Surrogate pairs go first so that their halves are not encoded one by one.
  const withoutPairs = escaped.replace(surrogatePairsRegex, encodePair)

  // BMP control characters (C0 except for LF, CR, SP; DEL; and some more
  // non-ASCII ones).
  return withoutPairs.replace(controlCharactersRegex, encodeUnit)
}
|
||
|
|
||
|
/**
 * Memoizing front end for `charactersToExpression`.
 *
 * The compiled expression is stored in a `WeakMap` keyed by the `subset` array
 * object, so passing the same array again reuses the earlier `RegExp` instead
 * of building a new one.
 * A different array with equal contents is a different key.
 *
 * @param {ReadonlyArray<string>} subset
 *   Characters to match.
 * @returns {RegExp}
 *   Expression for `subset`; the same instance on every call with that array.
 */
function charactersToExpressionCached(subset) {
  const known = subsetToRegexCache.get(subset)

  if (known) {
    return known
  }

  const expression = charactersToExpression(subset)
  subsetToRegexCache.set(subset, expression)
  return expression
}
|
||
|
|
||
|
/**
 * Build a global expression that matches any of the strings in `subset`.
 *
 * Every entry is escaped with `regexEscapeRegex` so that it is matched
 * literally, and the entries are joined as alternatives.
 * Empty strings are skipped, and when nothing is left the result is an
 * expression that never matches: an empty alternative would otherwise match
 * at every position of the input and make callers replace "nothing" with an
 * encoded `NaN`.
 *
 * @param {ReadonlyArray<string>} subset
 *   Strings to match.
 * @returns {RegExp}
 *   Expression with the `g` flag.
 */
function charactersToExpression(subset) {
  /** @type {Array<string>} */
  const groups = []

  for (const character of subset) {
    // An empty alternative matches everywhere, so leave it out.
    if (character) {
      groups.push(character.replace(regexEscapeRegex, '\\$&'))
    }
  }

  // `(?!)` is an empty negative lookahead, which can never succeed.
  const source = groups.length > 0 ? '(?:' + groups.join('|') + ')' : '(?!)'

  return new RegExp(source, 'g')
}
|