Initial
This commit is contained in:
918
resources/app/node_modules/sanitize-html/index.js
generated
vendored
Normal file
918
resources/app/node_modules/sanitize-html/index.js
generated
vendored
Normal file
@@ -0,0 +1,918 @@
|
||||
const htmlparser = require('htmlparser2');
|
||||
const escapeStringRegexp = require('escape-string-regexp');
|
||||
const { isPlainObject } = require('is-plain-object');
|
||||
const deepmerge = require('deepmerge');
|
||||
const parseSrcset = require('parse-srcset');
|
||||
const { parse: postcssParse } = require('postcss');
|
||||
// Tags that can conceivably represent stand-alone media.
const mediaTags = [
  'img', 'audio', 'video', 'picture', 'svg',
  'object', 'map', 'iframe', 'embed'
];
// Tags that are inherently vulnerable to being used in XSS attacks.
const vulnerableTags = [ 'script', 'style' ];

/**
 * Calls `cb(value, key)` once for each own enumerable key of `obj`,
 * in `Object.keys` order. Does nothing when `obj` is falsy.
 *
 * @param {object|Array|null|undefined} obj - Collection to iterate.
 * @param {function(*, string)} cb - Receives the value, then the key.
 */
function each(obj, cb) {
  if (obj) {
    Object.keys(obj).forEach(function (key) {
      cb(obj[key], key);
    });
  }
}

/**
 * Own-property check that avoids false positives with `.__proto__`,
 * `.hasOwnProperty`, etc., and works even when `obj` does not inherit
 * `hasOwnProperty` itself.
 *
 * @param {object} obj - Object to inspect.
 * @param {string} key - Property name.
 * @return {boolean} True when `key` is an own property of `obj`.
 */
function has(obj, key) {
  return ({}).hasOwnProperty.call(obj, key);
}

/**
 * Returns those elements `v` of `a` for which `cb(v)` returns truthy.
 * Iteration is delegated to `each`, so a falsy `a` yields an empty array.
 *
 * @param {object|Array|null|undefined} a - Collection to filter.
 * @param {function(*): *} cb - Predicate, called with each element only.
 * @return {Array} New array holding the accepted elements.
 */
function filter(a, cb) {
  const n = [];
  each(a, function(v) {
    if (cb(v)) {
      n.push(v);
    }
  });
  return n;
}

/**
 * Reports whether `obj` has no own enumerable properties.
 * Inherited properties are ignored.
 *
 * @param {object} obj - Object to inspect.
 * @return {boolean} True when no own enumerable key is found.
 */
function isEmptyObject(obj) {
  for (const key in obj) {
    if (has(obj, key)) {
      return false;
    }
  }
  return true;
}

/**
 * Serializes a parsed srcset back into a `srcset` attribute value.
 * Each candidate becomes its URL followed by the optional `w`, `h`
 * and `d` descriptors; candidates are joined with ", ".
 *
 * @param {Array<{url: string, w?: number, h?: number, d?: number}>} parsedSrcset
 * @return {string} The serialized srcset.
 * @throws {Error} When a candidate has no `url`.
 */
function stringifySrcset(parsedSrcset) {
  return parsedSrcset.map(function(part) {
    if (!part.url) {
      throw new Error('URL missing');
    }

    return (
      part.url +
      (part.w ? ` ${part.w}w` : '') +
      (part.h ? ` ${part.h}h` : '') +
      (part.d ? ` ${part.d}x` : '')
    );
  }).join(', ');
}

module.exports = sanitizeHtml;
|
||||
|
||||
// A valid attribute name.
// We use a tolerant definition based on the set of strings defined by
// html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
// and html.spec.whatwg.org/multipage/parsing.html#attribute-name-state .
// The characters accepted are ones which can be appended to the attribute
// name buffer without triggering a parse error:
//   * unexpected-equals-sign-before-attribute-name
//   * unexpected-null-character
//   * unexpected-character-in-attribute-name
// We exclude the empty string because it's impossible to get to the after
// attribute name state with an empty attribute name buffer.
const VALID_HTML_ATTRIBUTE_NAME = /^[^\0\t\n\f\r /<=>]+$/;

/**
 * Sanitizes an HTML string according to `options`.
 *
 * `options` is shallow-merged over `sanitizeHtml.defaults`, and
 * `options.parser` over `htmlParserDefaults`. The input is streamed through
 * an htmlparser2 `Parser`; the open-tag, text and close-tag handlers below
 * append the permitted markup to `result`.
 *
 * Ignore the _recursing flag; it's there for recursive
 * invocation as a guard against this exploit:
 * https://github.com/fb55/htmlparser2/issues/105
 *
 * @param {string|number|null|undefined} html - Markup to sanitize. `null`
 *   and `undefined` yield an empty string; numbers are converted to strings.
 * @param {object} [options] - Overrides for `sanitizeHtml.defaults`.
 * @param {boolean} [_recursing] - Unused by this function body.
 * @return {string} The sanitized markup.
 * @throws {Error} When `allowedStyles` is set while `parseStyleAttributes`
 *   is falsy and a `style` attribute is encountered.
 */
function sanitizeHtml(html, options, _recursing) {
  if (html == null) {
    return '';
  }
  if (typeof html === 'number') {
    html = html.toString();
  }

  let result = '';
  // Used for hot swapping the result variable with an empty string in order to "capture" the text written to it.
  let tempResult = '';

  /**
   * Bookkeeping record for one open element on the parse stack.
   * `tagPosition` remembers where in `result` the element starts so that
   * `exclusiveFilter` can truncate the output back to that point.
   */
  function Frame(tag, attribs) {
    const that = this;
    this.tag = tag;
    this.attribs = attribs || {};
    this.tagPosition = result.length;
    this.text = ''; // Node inner text
    this.mediaChildren = [];

    // Appends this frame's text to the frame currently on top of the stack.
    this.updateParentNodeText = function() {
      if (stack.length) {
        const parentFrame = stack[stack.length - 1];
        parentFrame.text += that.text;
      }
    };

    // Records this tag on the parent frame when it is a media tag.
    this.updateParentNodeMediaChildren = function() {
      if (stack.length && mediaTags.includes(this.tag)) {
        const parentFrame = stack[stack.length - 1];
        parentFrame.mediaChildren.push(this.tag);
      }
    };
  }

  options = Object.assign({}, sanitizeHtml.defaults, options);
  options.parser = Object.assign({}, htmlParserDefaults, options.parser);

  // `allowedTags: false` allows every tag.
  const tagAllowed = function (name) {
    return options.allowedTags === false || (options.allowedTags || []).indexOf(name) > -1;
  };

  // vulnerableTags
  vulnerableTags.forEach(function (tag) {
    if (tagAllowed(tag) && !options.allowVulnerableTags) {
      console.warn(`\n\n⚠️ Your \`allowedTags\` option includes, \`${tag}\`, which is inherently\nvulnerable to XSS attacks. Please remove it from \`allowedTags\`.\nOr, to disable this warning, add the \`allowVulnerableTags\` option\nand ensure you are accounting for this risk.\n\n`);
    }
  });

  // Tags that contain something other than HTML, or where discarding
  // the text when the tag is disallowed makes sense for other reasons.
  // If we are not allowing these tags, we should drop their content too.
  // For other tags you would drop the tag but keep its content.
  const nonTextTagsArray = options.nonTextTags || [
    'script',
    'style',
    'textarea',
    'option'
  ];

  // Split each tag's allowed attributes into exact entries (strings or
  // `{ name, values, multiple }` objects) and one combined glob regex.
  let allowedAttributesMap;
  let allowedAttributesGlobMap;
  if (options.allowedAttributes) {
    allowedAttributesMap = {};
    allowedAttributesGlobMap = {};
    each(options.allowedAttributes, function(attributes, tag) {
      allowedAttributesMap[tag] = [];
      const globRegex = [];
      attributes.forEach(function(obj) {
        if (typeof obj === 'string' && obj.indexOf('*') >= 0) {
          globRegex.push(escapeStringRegexp(obj).replace(/\\\*/g, '.*'));
        } else {
          allowedAttributesMap[tag].push(obj);
        }
      });
      if (globRegex.length) {
        allowedAttributesGlobMap[tag] = new RegExp('^(' + globRegex.join('|') + ')$');
      }
    });
  }

  // Same split for allowed classes: exact names, globs and user regexes.
  const allowedClassesMap = {};
  const allowedClassesGlobMap = {};
  const allowedClassesRegexMap = {};
  each(options.allowedClasses, function(classes, tag) {
    // Implicitly allows the class attribute
    if (allowedAttributesMap) {
      if (!has(allowedAttributesMap, tag)) {
        allowedAttributesMap[tag] = [];
      }
      allowedAttributesMap[tag].push('class');
    }

    allowedClassesMap[tag] = classes;

    if (Array.isArray(classes)) {
      const globRegex = [];
      allowedClassesMap[tag] = [];
      allowedClassesRegexMap[tag] = [];
      classes.forEach(function(obj) {
        if (typeof obj === 'string' && obj.indexOf('*') >= 0) {
          globRegex.push(escapeStringRegexp(obj).replace(/\\\*/g, '.*'));
        } else if (obj instanceof RegExp) {
          allowedClassesRegexMap[tag].push(obj);
        } else {
          allowedClassesMap[tag].push(obj);
        }
      });
      if (globRegex.length) {
        allowedClassesGlobMap[tag] = new RegExp('^(' + globRegex.join('|') + ')$');
      }
    }
  });

  // Normalize transformTags: string shorthands become simpleTransform
  // functions, and the '*' entry is kept apart from per-tag transforms.
  const transformTagsMap = {};
  let transformTagsAll;
  each(options.transformTags, function(transform, tag) {
    let transFun;
    if (typeof transform === 'function') {
      transFun = transform;
    } else if (typeof transform === 'string') {
      transFun = sanitizeHtml.simpleTransform(transform);
    }
    if (tag === '*') {
      transformTagsAll = transFun;
    } else {
      transformTagsMap[tag] = transFun;
    }
  });

  // Mutable parse state, (re)set by initializeState().
  let depth;
  let stack;
  let skipMap;
  let transformMap;
  let skipText;
  let skipTextDepth;
  let addedText = false;

  initializeState();

  const parser = new htmlparser.Parser({
    onopentag: function(name, attribs) {
      // If `enforceHtmlBoundary` is `true` and this has found the opening
      // `html` tag, reset the state.
      if (options.enforceHtmlBoundary && name === 'html') {
        initializeState();
      }

      // Inside a discarded non-text tag: only track nesting depth.
      if (skipText) {
        skipTextDepth++;
        return;
      }
      const frame = new Frame(name, attribs);
      stack.push(frame);

      let skip = false;
      const hasText = !!frame.text;
      let transformedTag;
      if (has(transformTagsMap, name)) {
        transformedTag = transformTagsMap[name](name, attribs);

        frame.attribs = attribs = transformedTag.attribs;

        if (transformedTag.text !== undefined) {
          frame.innerText = transformedTag.text;
        }

        if (name !== transformedTag.tagName) {
          frame.name = name = transformedTag.tagName;
          // Remembered so the matching close tag is renamed as well.
          transformMap[depth] = transformedTag.tagName;
        }
      }
      if (transformTagsAll) {
        transformedTag = transformTagsAll(name, attribs);

        frame.attribs = attribs = transformedTag.attribs;
        if (name !== transformedTag.tagName) {
          frame.name = name = transformedTag.tagName;
          transformMap[depth] = transformedTag.tagName;
        }
      }

      if (!tagAllowed(name) || (options.disallowedTagsMode === 'recursiveEscape' && !isEmptyObject(skipMap)) || (options.nestingLimit != null && depth >= options.nestingLimit)) {
        skip = true;
        skipMap[depth] = true;
        if (options.disallowedTagsMode === 'discard' || options.disallowedTagsMode === 'completelyDiscard') {
          if (nonTextTagsArray.indexOf(name) !== -1) {
            skipText = true;
            skipTextDepth = 1;
          }
        }
      }
      depth++;
      if (skip) {
        if (options.disallowedTagsMode === 'discard' || options.disallowedTagsMode === 'completelyDiscard') {
          // We want the contents but not this tag
          return;
        }
        // Escape modes: capture the tag markup separately so that it can be
        // escaped as a whole before being appended to the real output.
        tempResult = result;
        result = '';
      }
      result += '<' + name;

      if (name === 'script') {
        if (options.allowedScriptHostnames || options.allowedScriptDomains) {
          frame.innerText = '';
        }
      }

      if (!allowedAttributesMap || has(allowedAttributesMap, name) || allowedAttributesMap['*']) {
        each(attribs, function(value, a) {
          if (!VALID_HTML_ATTRIBUTE_NAME.test(a)) {
            // This prevents part of an attribute name in the output from being
            // interpreted as the end of an attribute, or end of a tag.
            delete frame.attribs[a];
            return;
          }
          // If the value is empty, check if the attribute is in the allowedEmptyAttributes array.
          // If it is not in the allowedEmptyAttributes array, and it is a known non-boolean attribute, delete it
          // List taken from https://html.spec.whatwg.org/multipage/indices.html#attributes-3
          if (value === '' && (!options.allowedEmptyAttributes.includes(a)) &&
            (options.nonBooleanAttributes.includes(a) || options.nonBooleanAttributes.includes('*'))) {
            delete frame.attribs[a];
            return;
          }
          // check allowedAttributesMap for the element and attribute and modify the value
          // as necessary if there are specific values defined.
          let passedAllowedAttributesMapCheck = false;
          if (!allowedAttributesMap ||
            (has(allowedAttributesMap, name) && allowedAttributesMap[name].indexOf(a) !== -1) ||
            (allowedAttributesMap['*'] && allowedAttributesMap['*'].indexOf(a) !== -1) ||
            (has(allowedAttributesGlobMap, name) && allowedAttributesGlobMap[name].test(a)) ||
            (allowedAttributesGlobMap['*'] && allowedAttributesGlobMap['*'].test(a))) {
            passedAllowedAttributesMapCheck = true;
          } else if (allowedAttributesMap && allowedAttributesMap[name]) {
            for (const o of allowedAttributesMap[name]) {
              if (isPlainObject(o) && o.name && (o.name === a)) {
                passedAllowedAttributesMapCheck = true;
                let newValue = '';
                if (o.multiple === true) {
                  // verify the values that are allowed
                  const splitStrArray = value.split(' ');
                  for (const s of splitStrArray) {
                    if (o.values.indexOf(s) !== -1) {
                      if (newValue === '') {
                        newValue = s;
                      } else {
                        newValue += ' ' + s;
                      }
                    }
                  }
                } else if (o.values.indexOf(value) >= 0) {
                  // verified an allowed value matches the entire attribute value
                  newValue = value;
                }
                value = newValue;
              }
            }
          }
          if (passedAllowedAttributesMapCheck) {
            if (options.allowedSchemesAppliedToAttributes.indexOf(a) !== -1) {
              if (naughtyHref(name, value)) {
                delete frame.attribs[a];
                return;
              }
            }

            if (name === 'script' && a === 'src') {

              let allowed = true;

              try {
                const parsed = parseUrl(value);

                if (options.allowedScriptHostnames || options.allowedScriptDomains) {
                  const allowedHostname = (options.allowedScriptHostnames || []).find(function (hostname) {
                    return hostname === parsed.url.hostname;
                  });
                  const allowedDomain = (options.allowedScriptDomains || []).find(function(domain) {
                    return parsed.url.hostname === domain || parsed.url.hostname.endsWith(`.${domain}`);
                  });
                  allowed = allowedHostname || allowedDomain;
                }
              } catch (e) {
                // Unparseable script src
                allowed = false;
              }

              if (!allowed) {
                delete frame.attribs[a];
                return;
              }
            }

            if (name === 'iframe' && a === 'src') {
              let allowed = true;
              try {
                const parsed = parseUrl(value);

                if (parsed.isRelativeUrl) {
                  // default value of allowIframeRelativeUrls is true
                  // unless allowedIframeHostnames or allowedIframeDomains specified
                  allowed = has(options, 'allowIframeRelativeUrls')
                    ? options.allowIframeRelativeUrls
                    : (!options.allowedIframeHostnames && !options.allowedIframeDomains);
                } else if (options.allowedIframeHostnames || options.allowedIframeDomains) {
                  const allowedHostname = (options.allowedIframeHostnames || []).find(function (hostname) {
                    return hostname === parsed.url.hostname;
                  });
                  const allowedDomain = (options.allowedIframeDomains || []).find(function(domain) {
                    return parsed.url.hostname === domain || parsed.url.hostname.endsWith(`.${domain}`);
                  });
                  allowed = allowedHostname || allowedDomain;
                }
              } catch (e) {
                // Unparseable iframe src
                allowed = false;
              }
              if (!allowed) {
                delete frame.attribs[a];
                return;
              }
            }
            if (a === 'srcset') {
              try {
                let parsed = parseSrcset(value);
                parsed.forEach(function(candidate) {
                  if (naughtyHref('srcset', candidate.url)) {
                    candidate.evil = true;
                  }
                });
                parsed = filter(parsed, function(v) {
                  return !v.evil;
                });
                if (!parsed.length) {
                  delete frame.attribs[a];
                  return;
                } else {
                  // `parsed` only holds acceptable candidates at this point.
                  value = stringifySrcset(parsed);
                  frame.attribs[a] = value;
                }
              } catch (e) {
                // Unparseable srcset
                delete frame.attribs[a];
                return;
              }
            }
            if (a === 'class') {
              const allowedSpecificClasses = allowedClassesMap[name];
              const allowedWildcardClasses = allowedClassesMap['*'];
              const allowedSpecificClassesGlob = allowedClassesGlobMap[name];
              const allowedSpecificClassesRegex = allowedClassesRegexMap[name];
              const allowedWildcardClassesGlob = allowedClassesGlobMap['*'];
              const allowedClassesGlobs = [
                allowedSpecificClassesGlob,
                allowedWildcardClassesGlob
              ]
                .concat(allowedSpecificClassesRegex)
                .filter(function (t) {
                  return t;
                });
              if (allowedSpecificClasses && allowedWildcardClasses) {
                value = filterClasses(value, deepmerge(allowedSpecificClasses, allowedWildcardClasses), allowedClassesGlobs);
              } else {
                value = filterClasses(value, allowedSpecificClasses || allowedWildcardClasses, allowedClassesGlobs);
              }
              if (!value.length) {
                delete frame.attribs[a];
                return;
              }
            }
            if (a === 'style') {
              if (options.parseStyleAttributes) {
                try {
                  // Wrap the declarations in a rule so postcss can parse them;
                  // the tag name serves as the selector that filterCss looks up.
                  const abstractSyntaxTree = postcssParse(name + ' {' + value + '}', { map: false });
                  const filteredAST = filterCss(abstractSyntaxTree, options.allowedStyles);

                  value = stringifyStyleAttributes(filteredAST);

                  if (value.length === 0) {
                    delete frame.attribs[a];
                    return;
                  }
                } catch (e) {
                  if (typeof window !== 'undefined') {
                    console.warn('Failed to parse "' + name + ' {' + value + '}' + '", If you\'re running this in a browser, we recommend to disable style parsing: options.parseStyleAttributes: false, since this only works in a node environment due to a postcss dependency, More info: https://github.com/apostrophecms/sanitize-html/issues/547');
                  }
                  delete frame.attribs[a];
                  return;
                }
              } else if (options.allowedStyles) {
                throw new Error('allowedStyles option cannot be used together with parseStyleAttributes: false.');
              }
            }
            result += ' ' + a;
            if (value && value.length) {
              result += '="' + escapeHtml(value, true) + '"';
            } else if (options.allowedEmptyAttributes.includes(a)) {
              result += '=""';
            }
          } else {
            delete frame.attribs[a];
          }
        });
      }
      if (options.selfClosing.indexOf(name) !== -1) {
        result += ' />';
      } else {
        result += '>';
        if (frame.innerText && !hasText && !options.textFilter) {
          result += escapeHtml(frame.innerText);
          addedText = true;
        }
      }
      if (skip) {
        // Escape the captured tag markup and splice it back into the output.
        result = tempResult + escapeHtml(result);
        tempResult = '';
      }
    },
    ontext: function(text) {
      if (skipText) {
        return;
      }
      const lastFrame = stack[stack.length - 1];
      let tag;

      if (lastFrame) {
        tag = lastFrame.tag;
        // If inner text was set by transform function then let's use it
        text = lastFrame.innerText !== undefined ? lastFrame.innerText : text;
      }

      if (options.disallowedTagsMode === 'completelyDiscard' && !tagAllowed(tag)) {
        text = '';
      } else if ((options.disallowedTagsMode === 'discard' || options.disallowedTagsMode === 'completelyDiscard') && ((tag === 'script') || (tag === 'style'))) {
        // htmlparser2 gives us these as-is. Escaping them ruins the content. Allowing
        // script tags is, by definition, game over for XSS protection, so if that's
        // your concern, don't allow them. The same is essentially true for style tags
        // which have their own collection of XSS vectors.
        result += text;
      } else {
        const escaped = escapeHtml(text, false);
        if (options.textFilter && !addedText) {
          result += options.textFilter(escaped, tag);
        } else if (!addedText) {
          result += escaped;
        }
      }
      if (stack.length) {
        const frame = stack[stack.length - 1];
        frame.text += text;
      }
    },
    onclosetag: function(name, isImplied) {

      if (skipText) {
        skipTextDepth--;
        if (!skipTextDepth) {
          skipText = false;
        } else {
          return;
        }
      }

      const frame = stack.pop();
      if (!frame) {
        // Do not crash on bad markup
        return;
      }

      if (frame.tag !== name) {
        // Another case of bad markup.
        // Push to stack, so that it will be used in future closing tags.
        stack.push(frame);
        return;
      }

      // With enforceHtmlBoundary, everything after </html> is ignored.
      skipText = options.enforceHtmlBoundary ? name === 'html' : false;
      depth--;
      const skip = skipMap[depth];
      if (skip) {
        delete skipMap[depth];
        if (options.disallowedTagsMode === 'discard' || options.disallowedTagsMode === 'completelyDiscard') {
          frame.updateParentNodeText();
          return;
        }
        tempResult = result;
        result = '';
      }

      if (transformMap[depth]) {
        name = transformMap[depth];
        delete transformMap[depth];
      }

      if (options.exclusiveFilter && options.exclusiveFilter(frame)) {
        // Drop everything this element wrote to the output.
        result = result.substring(0, frame.tagPosition);
        return;
      }

      frame.updateParentNodeMediaChildren();
      frame.updateParentNodeText();

      if (
        // Already output />
        options.selfClosing.indexOf(name) !== -1 ||
        // Escaped tag, closing tag is implied
        (isImplied && !tagAllowed(name) && [ 'escape', 'recursiveEscape' ].indexOf(options.disallowedTagsMode) >= 0)
      ) {
        if (skip) {
          result = tempResult;
          tempResult = '';
        }
        return;
      }

      result += '</' + name + '>';
      if (skip) {
        result = tempResult + escapeHtml(result);
        tempResult = '';
      }
      addedText = false;
    }
  }, options.parser);
  parser.write(html);
  parser.end();

  return result;

  /**
   * Resets the output buffer and all per-parse bookkeeping.
   */
  function initializeState() {
    result = '';
    depth = 0;
    stack = [];
    skipMap = {};
    transformMap = {};
    skipText = false;
    skipTextDepth = 0;
  }

  /**
   * Escapes `&`, `<` and `>` (and `"` when `quote` is truthy) in `s`.
   * Non-string input is converted to a string first.
   *
   * When `parser.decodeEntities` is on, every `&` is escaped in a first
   * pass. The second pass always runs and escapes only ampersands that do
   * not already look like the start of an entity.
   *
   * @param {*} s - Text to escape.
   * @param {boolean} [quote] - Also escape double quotes (attribute values).
   * @return {string} The escaped text.
   */
  function escapeHtml(s, quote) {
    if (typeof (s) !== 'string') {
      s = s + '';
    }
    if (options.parser.decodeEntities) {
      s = s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
      if (quote) {
        s = s.replace(/"/g, '&quot;');
      }
    }
    // TODO: this is inadequate because it will pass `&0;`. This approach
    // will not work, each & must be considered with regard to whether it
    // is followed by a 100% syntactically valid entity or not, and escaped
    // if it is not. If this bothers you, don't set parser.decodeEntities
    // to false. (The default is true.)
    s = s.replace(/&(?![a-zA-Z0-9#]{1,20};)/g, '&amp;') // Match ampersands not part of existing HTML entity
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
    if (quote) {
      s = s.replace(/"/g, '&quot;');
    }
    return s;
  }

  /**
   * Decides whether a URL-valued attribute must be rejected.
   *
   * @param {string} name - Tag name (or 'srcset'), used to look up
   *   `allowedSchemesByTag`.
   * @param {string} href - The attribute value.
   * @return {boolean} True when the URL uses a scheme that is not allowed,
   *   or is protocol-relative while `allowProtocolRelative` is off.
   */
  function naughtyHref(name, href) {
    // Browsers ignore character codes of 32 (space) and below in a surprising
    // number of situations. Start reading here:
    // https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Embedded_tab
    // eslint-disable-next-line no-control-regex
    href = href.replace(/[\x00-\x20]+/g, '');
    // Clobber any comments in URLs, which the browser might
    // interpret inside an XML data island, allowing
    // a javascript: URL to be snuck through
    while (true) {
      const firstIndex = href.indexOf('<!--');
      if (firstIndex === -1) {
        break;
      }
      const lastIndex = href.indexOf('-->', firstIndex + 4);
      if (lastIndex === -1) {
        break;
      }
      href = href.substring(0, firstIndex) + href.substring(lastIndex + 3);
    }
    // Case insensitive so we don't get faked out by JAVASCRIPT #1
    // Allow more characters after the first so we don't get faked
    // out by certain schemes browsers accept
    const matches = href.match(/^([a-zA-Z][a-zA-Z0-9.\-+]*):/);
    if (!matches) {
      // Protocol-relative URL starting with any combination of '/' and '\'
      if (href.match(/^[/\\]{2}/)) {
        return !options.allowProtocolRelative;
      }

      // No scheme
      return false;
    }
    const scheme = matches[1].toLowerCase();

    if (has(options.allowedSchemesByTag, name)) {
      return options.allowedSchemesByTag[name].indexOf(scheme) === -1;
    }

    return !options.allowedSchemes || options.allowedSchemes.indexOf(scheme) === -1;
  }

  /**
   * Parses `value` with the WHATWG `URL` parser against a placeholder base
   * so that relative URLs parse too.
   *
   * @param {string} value - URL taken from a `src` attribute.
   * @return {{isRelativeUrl: boolean, url: URL}} The parsed URL and whether
   *   it resolved against the placeholder base.
   * @throws {Error} When `value` starts with `relative:` or cannot be parsed.
   */
  function parseUrl(value) {
    // Normalize slash/backslash pairs after an optional scheme into `//`.
    value = value.replace(/^(\w+:)?\s*[\\/]\s*[\\/]/, '$1//');
    if (value.startsWith('relative:')) {
      // An attempt to exploit our workaround for base URLs being
      // mandatory for relative URL validation in the WHATWG
      // URL parser, reject it
      throw new Error('relative: exploit attempt');
    }
    // naughtyHref is in charge of whether protocol relative URLs
    // are cool. Here we are concerned just with allowed hostnames and
    // whether to allow relative URLs.
    //
    // Build a placeholder "base URL" against which any reasonable
    // relative URL may be parsed successfully
    let base = 'relative://relative-site';
    for (let i = 0; (i < 100); i++) {
      base += `/${i}`;
    }

    const parsed = new URL(value, base);

    const isRelativeUrl = parsed && parsed.hostname === 'relative-site' && parsed.protocol === 'relative:';
    return {
      isRelativeUrl,
      url: parsed
    };
  }

  /**
   * Filters user input css properties by allowlisted regex attributes.
   * Modifies the abstractSyntaxTree object.
   *
   * @param {object} abstractSyntaxTree  - Object representation of CSS attributes.
   * @property {array[Declaration]} abstractSyntaxTree.nodes[0] - Each object contains prop and value key, i.e { prop: 'color', value: 'red' }.
   * @param {object} allowedStyles       - Keys are properties (i.e color), value is list of permitted regex rules (i.e /green/i).
   * @return {object}                    - The modified tree.
   */
  function filterCss(abstractSyntaxTree, allowedStyles) {
    if (!allowedStyles) {
      return abstractSyntaxTree;
    }

    const astRules = abstractSyntaxTree.nodes[0];
    let selectedRule;

    // Merge global and tag-specific styles into new AST.
    if (allowedStyles[astRules.selector] && allowedStyles['*']) {
      selectedRule = deepmerge(
        allowedStyles[astRules.selector],
        allowedStyles['*']
      );
    } else {
      selectedRule = allowedStyles[astRules.selector] || allowedStyles['*'];
    }

    if (selectedRule) {
      abstractSyntaxTree.nodes[0].nodes = astRules.nodes.reduce(filterDeclarations(selectedRule), []);
    }

    return abstractSyntaxTree;
  }

  /**
   * Extracts the style attributes from an AbstractSyntaxTree and formats those
   * values in the inline style attribute format.
   *
   * @param  {AbstractSyntaxTree} filteredAST
   * @return {string}             - Example: "color:yellow;text-align:center !important;font-family:helvetica"
   */
  function stringifyStyleAttributes(filteredAST) {
    return filteredAST.nodes[0].nodes
      .reduce(function(extractedAttributes, attrObject) {
        extractedAttributes.push(
          `${attrObject.prop}:${attrObject.value}${attrObject.important ? ' !important' : ''}`
        );
        return extractedAttributes;
      }, [])
      .join(';');
  }

  /**
   * Filters the existing attributes for the given property. Discards any attributes
   * which don't match the allowlist.
   *
   * @param  {object} selectedRule             - Example: { color: red, font-family: helvetica }
   * @param  {array} allowedDeclarationsList   - List of declarations which pass the allowlist.
   * @param  {object} attributeObject          - Object representing the current css property.
   * @property {string} attributeObject.type   - Typically 'declaration'.
   * @property {string} attributeObject.prop   - The CSS property, i.e 'color'.
   * @property {string} attributeObject.value  - The corresponding value to the css property, i.e 'red'.
   * @return {function}                        - When used in Array.reduce, will return an array of Declaration objects
   */
  function filterDeclarations(selectedRule) {
    return function (allowedDeclarationsList, attributeObject) {
      // If this property is allowlisted...
      if (has(selectedRule, attributeObject.prop)) {
        const matchesRegex = selectedRule[attributeObject.prop].some(function(regularExpression) {
          return regularExpression.test(attributeObject.value);
        });

        if (matchesRegex) {
          allowedDeclarationsList.push(attributeObject);
        }
      }
      return allowedDeclarationsList;
    };
  }

  /**
   * Keeps only the class names that are listed in `allowed` or match one of
   * `allowedGlobs`.
   *
   * @param {string} classes - Raw `class` attribute value.
   * @param {Array<string>|*} allowed - Exact class names; when falsy the
   *   attribute is returned unfiltered.
   * @param {Array<RegExp>} allowedGlobs - Patterns tested against each class.
   * @return {string} Space-joined list of accepted classes.
   */
  function filterClasses(classes, allowed, allowedGlobs) {
    if (!allowed) {
      // The class attribute is allowed without filtering on this tag
      return classes;
    }
    classes = classes.split(/\s+/);
    return classes.filter(function(clss) {
      return allowed.indexOf(clss) !== -1 || allowedGlobs.some(function(glob) {
        return glob.test(clss);
      });
    }).join(' ');
  }
}

// Defaults are accessible to you so that you can use them as a starting point
// programmatically if you wish

// Default options handed to the htmlparser2 Parser; merged with
// `options.parser` on every call.
const htmlParserDefaults = {
  decodeEntities: true
};
sanitizeHtml.defaults = {
  allowedTags: [
    // Sections derived from MDN element categories and limited to the more
    // benign categories.
    // https://developer.mozilla.org/en-US/docs/Web/HTML/Element
    // Content sectioning
    'address', 'article', 'aside', 'footer', 'header',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hgroup',
    'main', 'nav', 'section',
    // Text content
    'blockquote', 'dd', 'div', 'dl', 'dt', 'figcaption', 'figure',
    'hr', 'li', 'main', 'ol', 'p', 'pre', 'ul',
    // Inline text semantics
    'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite', 'code', 'data', 'dfn',
    'em', 'i', 'kbd', 'mark', 'q',
    'rb', 'rp', 'rt', 'rtc', 'ruby',
    's', 'samp', 'small', 'span', 'strong', 'sub', 'sup', 'time', 'u', 'var', 'wbr',
    // Table content
    'caption', 'col', 'colgroup', 'table', 'tbody', 'td', 'tfoot', 'th',
    'thead', 'tr'
  ],
  // Attributes that cannot be boolean: an empty value for one of these is
  // dropped unless the attribute is listed in `allowedEmptyAttributes`.
  nonBooleanAttributes: [
    'abbr', 'accept', 'accept-charset', 'accesskey', 'action',
    'allow', 'alt', 'as', 'autocapitalize', 'autocomplete',
    'blocking', 'charset', 'cite', 'class', 'color', 'cols',
    'colspan', 'content', 'contenteditable', 'coords', 'crossorigin',
    'data', 'datetime', 'decoding', 'dir', 'dirname', 'download',
    'draggable', 'enctype', 'enterkeyhint', 'fetchpriority', 'for',
    'form', 'formaction', 'formenctype', 'formmethod', 'formtarget',
    'headers', 'height', 'hidden', 'high', 'href', 'hreflang',
    'http-equiv', 'id', 'imagesizes', 'imagesrcset', 'inputmode',
    'integrity', 'is', 'itemid', 'itemprop', 'itemref', 'itemtype',
    'kind', 'label', 'lang', 'list', 'loading', 'low', 'max',
    'maxlength', 'media', 'method', 'min', 'minlength', 'name',
    'nonce', 'optimum', 'pattern', 'ping', 'placeholder', 'popover',
    'popovertarget', 'popovertargetaction', 'poster', 'preload',
    'referrerpolicy', 'rel', 'rows', 'rowspan', 'sandbox', 'scope',
    'shape', 'size', 'sizes', 'slot', 'span', 'spellcheck', 'src',
    'srcdoc', 'srclang', 'srcset', 'start', 'step', 'style',
    'tabindex', 'target', 'title', 'translate', 'type', 'usemap',
    'value', 'width', 'wrap',
    // Event handlers
    'onauxclick', 'onafterprint', 'onbeforematch', 'onbeforeprint',
    'onbeforeunload', 'onbeforetoggle', 'onblur', 'oncancel',
    'oncanplay', 'oncanplaythrough', 'onchange', 'onclick', 'onclose',
    'oncontextlost', 'oncontextmenu', 'oncontextrestored', 'oncopy',
    'oncuechange', 'oncut', 'ondblclick', 'ondrag', 'ondragend',
    'ondragenter', 'ondragleave', 'ondragover', 'ondragstart',
    'ondrop', 'ondurationchange', 'onemptied', 'onended',
    'onerror', 'onfocus', 'onformdata', 'onhashchange', 'oninput',
    'oninvalid', 'onkeydown', 'onkeypress', 'onkeyup',
    'onlanguagechange', 'onload', 'onloadeddata', 'onloadedmetadata',
    'onloadstart', 'onmessage', 'onmessageerror', 'onmousedown',
    'onmouseenter', 'onmouseleave', 'onmousemove', 'onmouseout',
    'onmouseover', 'onmouseup', 'onoffline', 'ononline', 'onpagehide',
    'onpageshow', 'onpaste', 'onpause', 'onplay', 'onplaying',
    'onpopstate', 'onprogress', 'onratechange', 'onreset', 'onresize',
    'onrejectionhandled', 'onscroll', 'onscrollend',
    'onsecuritypolicyviolation', 'onseeked', 'onseeking', 'onselect',
    'onslotchange', 'onstalled', 'onstorage', 'onsubmit', 'onsuspend',
    'ontimeupdate', 'ontoggle', 'onunhandledrejection', 'onunload',
    'onvolumechange', 'onwaiting', 'onwheel'
  ],
  disallowedTagsMode: 'discard',
  allowedAttributes: {
    a: [ 'href', 'name', 'target' ],
    // We don't currently allow img itself by default, but
    // these attributes would make sense if we did.
    img: [ 'src', 'srcset', 'alt', 'title', 'width', 'height', 'loading' ]
  },
  allowedEmptyAttributes: [
    'alt'
  ],
  // Lots of these won't come up by default because we don't allow them
  selfClosing: [ 'img', 'br', 'hr', 'area', 'base', 'basefont', 'input', 'link', 'meta' ],
  // URL schemes we permit
  allowedSchemes: [ 'http', 'https', 'ftp', 'mailto', 'tel' ],
  allowedSchemesByTag: {},
  allowedSchemesAppliedToAttributes: [ 'href', 'src', 'cite' ],
  allowProtocolRelative: true,
  enforceHtmlBoundary: false,
  parseStyleAttributes: true
};

/**
 * Builds a `transformTags` function that renames a tag and optionally sets
 * attributes on it.
 *
 * @param {string} newTagName - Tag name the transform reports.
 * @param {object} [newAttribs] - Attributes to apply; defaults to `{}`.
 * @param {boolean} [merge=true] - When truthy, `newAttribs` are copied onto
 *   the incoming `attribs` object (which is modified in place); otherwise
 *   `newAttribs` replaces the incoming attributes entirely.
 * @return {function(string, object): {tagName: string, attribs: object}}
 */
sanitizeHtml.simpleTransform = function(newTagName, newAttribs, merge) {
  merge = (merge === undefined) ? true : merge;
  newAttribs = newAttribs || {};

  return function(tagName, attribs) {
    let attrib;
    if (merge) {
      for (attrib in newAttribs) {
        attribs[attrib] = newAttribs[attrib];
      }
    } else {
      attribs = newAttribs;
    }

    return {
      tagName: newTagName,
      attribs: attribs
    };
  };
};

Reference in New Issue
Block a user