This commit is contained in:
2025-01-04 00:34:03 +01:00
parent 41829408dc
commit 0ca14bbc19
18111 changed files with 1871397 additions and 0 deletions

18
resources/app/node_modules/htmlparser2/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,18 @@
Copyright 2010, 2011, Chris Winberry <chris@winberry.net>. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.

View File

@@ -0,0 +1 @@
{"version":3,"file":"Parser.d.ts","sourceRoot":"https://raw.githubusercontent.com/fb55/htmlparser2/c123610e003a1eaebc61febed01cabb6e41eb658/src/","sources":["Parser.ts"],"names":[],"mappings":"AAAA,OAAO,SAAS,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAuGjE,MAAM,WAAW,aAAa;IAC1B;;;;;;;OAOG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAElB;;;;OAIG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB;;;;OAIG;IACH,aAAa,CAAC,EAAE,OAAO,CAAC;IAExB;;;;OAIG;IACH,uBAAuB,CAAC,EAAE,OAAO,CAAC;IAElC;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB;;;;;OAKG;IACH,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAE/B;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,SAAS,CAAC;CAChC;AAED,MAAM,WAAW,OAAO;IACpB,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IAEnC;;OAEG;IACH,OAAO,IAAI,IAAI,CAAC;IAEhB;;OAEG;IACH,KAAK,IAAI,IAAI,CAAC;IACd,OAAO,CAAC,KAAK,EAAE,KAAK,GAAG,IAAI,CAAC;IAC5B,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,GAAG,IAAI,CAAC;IACnD,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC;;;;;OAKG;IACH,WAAW,CACP,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,EACb,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,GAAG,IAAI,GAClC,IAAI,CAAC;IACR,SAAS,CACL,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE;QAAE,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAAA;KAAE,EAChC,SAAS,EAAE,OAAO,GACnB,IAAI,CAAC;IACR,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,YAAY,IAAI,IAAI,CAAC;IACrB,UAAU,IAAI,IAAI,CAAC;IACnB,YAAY,IAAI,IAAI,CAAC;IACrB,uBAAuB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7D;AAID,qBAAa,MAAO,YAAW,SAAS;IA+BhC,OAAO,CAAC,QAAQ,CAAC,OAAO;IA9B5B,yCAAyC;IAClC,UAAU,SAAK;IACtB,uCAAuC;IAChC,QAAQ,SAAK;IACpB;;;OAGG;IACH,OAAO,CAAC,YAAY,CAAK;IAEzB,OAAO,CAAC,OAAO,CAAM;IACrB,OAAO,CAAC,UAAU,CAAM;IACxB,OAAO,CAAC,WAAW,CAAM;IACzB,OAAO,CAAC,OAAO,CAA0C;IACzD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAgB;IACtC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAiB;IAChD,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAmB;IACvC,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAU;IAC5C,OAAO,CAAC,QAAQ,CAAC,uBAAuB,CAAU;IAClD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAY;IAEtC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAgB;IACxC,OAAO,CAAC,YAAY,CAAK;IACzB,kFAAkF;IAClF,OAAO,CAAC,UAAU,CAAK;IACvB,kFAAkF;IAClF,
OAAO,CAAC,KAAK,CAAS;gBAGlB,GAAG,CAAC,EAAE,OAAO,CAAC,OAAO,CAAC,GAAG,IAAI,EACZ,OAAO,GAAE,aAAkB;IAehD,gBAAgB;IAChB,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IAO7C,gBAAgB;IAChB,YAAY,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAW9B,SAAS,CAAC,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO;IAI9C,gBAAgB;IAChB,aAAa,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IAYpD,OAAO,CAAC,WAAW;IA4BnB,OAAO,CAAC,UAAU;IAclB,gBAAgB;IAChB,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI;IAQpC,gBAAgB;IAChB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IA0CjD,gBAAgB;IAChB,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI;IAiBxC,OAAO,CAAC,eAAe;IAYvB,gBAAgB;IAChB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IASnD,gBAAgB;IAChB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IAInD,gBAAgB;IAChB,cAAc,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAIhC,gBAAgB;IAChB,WAAW,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IAwBrD,OAAO,CAAC,kBAAkB;IAW1B,gBAAgB;IAChB,aAAa,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IAapD,gBAAgB;IAChB,uBAAuB,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IAa9D,gBAAgB;IAChB,SAAS,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI;IAUhE,gBAAgB;IAChB,OAAO,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI;IAiB9D,gBAAgB;IAChB,KAAK,IAAI,IAAI;IAab;;OAEG;IACI,KAAK,IAAI,IAAI;IAgBpB;;;;;OAKG;IACI,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAKxC,OAAO,CAAC,QAAQ;IAkBhB,OAAO,CAAC,WAAW;IAMnB;;;;OAIG;IACI,KAAK,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAajC;;;;OAIG;IACI,GAAG,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI;IAWhC;;OAEG;IACI,KAAK,IAAI,IAAI;IAIpB;;OAEG;IACI,MAAM,IAAI,IAAI;IAarB;;;;;OAKG;IACI,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAGtC;;;;;OAKG;IACI,IAAI,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI;CAGpC"}

518
resources/app/node_modules/htmlparser2/lib/Parser.js generated vendored Normal file
View File

@@ -0,0 +1,518 @@
"use strict";
/**
 * TypeScript emit helper: re-exports property `k` of module `m` on object `o`
 * (under the name `k2`, which defaults to `k`).
 *
 * When `Object.create` is available the property is defined with a descriptor:
 * the source's own accessor descriptor is reused only for `__esModule` sources,
 * otherwise (missing, writable or configurable source property) an enumerable
 * getter that reads `m[k]` on every access is installed. Without
 * `Object.create` the current value is simply copied.
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function (target, source, key, alias) {
    var exportedAs = alias === undefined ? key : alias;
    var descriptor = Object.getOwnPropertyDescriptor(source, key);
    var needsLiveGetter;
    if (!descriptor) {
        needsLiveGetter = true;
    }
    else if ("get" in descriptor) {
        needsLiveGetter = !source.__esModule;
    }
    else {
        needsLiveGetter = descriptor.writable || descriptor.configurable;
    }
    if (needsLiveGetter) {
        descriptor = {
            enumerable: true,
            get: function () {
                return source[key];
            }
        };
    }
    Object.defineProperty(target, exportedAs, descriptor);
}) : (function (target, source, key, alias) {
    var exportedAs = alias === undefined ? key : alias;
    target[exportedAs] = source[key];
}));
/**
 * TypeScript emit helper: stores `v` as the `default` member of namespace
 * object `o`. With `Object.create` available the member is defined as an
 * enumerable data property (not writable, not configurable); otherwise it is
 * assigned directly.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (namespace, value) {
            Object.defineProperty(namespace, "default", { enumerable: true, value: value });
        };
    }
    return function (namespace, value) {
        namespace["default"] = value;
    };
})();
/**
 * TypeScript emit helper backing `import * as ns from "..."`.
 *
 * A module flagged with `__esModule` is returned untouched. Anything else is
 * wrapped in a fresh namespace object: every own enumerable key except
 * `default` is bound via `__createBinding`, and the module itself becomes the
 * namespace's `default` via `__setModuleDefault`.
 */
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var namespace = {};
    if (mod != null) {
        for (var key in mod) {
            if (key === "default") {
                continue;
            }
            if (!Object.prototype.hasOwnProperty.call(mod, key)) {
                continue;
            }
            __createBinding(namespace, mod, key);
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.Parser = void 0;
var Tokenizer_js_1 = __importStar(require("./Tokenizer.js"));
var decode_js_1 = require("entities/lib/decode.js");
/** Form controls that are implicitly closed when another form control opens. */
var formTags = new Set([
    "input",
    "option",
    "optgroup",
    "select",
    "button",
    "datalist",
    "textarea",
]);
/** Shared "closes an open <p>" set. */
var pTag = new Set(["p"]);
var tableSectionTags = new Set(["thead", "tbody"]);
var ddtTags = new Set(["dd", "dt"]);
var rtpTags = new Set(["rt", "rp"]);
/**
 * Maps a tag name to the set of tag names that are implicitly closed when that
 * tag opens (only the element on top of the parser stack is considered, see
 * `Parser#emitOpenTag`).
 *
 * The table is written as groups of opening tags that share one "closes" set;
 * the groups are listed so the resulting Map keeps the same key order and the
 * same shared Set instances as a flat literal would.
 */
var openImpliesClose = new Map();
(function (groups) {
    groups.forEach(function (group) {
        var openingTags = group[0];
        var closedTags = group[1];
        openingTags.forEach(function (tag) {
            openImpliesClose.set(tag, closedTags);
        });
    });
})([
    [["tr"], new Set(["tr", "th", "td"])],
    [["th"], new Set(["th"])],
    [["td"], new Set(["thead", "th", "td"])],
    [["body"], new Set(["head", "link", "script"])],
    [["li"], new Set(["li"])],
    [["p", "h1", "h2", "h3", "h4", "h5", "h6"], pTag],
    [["select", "input", "output", "button", "datalist", "textarea"], formTags],
    [["option"], new Set(["option"])],
    [["optgroup"], new Set(["optgroup", "option"])],
    [["dd", "dt"], ddtTags],
    [
        [
            "address",
            "article",
            "aside",
            "blockquote",
            "details",
            "div",
            "dl",
            "fieldset",
            "figcaption",
            "figure",
            "footer",
            "form",
            "header",
            "hr",
            "main",
            "nav",
            "ol",
            "pre",
            "section",
            "table",
            "ul",
        ],
        pTag,
    ],
    [["rt", "rp"], rtpTags],
    [["tbody", "tfoot"], tableSectionTags],
]);
/**
 * Tag names treated as void elements: outside of XML mode they are never
 * pushed onto the parser's element stack (see `Parser#isVoidElement`).
 */
var voidElements = new Set();
[
    "area",
    "base",
    "basefont",
    "br",
    "col",
    "command",
    "embed",
    "frame",
    "hr",
    "img",
    "input",
    "isindex",
    "keygen",
    "link",
    "meta",
    "param",
    "source",
    "track",
    "wbr",
].forEach(function (tag) {
    voidElements.add(tag);
});
/**
 * Tags that start a foreign-content context: when one of these opens, the
 * parser pushes `true` onto its `foreignContext` stack, which makes
 * `onselfclosingtag` honor `/>` even outside of XML mode.
 */
var foreignContextElements = new Set(["math", "svg"]);
/**
 * Tags that switch back out of a foreign-content context: when one of these
 * opens, the parser pushes `false` onto its `foreignContext` stack. Closing a
 * tag from either set pops one entry from that stack.
 */
var htmlIntegrationElements = new Set([
"mi",
"mo",
"mn",
"ms",
"mtext",
"annotation-xml",
"foreignobject",
"desc",
"title",
]);
/**
 * Matches the first whitespace character or `/`; `getInstructionName` uses it
 * to find where the name of a declaration / processing instruction ends.
 */
var reNameEnd = /\s|\//;
var Parser = /** @class */ (function () {
    /**
     * Turns low-level tokenizer events into the higher-level handler callbacks
     * (`onopentag`, `ontext`, `onclosetag`, ...), tracking the open-element
     * stack and the source indices of the last event.
     *
     * @param cbs Handler object; every `on*` callback is optional. `null` or
     * `undefined` is treated as "no handlers".
     * @param options Parser options. Read by this class: `xmlMode`,
     * `lowerCaseTags`, `lowerCaseAttributeNames`, `recognizeSelfClosing`,
     * `recognizeCDATA` and `Tokenizer` (a replacement tokenizer constructor).
     * The same object is passed on to the tokenizer.
     */
    function Parser(cbs, options) {
        if (options === void 0) { options = {}; }
        this.options = options;
        /** The start index of the last event. */
        this.startIndex = 0;
        /** The end index of the last event. */
        this.endIndex = 0;
        /**
         * Store the start index of the current open tag,
         * so we can update the start index for attributes.
         */
        this.openTagStart = 0;
        this.tagname = "";
        this.attribname = "";
        this.attribvalue = "";
        /** Attribute map of the open tag being read; only allocated when `onopentag` is set. */
        this.attribs = null;
        /** Names of the currently open elements, innermost last. */
        this.stack = [];
        /** One entry per open foreign / HTML-integration element; `true` means "inside foreign content". */
        this.foreignContext = [];
        /** Chunks that were written but may still be needed by `getSlice`. */
        this.buffers = [];
        /** Absolute index of the first character of `buffers[0]`. */
        this.bufferOffset = 0;
        /** The index of the last written buffer. Used when resuming after a `pause()`. */
        this.writeIndex = 0;
        /** Indicates whether the parser has finished running / `.end` has been called. */
        this.ended = false;
        this.cbs = cbs != null ? cbs : {};
        // Both lower-casing switches default to "on" unless XML mode is enabled.
        var lowerCaseTags = options.lowerCaseTags;
        this.lowerCaseTagNames = lowerCaseTags != null ? lowerCaseTags : !options.xmlMode;
        var lowerCaseAttributeNames = options.lowerCaseAttributeNames;
        this.lowerCaseAttributeNames =
            lowerCaseAttributeNames != null ? lowerCaseAttributeNames : !options.xmlMode;
        var customTokenizer = options.Tokenizer;
        var TokenizerImpl = customTokenizer != null ? customTokenizer : Tokenizer_js_1.default;
        this.tokenizer = new TokenizerImpl(this.options, this);
        var onparserinit = this.cbs.onparserinit;
        if (onparserinit != null) {
            onparserinit.call(this.cbs, this);
        }
    }
    // Tokenizer event handlers
    /**
     * Emits the text between `start` and `endIndex` via `ontext`.
     * @internal
     */
    Parser.prototype.ontext = function (start, endIndex) {
        var data = this.getSlice(start, endIndex);
        this.endIndex = endIndex - 1;
        var ontext = this.cbs.ontext;
        if (ontext != null) {
            ontext.call(this.cbs, data);
        }
        this.startIndex = endIndex;
    };
    /**
     * Emits a decoded entity (given as code point `cp`) as text.
     * @internal
     */
    Parser.prototype.ontextentity = function (cp) {
        /*
         * Entities can be emitted on the character, or directly after.
         * We use the section start here to get accurate indices.
         */
        var index = this.tokenizer.getSectionStart();
        this.endIndex = index - 1;
        var ontext = this.cbs.ontext;
        if (ontext != null) {
            ontext.call(this.cbs, (0, decode_js_1.fromCodePoint)(cp));
        }
        this.startIndex = index;
    };
    /**
     * @param name Tag name to check.
     * @returns `true` when not in XML mode and `name` is a void element.
     */
    Parser.prototype.isVoidElement = function (name) {
        return !this.options.xmlMode && voidElements.has(name);
    };
    /**
     * Handles the name of an opening tag (lower-cased when configured).
     * @internal
     */
    Parser.prototype.onopentagname = function (start, endIndex) {
        this.endIndex = endIndex;
        var name = this.getSlice(start, endIndex);
        if (this.lowerCaseTagNames) {
            name = name.toLowerCase();
        }
        this.emitOpenTag(name);
    };
    /**
     * Starts a new open tag: implicitly closes elements on top of the stack
     * that `name` is configured to close, pushes `name` (unless void), tracks
     * the foreign-content context and emits `onopentagname`.
     */
    Parser.prototype.emitOpenTag = function (name) {
        this.openTagStart = this.startIndex;
        this.tagname = name;
        var impliesClose = !this.options.xmlMode && openImpliesClose.get(name);
        if (impliesClose) {
            while (this.stack.length > 0 &&
                impliesClose.has(this.stack[this.stack.length - 1])) {
                var element = this.stack.pop();
                var onclosetag = this.cbs.onclosetag;
                if (onclosetag != null) {
                    onclosetag.call(this.cbs, element, true);
                }
            }
        }
        if (!this.isVoidElement(name)) {
            this.stack.push(name);
            if (foreignContextElements.has(name)) {
                this.foreignContext.push(true);
            }
            else if (htmlIntegrationElements.has(name)) {
                this.foreignContext.push(false);
            }
        }
        var onopentagname = this.cbs.onopentagname;
        if (onopentagname != null) {
            onopentagname.call(this.cbs, name);
        }
        // Attributes are only collected when somebody listens for `onopentag`.
        if (this.cbs.onopentag) {
            this.attribs = {};
        }
    };
    /**
     * Finishes the current open tag: emits `onopentag` with the collected
     * attributes and, for void elements, an immediate implied `onclosetag`.
     *
     * @param isImplied Whether the open tag was implied rather than present in the input.
     */
    Parser.prototype.endOpenTag = function (isImplied) {
        this.startIndex = this.openTagStart;
        if (this.attribs) {
            var onopentag = this.cbs.onopentag;
            if (onopentag != null) {
                onopentag.call(this.cbs, this.tagname, this.attribs, isImplied);
            }
            this.attribs = null;
        }
        if (this.cbs.onclosetag && this.isVoidElement(this.tagname)) {
            this.cbs.onclosetag(this.tagname, true);
        }
        this.tagname = "";
    };
    /** @internal */
    Parser.prototype.onopentagend = function (endIndex) {
        this.endIndex = endIndex;
        this.endOpenTag(false);
        // Set `startIndex` for next node
        this.startIndex = endIndex + 1;
    };
    /**
     * Handles a closing tag. Closes every element above (and including) the
     * innermost matching open element; unmatched `</p>` and `</br>` produce an
     * implied element outside of XML mode; other unmatched tags are ignored.
     * @internal
     */
    Parser.prototype.onclosetag = function (start, endIndex) {
        this.endIndex = endIndex;
        var name = this.getSlice(start, endIndex);
        if (this.lowerCaseTagNames) {
            name = name.toLowerCase();
        }
        if (foreignContextElements.has(name) ||
            htmlIntegrationElements.has(name)) {
            this.foreignContext.pop();
        }
        if (!this.isVoidElement(name)) {
            var pos = this.stack.lastIndexOf(name);
            if (pos !== -1) {
                if (this.cbs.onclosetag) {
                    var count = this.stack.length - pos;
                    while (count--) {
                        // We know the stack has sufficient elements.
                        // Only the last (matching) element is closed explicitly.
                        this.cbs.onclosetag(this.stack.pop(), count !== 0);
                    }
                }
                else {
                    this.stack.length = pos;
                }
            }
            else if (!this.options.xmlMode && name === "p") {
                // Implicit open before close
                this.emitOpenTag("p");
                this.closeCurrentTag(true);
            }
        }
        else if (!this.options.xmlMode && name === "br") {
            // We can't use `emitOpenTag` for implicit open, as `br` would be implicitly closed.
            var onopentagname = this.cbs.onopentagname;
            if (onopentagname != null) {
                onopentagname.call(this.cbs, "br");
            }
            var onopentag = this.cbs.onopentag;
            if (onopentag != null) {
                onopentag.call(this.cbs, "br", {}, true);
            }
            var onclosetag = this.cbs.onclosetag;
            if (onclosetag != null) {
                onclosetag.call(this.cbs, "br", false);
            }
        }
        // Set `startIndex` for next node
        this.startIndex = endIndex + 1;
    };
    /**
     * Handles `/>`. The tag is closed immediately in XML mode, with
     * `recognizeSelfClosing`, or inside foreign content; otherwise the slash
     * is ignored and the tag is treated as a normal open tag.
     * @internal
     */
    Parser.prototype.onselfclosingtag = function (endIndex) {
        this.endIndex = endIndex;
        if (this.options.xmlMode ||
            this.options.recognizeSelfClosing ||
            this.foreignContext[this.foreignContext.length - 1]) {
            this.closeCurrentTag(false);
            // Set `startIndex` for next node
            this.startIndex = endIndex + 1;
        }
        else {
            // Ignore the fact that the tag is self-closing.
            this.onopentagend(endIndex);
        }
    };
    /**
     * Ends the current open tag and, if it is on top of the stack, closes it.
     *
     * @param isOpenImplied Whether the open tag was implied.
     */
    Parser.prototype.closeCurrentTag = function (isOpenImplied) {
        var name = this.tagname;
        this.endOpenTag(isOpenImplied);
        // Self-closing tags will be on the top of the stack
        if (this.stack[this.stack.length - 1] === name) {
            // If the opening tag isn't implied, the closing tag has to be implied.
            var onclosetag = this.cbs.onclosetag;
            if (onclosetag != null) {
                onclosetag.call(this.cbs, name, !isOpenImplied);
            }
            this.stack.pop();
        }
    };
    /** @internal */
    Parser.prototype.onattribname = function (start, endIndex) {
        this.startIndex = start;
        var name = this.getSlice(start, endIndex);
        this.attribname = this.lowerCaseAttributeNames
            ? name.toLowerCase()
            : name;
    };
    /**
     * Appends a raw slice to the value of the attribute being read.
     * @internal
     */
    Parser.prototype.onattribdata = function (start, endIndex) {
        this.attribvalue += this.getSlice(start, endIndex);
    };
    /**
     * Appends a decoded entity to the value of the attribute being read.
     * @internal
     */
    Parser.prototype.onattribentity = function (cp) {
        this.attribvalue += (0, decode_js_1.fromCodePoint)(cp);
    };
    /**
     * Finishes the current attribute: emits `onattribute(name, value, quote)`
     * where `quote` is `"`, `'`, `undefined` (no value) or `null` (unquoted),
     * and records the attribute unless the tag already has one of that name.
     * @internal
     */
    Parser.prototype.onattribend = function (quote, endIndex) {
        this.endIndex = endIndex;
        var onattribute = this.cbs.onattribute;
        if (onattribute != null) {
            var quoteChar;
            if (quote === Tokenizer_js_1.QuoteType.Double) {
                quoteChar = '"';
            }
            else if (quote === Tokenizer_js_1.QuoteType.Single) {
                quoteChar = "'";
            }
            else if (quote === Tokenizer_js_1.QuoteType.NoValue) {
                quoteChar = undefined;
            }
            else {
                quoteChar = null;
            }
            onattribute.call(this.cbs, this.attribname, this.attribvalue, quoteChar);
        }
        // The first occurrence of a duplicated attribute wins.
        if (this.attribs &&
            !Object.prototype.hasOwnProperty.call(this.attribs, this.attribname)) {
            this.attribs[this.attribname] = this.attribvalue;
        }
        this.attribvalue = "";
    };
    /**
     * @param value Contents of a declaration / processing instruction.
     * @returns The part of `value` before the first whitespace or `/`
     * (lower-cased when tag names are lower-cased).
     */
    Parser.prototype.getInstructionName = function (value) {
        var index = value.search(reNameEnd);
        var name = index < 0 ? value : value.slice(0, index);
        if (this.lowerCaseTagNames) {
            name = name.toLowerCase();
        }
        return name;
    };
    /**
     * Reports a `<!...>` declaration through `onprocessinginstruction`,
     * with `!` prefixed to both name and data.
     * @internal
     */
    Parser.prototype.ondeclaration = function (start, endIndex) {
        this.endIndex = endIndex;
        var value = this.getSlice(start, endIndex);
        if (this.cbs.onprocessinginstruction) {
            var name = this.getInstructionName(value);
            this.cbs.onprocessinginstruction("!".concat(name), "!".concat(value));
        }
        // Set `startIndex` for next node
        this.startIndex = endIndex + 1;
    };
    /**
     * Reports a `<?...>` processing instruction, with `?` prefixed to both
     * name and data.
     * @internal
     */
    Parser.prototype.onprocessinginstruction = function (start, endIndex) {
        this.endIndex = endIndex;
        var value = this.getSlice(start, endIndex);
        if (this.cbs.onprocessinginstruction) {
            var name = this.getInstructionName(value);
            this.cbs.onprocessinginstruction("?".concat(name), "?".concat(value));
        }
        // Set `startIndex` for next node
        this.startIndex = endIndex + 1;
    };
    /**
     * Emits `oncomment` followed by `oncommentend`.
     *
     * @param offset Number of trailing characters before `endIndex` that are
     * not part of the comment text.
     * @internal
     */
    Parser.prototype.oncomment = function (start, endIndex, offset) {
        this.endIndex = endIndex;
        var oncomment = this.cbs.oncomment;
        if (oncomment != null) {
            // The slice is only taken when there is a listener.
            oncomment.call(this.cbs, this.getSlice(start, endIndex - offset));
        }
        var oncommentend = this.cbs.oncommentend;
        if (oncommentend != null) {
            oncommentend.call(this.cbs);
        }
        // Set `startIndex` for next node
        this.startIndex = endIndex + 1;
    };
    /**
     * Emits a CDATA section. In XML mode or with `recognizeCDATA` it is
     * reported as `oncdatastart` / `ontext` / `oncdataend`; otherwise it is
     * reported as a comment with the text `[CDATA[...]]`.
     * @internal
     */
    Parser.prototype.oncdata = function (start, endIndex, offset) {
        this.endIndex = endIndex;
        var value = this.getSlice(start, endIndex - offset);
        if (this.options.xmlMode || this.options.recognizeCDATA) {
            var oncdatastart = this.cbs.oncdatastart;
            if (oncdatastart != null) {
                oncdatastart.call(this.cbs);
            }
            var ontext = this.cbs.ontext;
            if (ontext != null) {
                ontext.call(this.cbs, value);
            }
            var oncdataend = this.cbs.oncdataend;
            if (oncdataend != null) {
                oncdataend.call(this.cbs);
            }
        }
        else {
            var oncomment = this.cbs.oncomment;
            if (oncomment != null) {
                oncomment.call(this.cbs, "[CDATA[".concat(value, "]]"));
            }
            var oncommentend = this.cbs.oncommentend;
            if (oncommentend != null) {
                oncommentend.call(this.cbs);
            }
        }
        // Set `startIndex` for next node
        this.startIndex = endIndex + 1;
    };
    /**
     * Emits an implied `onclosetag` for every element still open (innermost
     * first), then `onend`.
     * @internal
     */
    Parser.prototype.onend = function () {
        if (this.cbs.onclosetag) {
            // Set the end index for all remaining tags
            this.endIndex = this.startIndex;
            var index = this.stack.length;
            while (index > 0) {
                index--;
                this.cbs.onclosetag(this.stack[index], true);
            }
        }
        var onend = this.cbs.onend;
        if (onend != null) {
            onend.call(this.cbs);
        }
    };
    /**
     * Resets the parser to a blank state, ready to parse a new HTML document
     */
    Parser.prototype.reset = function () {
        var onreset = this.cbs.onreset;
        if (onreset != null) {
            onreset.call(this.cbs);
        }
        this.tokenizer.reset();
        this.tagname = "";
        this.attribname = "";
        // A value that was being accumulated must not leak into the next document.
        this.attribvalue = "";
        this.attribs = null;
        this.stack.length = 0;
        // Elements left open in the previous document must not keep the
        // parser in foreign-content mode (which would change `/>` handling).
        this.foreignContext.length = 0;
        this.openTagStart = 0;
        this.startIndex = 0;
        this.endIndex = 0;
        var onparserinit = this.cbs.onparserinit;
        if (onparserinit != null) {
            onparserinit.call(this.cbs, this);
        }
        this.buffers.length = 0;
        this.bufferOffset = 0;
        this.writeIndex = 0;
        this.ended = false;
    };
    /**
     * Resets the parser, then parses a complete document and
     * pushes it to the handler.
     *
     * @param data Document to parse.
     */
    Parser.prototype.parseComplete = function (data) {
        this.reset();
        this.end(data);
    };
    /**
     * Returns the input between the absolute indices `start` and `end`,
     * stitching it together across chunk boundaries. Chunks that lie entirely
     * before the requested range are dropped as a side effect.
     */
    Parser.prototype.getSlice = function (start, end) {
        while (start - this.bufferOffset >= this.buffers[0].length) {
            this.shiftBuffer();
        }
        var slice = this.buffers[0].slice(start - this.bufferOffset, end - this.bufferOffset);
        while (end - this.bufferOffset > this.buffers[0].length) {
            this.shiftBuffer();
            slice += this.buffers[0].slice(0, end - this.bufferOffset);
        }
        return slice;
    };
    /** Drops the oldest buffered chunk and adjusts the bookkeeping accordingly. */
    Parser.prototype.shiftBuffer = function () {
        this.bufferOffset += this.buffers[0].length;
        this.writeIndex--;
        this.buffers.shift();
    };
    /**
     * Parses a chunk of data and calls the corresponding callbacks.
     *
     * While the tokenizer is paused the chunk is only buffered; `resume`
     * feeds it to the tokenizer later. After `end` an error is reported
     * through `onerror` instead.
     *
     * @param chunk Chunk to parse.
     */
    Parser.prototype.write = function (chunk) {
        if (this.ended) {
            var onerror = this.cbs.onerror;
            if (onerror != null) {
                onerror.call(this.cbs, new Error(".write() after done!"));
            }
            return;
        }
        this.buffers.push(chunk);
        if (this.tokenizer.running) {
            this.tokenizer.write(chunk);
            this.writeIndex++;
        }
    };
    /**
     * Parses the end of the buffer and clears the stack, calls onend.
     *
     * @param chunk Optional final chunk to parse.
     */
    Parser.prototype.end = function (chunk) {
        if (this.ended) {
            var onerror = this.cbs.onerror;
            if (onerror != null) {
                onerror.call(this.cbs, new Error(".end() after done!"));
            }
            return;
        }
        if (chunk) {
            this.write(chunk);
        }
        this.ended = true;
        this.tokenizer.end();
    };
    /**
     * Pauses parsing. The parser won't emit events until `resume` is called.
     */
    Parser.prototype.pause = function () {
        this.tokenizer.pause();
    };
    /**
     * Resumes parsing after `pause` was called.
     *
     * Chunks that were buffered while paused are written to the tokenizer
     * (stopping early if a handler pauses again), and a pending `end` is
     * forwarded.
     */
    Parser.prototype.resume = function () {
        this.tokenizer.resume();
        while (this.tokenizer.running &&
            this.writeIndex < this.buffers.length) {
            this.tokenizer.write(this.buffers[this.writeIndex++]);
        }
        if (this.ended) {
            this.tokenizer.end();
        }
    };
    /**
     * Alias of `write`, for backwards compatibility.
     *
     * @param chunk Chunk to parse.
     * @deprecated
     */
    Parser.prototype.parseChunk = function (chunk) {
        this.write(chunk);
    };
    /**
     * Alias of `end`, for backwards compatibility.
     *
     * @param chunk Optional final chunk to parse.
     * @deprecated
     */
    Parser.prototype.done = function (chunk) {
        this.end(chunk);
    };
    return Parser;
}());
exports.Parser = Parser;
//# sourceMappingURL=Parser.js.map

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
{"version":3,"file":"Tokenizer.d.ts","sourceRoot":"https://raw.githubusercontent.com/fb55/htmlparser2/c123610e003a1eaebc61febed01cabb6e41eb658/src/","sources":["Tokenizer.ts"],"names":[],"mappings":"AAkHA,oBAAY,SAAS;IACjB,OAAO,IAAI;IACX,QAAQ,IAAI;IACZ,MAAM,IAAI;IACV,MAAM,IAAI;CACb;AAED,MAAM,WAAW,SAAS;IACtB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACpD,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACxC,WAAW,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACtD,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACpD,OAAO,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IAClE,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IAClD,SAAS,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACpE,aAAa,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACrD,KAAK,IAAI,IAAI,CAAC;IACd,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACrC,aAAa,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACrD,uBAAuB,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/D,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACzC,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9C,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;CACzC;AAiBD,MAAM,CAAC,OAAO,OAAO,SAAS;IA2BtB,OAAO,CAAC,QAAQ,CAAC,GAAG;IA1BxB,6CAA6C;IAC7C,OAAO,CAAC,KAAK,CAAc;IAC3B,uBAAuB;IACvB,OAAO,CAAC,MAAM,CAAM;IACpB,iEAAiE;IACjE,OAAO,CAAC,YAAY,CAAK;IACzB,oEAAoE;IACpE,OAAO,CAAC,KAAK,CAAK;IAClB,kIAAkI;IAClI,OAAO,CAAC,SAAS,CAAc;IAC/B,oEAAoE;IACpE,OAAO,CAAC,SAAS,CAAS;IAC1B,uDAAuD;IAChD,OAAO,UAAQ;IACtB,wCAAwC;IACxC,OAAO,CAAC,MAAM,CAAK;IAEnB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAU;IAClC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAU;IACzC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAc;gBAGrC,EACI,OAAe,EACf,cAAqB,GACxB,EAAE;QAAE,OAAO,CAAC,EAAE,OAAO,CAAC;QAAC,cAAc,CAAC,EAAE,OAAO,CAAA;KAAE,EACjC,GAAG,EAAE,SAAS;IAO5B,KAAK,IAAI,IAAI;IAWb,KAAK,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAM1B,GAAG,IAAI,IAAI;IAIX,KAAK,IAAI,IAAI;IAIb,MAAM,IAAI,IAAI;IAOrB;;OAEG;IACI,QAAQ,IAAI,MAAM;IAIzB;;OAEG;IACI,eAAe,IAAI
,MAAM;IAIhC,OAAO,CAAC,SAAS;IAejB,OAAO,CAAC,eAAe,CAA0B;IACjD,OAAO,CAAC,aAAa,CAAK;IAC1B,OAAO,CAAC,yBAAyB;IAoBjC,mEAAmE;IACnE,OAAO,CAAC,iBAAiB;IAwCzB,OAAO,CAAC,kBAAkB;IAe1B;;;;;OAKG;IACH,OAAO,CAAC,aAAa;IAkBrB;;;;;;;OAOG;IACH,OAAO,CAAC,kBAAkB;IAwB1B;;;;;OAKG;IACH,OAAO,CAAC,cAAc;IAItB,OAAO,CAAC,YAAY;IAOpB,OAAO,CAAC,kBAAkB;IAyB1B,OAAO,CAAC,cAAc;IAQtB,OAAO,CAAC,yBAAyB;IAYjC,OAAO,CAAC,qBAAqB;IAQ7B,OAAO,CAAC,wBAAwB;IAQhC,OAAO,CAAC,wBAAwB;IAkBhC,OAAO,CAAC,qBAAqB;IAY7B,OAAO,CAAC,oBAAoB;IAQ5B,OAAO,CAAC,uBAAuB;IAa/B,OAAO,CAAC,yBAAyB;IAajC,OAAO,CAAC,sBAAsB;IAmB9B,OAAO,CAAC,iCAAiC;IAGzC,OAAO,CAAC,iCAAiC;IAGzC,OAAO,CAAC,6BAA6B;IAYrC,OAAO,CAAC,sBAAsB;IAW9B,OAAO,CAAC,kBAAkB;IAO1B,OAAO,CAAC,4BAA4B;IAOpC,OAAO,CAAC,kBAAkB;IAW1B,OAAO,CAAC,qBAAqB;IAO7B,OAAO,CAAC,mBAAmB;IAY3B,OAAO,CAAC,SAAS,CAAK;IACtB,OAAO,CAAC,WAAW,CAAK;IACxB,wFAAwF;IACxF,OAAO,CAAC,YAAY,CAAK;IACzB,OAAO,CAAC,YAAY,CAAK;IAEzB,OAAO,CAAC,iBAAiB;IAiBzB,OAAO,CAAC,kBAAkB;IAiD1B,OAAO,CAAC,eAAe;IA8BvB,OAAO,CAAC,wBAAwB;IAUhC,OAAO,CAAC,iBAAiB;IAgBzB,OAAO,CAAC,oBAAoB;IAe5B,OAAO,CAAC,gBAAgB;IAoBxB,OAAO,CAAC,iBAAiB;IAQzB;;OAEG;IACH,OAAO,CAAC,OAAO;IAoBf,OAAO,CAAC,cAAc;IAItB;;;;OAIG;IACH,OAAO,CAAC,KAAK;IA8Hb,OAAO,CAAC,MAAM;IAYd,gCAAgC;IAChC,OAAO,CAAC,kBAAkB;IAwC1B,OAAO,CAAC,WAAW;IAUnB,OAAO,CAAC,aAAa;CAUxB"}

938
resources/app/node_modules/htmlparser2/lib/Tokenizer.js generated vendored Normal file
View File

@@ -0,0 +1,938 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.QuoteType = void 0;
var decode_js_1 = require("entities/lib/decode.js");
/**
 * Character codes the tokenizer compares against. Like a compiled TypeScript
 * enum, the object maps each name to its code and each code back to its name.
 */
var CharCodes;
(function (codes) {
    [
        ["Tab", 9],
        ["NewLine", 10],
        ["FormFeed", 12],
        ["CarriageReturn", 13],
        ["Space", 32],
        ["ExclamationMark", 33],
        ["Number", 35],
        ["Amp", 38],
        ["SingleQuote", 39],
        ["DoubleQuote", 34],
        ["Dash", 45],
        ["Slash", 47],
        ["Zero", 48],
        ["Nine", 57],
        ["Semi", 59],
        ["Lt", 60],
        ["Eq", 61],
        ["Gt", 62],
        ["Questionmark", 63],
        ["UpperA", 65],
        ["LowerA", 97],
        ["UpperF", 70],
        ["LowerF", 102],
        ["UpperZ", 90],
        ["LowerZ", 122],
        ["LowerX", 120],
        ["OpeningSquareBracket", 91],
    ].forEach(function (entry) {
        var label = entry[0];
        var code = entry[1];
        codes[label] = code;
        codes[code] = label;
    });
})(CharCodes || (CharCodes = {}));
/**
 * All the states the tokenizer can be in.
 *
 * States are numbered consecutively from 1 in the order listed below; the
 * object maps each name to its number and each number back to its name.
 */
var State;
(function (states) {
    [
        "Text",
        "BeforeTagName",
        "InTagName",
        "InSelfClosingTag",
        "BeforeClosingTagName",
        "InClosingTagName",
        "AfterClosingTagName",
        // Attributes
        "BeforeAttributeName",
        "InAttributeName",
        "AfterAttributeName",
        "BeforeAttributeValue",
        "InAttributeValueDq",
        "InAttributeValueSq",
        "InAttributeValueNq",
        // Declarations
        "BeforeDeclaration",
        "InDeclaration",
        // Processing instructions
        "InProcessingInstruction",
        // Comments & CDATA
        "BeforeComment",
        "CDATASequence",
        "InSpecialComment",
        "InCommentLike",
        // Special tags
        "BeforeSpecialS",
        "SpecialStartSequence",
        "InSpecialTag",
        "BeforeEntity",
        "BeforeNumericEntity",
        "InNamedEntity",
        "InNumericEntity",
        "InHexEntity",
    ].forEach(function (label, position) {
        var id = position + 1;
        states[label] = id;
        states[id] = label;
    });
})(State || (State = {}));
/**
 * @param c Character code to test.
 * @returns Whether `c` is a space, newline, tab, form feed or carriage return.
 */
function isWhitespace(c) {
    switch (c) {
        case CharCodes.Space:
        case CharCodes.NewLine:
        case CharCodes.Tab:
        case CharCodes.FormFeed:
        case CharCodes.CarriageReturn:
            return true;
        default:
            return false;
    }
}
/**
 * @param c Character code to test.
 * @returns Whether `c` ends a section inside a tag: `/`, `>` or whitespace.
 */
function isEndOfTagSection(c) {
    if (c === CharCodes.Slash) {
        return true;
    }
    if (c === CharCodes.Gt) {
        return true;
    }
    return isWhitespace(c);
}
/**
 * @param c Character code to test.
 * @returns Whether `c` is the code of an ASCII digit `0`-`9`.
 */
function isNumber(c) {
    var atLeastZero = c >= CharCodes.Zero;
    return atLeastZero && c <= CharCodes.Nine;
}
/**
 * @param c Character code to test.
 * @returns Whether `c` is the code of an ASCII letter (`a`-`z` or `A`-`Z`).
 */
function isASCIIAlpha(c) {
    var isLowerCase = c >= CharCodes.LowerA && c <= CharCodes.LowerZ;
    if (isLowerCase) {
        return true;
    }
    return c >= CharCodes.UpperA && c <= CharCodes.UpperZ;
}
/**
 * @param c Character code to test.
 * @returns Whether `c` is the code of a hexadecimal *letter* (`A`-`F` or
 * `a`-`f`). Decimal digits are not covered here; see `isNumber` for those.
 */
function isHexDigit(c) {
    var isUpperHex = c >= CharCodes.UpperA && c <= CharCodes.UpperF;
    if (isUpperHex) {
        return true;
    }
    return c >= CharCodes.LowerA && c <= CharCodes.LowerF;
}
/**
 * How an attribute value was quoted in the source. Exported; the object maps
 * each name to its number and each number back to its name.
 */
var QuoteType = exports.QuoteType || (exports.QuoteType = {});
["NoValue", "Unquoted", "Single", "Double"].forEach(function (label, value) {
    QuoteType[label] = value;
    QuoteType[value] = label;
});
/**
 * Sequences used to match longer strings.
 *
 * We don't have `Script`, `Style`, or `Title` here. Instead, we re-use the *End
 * sequences with an increased offset.
 *
 * Each sequence is stored as the character codes of the text shown below.
 */
var Sequences = (function () {
    /** Converts an ASCII string into a Uint8Array of its character codes. */
    function codesOf(text) {
        var codes = new Uint8Array(text.length);
        for (var i = 0; i < text.length; i++) {
            codes[i] = text.charCodeAt(i);
        }
        return codes;
    }
    return {
        Cdata: codesOf("CDATA["),
        CdataEnd: codesOf("]]>"),
        CommentEnd: codesOf("-->"),
        ScriptEnd: codesOf("</script"),
        StyleEnd: codesOf("</style"),
        TitleEnd: codesOf("</title"),
    };
})();
var Tokenizer = /** @class */ (function () {
/**
 * Creates a tokenizer in its initial (`Text`) state.
 *
 * @param _a Options object; `xmlMode` defaults to `false` and
 * `decodeEntities` defaults to `true` when the property is `undefined`.
 * @param cbs Receiver of the tokenizer events (e.g. `ontext`).
 */
function Tokenizer(_a, cbs) {
    var xmlModeOption = _a.xmlMode;
    var xmlMode = xmlModeOption === void 0 ? false : xmlModeOption;
    var decodeEntitiesOption = _a.decodeEntities;
    var decodeEntities = decodeEntitiesOption === void 0 ? true : decodeEntitiesOption;

    this.cbs = cbs;

    // --- Position and state tracking ---
    /** The current state the tokenizer is in. */
    this.state = State.Text;
    /** The read buffer. */
    this.buffer = "";
    /** The beginning of the section that is currently being read. */
    this.sectionStart = 0;
    /** The index within the buffer that we are currently looking at. */
    this.index = 0;
    /** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
    this.baseState = State.Text;
    /** For special parsing behavior inside of script and style tags. */
    this.isSpecial = false;
    /** Indicates whether the tokenizer has been paused. */
    this.running = true;
    /** The offset of the current buffer. */
    this.offset = 0;

    // --- Sequence matching ---
    this.currentSequence = undefined;
    this.sequenceIndex = 0;

    // --- Entity decoding ---
    this.trieIndex = 0;
    this.trieCurrent = 0;
    /** For named entities, the index of the value. For numeric entities, the code point. */
    this.entityResult = 0;
    this.entityExcess = 0;

    // --- Configuration ---
    this.xmlMode = xmlMode;
    this.decodeEntities = decodeEntities;
    if (xmlMode) {
        this.entityTrie = decode_js_1.xmlDecodeTree;
    }
    else {
        this.entityTrie = decode_js_1.htmlDecodeTree;
    }
}
/**
 * Puts the tokenizer back into the state the constructor leaves it in
 * (configuration such as `xmlMode` / `decodeEntities` is kept).
 *
 * Besides the position and state fields this also clears the special-tag
 * flag, the sequence matcher and the entity decoder state, so that a reset
 * issued in the middle of a special tag or of an entity does not leak stale
 * state into the next document.
 */
Tokenizer.prototype.reset = function () {
    this.state = State.Text;
    this.buffer = "";
    this.sectionStart = 0;
    this.index = 0;
    this.baseState = State.Text;
    this.isSpecial = false;
    this.currentSequence = undefined;
    this.sequenceIndex = 0;
    this.trieIndex = 0;
    this.trieCurrent = 0;
    this.entityResult = 0;
    this.entityExcess = 0;
    this.running = true;
    this.offset = 0;
};
/**
 * Replaces the read buffer with `chunk` and parses it. The length of the
 * previous buffer is added to `offset` first, so indices keep counting across
 * chunks.
 *
 * @param chunk Next piece of input.
 */
Tokenizer.prototype.write = function (chunk) {
    var previousLength = this.buffer.length;
    this.offset = this.offset + previousLength;
    this.buffer = chunk;
    this.parse();
};
/**
 * Signals the end of the input. Does nothing while the tokenizer is paused;
 * otherwise hands over to `finish()`.
 */
Tokenizer.prototype.end = function () {
    if (!this.running) {
        return;
    }
    this.finish();
};
/**
 * Pauses the tokenizer by clearing the `running` flag; `end()` is a no-op
 * while the flag is cleared. `resume()` sets it again.
 */
Tokenizer.prototype.pause = function () {
this.running = false;
};
/**
 * Marks the tokenizer as running again and, if part of the current buffer
 * has not been consumed yet, continues parsing it.
 */
Tokenizer.prototype.resume = function () {
    this.running = true;
    var endOfBuffer = this.buffer.length + this.offset;
    if (this.index >= endOfBuffer) {
        return;
    }
    this.parse();
};
/**
 * The current index within all of the written data.
 *
 * @returns The tokenizer's `index` field, unmodified.
 */
Tokenizer.prototype.getIndex = function () {
return this.index;
};
/**
 * The start of the current section.
 *
 * @returns The tokenizer's `sectionStart` field, unmodified. The parser reads
 * this in `ontextentity` to compute the indices of a decoded entity.
 */
Tokenizer.prototype.getSectionStart = function () {
return this.sectionStart;
};
/**
 * Handles a character in the `Text` state.
 *
 * A `<` ends the current text section: pending text is emitted via `ontext`
 * and the tokenizer moves to `BeforeTagName`. When entities are not decoded,
 * the tokenizer skips ahead to the next `<` instead of looking at every
 * character. When entities are decoded, `&` switches to `BeforeEntity`.
 *
 * @param c Character code at the current index.
 */
Tokenizer.prototype.stateText = function (c) {
    var reachedTagStart = c === CharCodes.Lt ||
        (!this.decodeEntities && this.fastForwardTo(CharCodes.Lt));
    if (!reachedTagStart) {
        if (this.decodeEntities && c === CharCodes.Amp) {
            this.state = State.BeforeEntity;
        }
        return;
    }
    var hasPendingText = this.index > this.sectionStart;
    if (hasPendingText) {
        this.cbs.ontext(this.sectionStart, this.index);
    }
    this.state = State.BeforeTagName;
    this.sectionStart = this.index;
};
/**
 * Matches the name of an opening tag against `currentSequence`, one
 * character per call.
 *
 * While characters keep matching (case-insensitively) the sequence index
 * advances. Once the sequence is exhausted, or on the first mismatch, the
 * character is handed to `stateInTagName`; a mismatch (or a name that
 * continues past the sequence) additionally clears `isSpecial`.
 *
 * @param c Character code at the current index.
 */
Tokenizer.prototype.stateSpecialStartSequence = function (c) {
    var sequence = this.currentSequence;
    var sequenceDone = this.sequenceIndex === sequence.length;
    var matches;
    if (sequenceDone) {
        // If we are at the end of the sequence, make sure the tag name has ended
        matches = isEndOfTagSection(c);
    }
    else {
        // Otherwise, do a case-insensitive comparison
        matches = (c | 0x20) === sequence[this.sequenceIndex];
    }
    if (matches && !sequenceDone) {
        this.sequenceIndex++;
        return;
    }
    if (!matches) {
        this.isSpecial = false;
    }
    this.sequenceIndex = 0;
    this.state = State.InTagName;
    this.stateInTagName(c);
};
/**
 * Look for an end tag. For <title> tags, also decode entities.
 *
 * `sequenceIndex` counts how many characters of `currentSequence` (the end-tag
 * sequence being searched for) have matched so far.
 *
 * @param c Character code at the current index.
 */
Tokenizer.prototype.stateInSpecialTag = function (c) {
// The whole end sequence has matched; it only counts if the name ends here.
if (this.sequenceIndex === this.currentSequence.length) {
if (c === CharCodes.Gt || isWhitespace(c)) {
// Index at which the matched end sequence starts, i.e. where the text ends.
var endOfText = this.index - this.currentSequence.length;
if (this.sectionStart < endOfText) {
// Spoof the index so that reported locations match up.
var actualIndex = this.index;
this.index = endOfText;
this.cbs.ontext(this.sectionStart, endOfText);
this.index = actualIndex;
}
this.isSpecial = false;
this.sectionStart = endOfText + 2; // Skip over the `</`
// Hand the current character to the closing-tag-name state.
this.stateInClosingTagName(c);
return; // We are done; skip the rest of the function.
}
// The name continued past the sequence, so this was not the end tag: start matching again.
this.sequenceIndex = 0;
}
// `c | 0x20` maps ASCII upper-case letters to lower case before comparing.
if ((c | 0x20) === this.currentSequence[this.sequenceIndex]) {
this.sequenceIndex += 1;
}
else if (this.sequenceIndex === 0) {
if (this.currentSequence === Sequences.TitleEnd) {
// We have to parse entities in <title> tags.
if (this.decodeEntities && c === CharCodes.Amp) {
this.state = State.BeforeEntity;
}
}
else if (this.fastForwardTo(CharCodes.Lt)) {
// Outside of <title> tags, we can fast-forward.
// The `<` that was found is the first character of the sequence.
this.sequenceIndex = 1;
}
}
else {
// If we see a `<`, set the sequence index to 1; useful for eg. `<</script>`.
this.sequenceIndex = Number(c === CharCodes.Lt);
}
};
/**
 * Match the characters of `Sequences.Cdata` one by one.
 *
 * When the whole sequence has been seen, the tokenizer switches to the
 * comment-like state and looks for `Sequences.CdataEnd`. On the first
 * mismatch the input is treated as a declaration and the character is
 * reconsumed in that state.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateCDATASequence = function (c) {
    if (c !== Sequences.Cdata[this.sequenceIndex]) {
        this.sequenceIndex = 0;
        this.state = State.InDeclaration;
        this.stateInDeclaration(c); // Reconsume the character
        return;
    }
    this.sequenceIndex += 1;
    if (this.sequenceIndex === Sequences.Cdata.length) {
        this.state = State.InCommentLike;
        this.currentSequence = Sequences.CdataEnd;
        this.sequenceIndex = 0;
        // The CDATA contents begin right after the current character.
        this.sectionStart = this.index + 1;
    }
};
/**
 * Skip ahead through the current buffer until a specific character is
 * found. The search starts at the character AFTER the current index.
 *
 * @param c Char code to look for.
 * @returns Whether the character was found. If it was, `index` points at
 *     it; otherwise `index` is left on the last character of the buffer.
 */
Tokenizer.prototype.fastForwardTo = function (c) {
    // Absolute index just past the end of the current buffer.
    var bufferEnd = this.buffer.length + this.offset;
    for (this.index++; this.index < bufferEnd; this.index++) {
        if (this.buffer.charCodeAt(this.index - this.offset) === c) {
            return true;
        }
    }
    /*
     * The `parse` loop increments the index after every state call,
     * so park it on the last character (`buffer.length + offset - 1`).
     *
     * TODO: Refactor `parse` to increment index before calling states.
     */
    this.index = bufferEnd - 1;
    return false;
};
/**
 * Shared state for comments and CDATA sections, which end with `-->`
 * and `]]>` respectively.
 *
 * What the two end sequences have in common:
 * - They begin with a distinct character.
 * - That character is repeated, so runs of it have to be tolerated.
 * - Every character other than the first one of the sequence can be skipped.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateInCommentLike = function (c) {
    if (c === this.currentSequence[this.sequenceIndex]) {
        this.sequenceIndex += 1;
        if (this.sequenceIndex !== this.currentSequence.length) {
            return;
        }
        // The full end sequence has been seen: report the section.
        if (this.currentSequence === Sequences.CdataEnd) {
            this.cbs.oncdata(this.sectionStart, this.index, 2);
        }
        else {
            this.cbs.oncomment(this.sectionStart, this.index, 2);
        }
        this.sequenceIndex = 0;
        this.sectionStart = this.index + 1;
        this.state = State.Text;
        return;
    }
    if (this.sequenceIndex === 0) {
        // Nothing matched yet: jump to the first character of the sequence.
        if (this.fastForwardTo(this.currentSequence[0])) {
            this.sequenceIndex = 1;
        }
        return;
    }
    // Long runs such as `--->` or `]]]>` keep the match alive.
    if (c !== this.currentSequence[this.sequenceIndex - 1]) {
        this.sequenceIndex = 0;
    }
};
/**
 * Decide whether a character may start a tag name.
 *
 * HTML only permits ASCII letters (a-z, A-Z) in that position. XML permits
 * many more characters (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar),
 * so in XML mode anything that would not end the tag is accepted.
 *
 * @param c Char code to test.
 * @returns Whether `c` can begin a tag name.
 */
Tokenizer.prototype.isTagStartChar = function (c) {
    if (this.xmlMode) {
        return !isEndOfTagSection(c);
    }
    return isASCIIAlpha(c);
};
/**
 * Start matching the name of a possibly special opening tag.
 *
 * @param sequence Sequence the tag name is compared against.
 * @param offset Index into `sequence` at which matching continues.
 */
Tokenizer.prototype.startSpecial = function (sequence, offset) {
    this.state = State.SpecialStartSequence;
    this.currentSequence = sequence;
    this.sequenceIndex = offset;
    // Assume the tag is special until a mismatch proves otherwise.
    this.isSpecial = true;
};
/**
 * State directly after a `<`: decide what kind of construct follows.
 *
 * - `!` starts a declaration, comment or CDATA section.
 * - `?` starts a processing instruction.
 * - A tag start character starts an opening tag. Outside of XML mode, names
 *   beginning like the title or script end sequences get special handling.
 * - `/` starts a closing tag.
 * - Anything else means the `<` was plain text.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateBeforeTagName = function (c) {
    if (c === CharCodes.ExclamationMark) {
        this.state = State.BeforeDeclaration;
        this.sectionStart = this.index + 1;
        return;
    }
    if (c === CharCodes.Questionmark) {
        this.state = State.InProcessingInstruction;
        this.sectionStart = this.index + 1;
        return;
    }
    if (this.isTagStartChar(c)) {
        var lowered = c | 0x20;
        this.sectionStart = this.index;
        if (this.xmlMode) {
            // XML has no special tags.
            this.state = State.InTagName;
        }
        else if (lowered === Sequences.TitleEnd[2]) {
            this.startSpecial(Sequences.TitleEnd, 3);
        }
        else if (lowered === Sequences.ScriptEnd[2]) {
            this.state = State.BeforeSpecialS;
        }
        else {
            this.state = State.InTagName;
        }
        return;
    }
    if (c === CharCodes.Slash) {
        this.state = State.BeforeClosingTagName;
        return;
    }
    // Not a tag after all: reconsume the character as text.
    this.state = State.Text;
    this.stateText(c);
};
/**
 * Inside the name of an opening tag. Once a character ends the name, the
 * name is reported and the same character is reconsumed by the
 * before-attribute-name state.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateInTagName = function (c) {
    if (!isEndOfTagSection(c)) {
        return;
    }
    this.cbs.onopentagname(this.sectionStart, this.index);
    this.sectionStart = -1;
    this.state = State.BeforeAttributeName;
    this.stateBeforeAttributeName(c);
};
/**
 * State after `</`: skip whitespace, drop an empty `</>`, and otherwise
 * start either a closing tag name or a special comment.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateBeforeClosingTagName = function (c) {
    if (isWhitespace(c)) {
        // Ignore
        return;
    }
    if (c === CharCodes.Gt) {
        this.state = State.Text;
        return;
    }
    if (this.isTagStartChar(c)) {
        this.state = State.InClosingTagName;
    }
    else {
        this.state = State.InSpecialComment;
    }
    this.sectionStart = this.index;
};
/**
 * Inside the name of a closing tag. The name ends at `>` or whitespace;
 * it is then reported and the character is reconsumed by the
 * after-closing-tag-name state.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateInClosingTagName = function (c) {
    var nameEnded = c === CharCodes.Gt || isWhitespace(c);
    if (!nameEnded) {
        return;
    }
    this.cbs.onclosetag(this.sectionStart, this.index);
    this.sectionStart = -1;
    this.state = State.AfterClosingTagName;
    this.stateAfterClosingTagName(c);
};
/**
 * After the name of a closing tag: everything up to the next `>` is
 * skipped, then the tokenizer returns to the text state.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateAfterClosingTagName = function (c) {
    var reachedEnd = c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt);
    if (!reachedEnd) {
        return;
    }
    this.state = this.baseState = State.Text;
    this.sectionStart = this.index + 1;
};
/**
 * Inside an opening tag, between attributes.
 *
 * - `>` ends the tag. A special tag continues in the special-tag state,
 *   any other tag in the text state.
 * - `/` may start a self-closing tag.
 * - Any other non-whitespace character starts an attribute name.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateBeforeAttributeName = function (c) {
    if (c === CharCodes.Gt) {
        this.cbs.onopentagend(this.index);
        var followingState = State.Text;
        if (this.isSpecial) {
            followingState = State.InSpecialTag;
            this.sequenceIndex = 0;
        }
        this.state = followingState;
        this.baseState = followingState;
        this.sectionStart = this.index + 1;
        return;
    }
    if (c === CharCodes.Slash) {
        this.state = State.InSelfClosingTag;
        return;
    }
    if (!isWhitespace(c)) {
        this.state = State.InAttributeName;
        this.sectionStart = this.index;
    }
};
/**
 * State after a `/` inside an opening tag.
 *
 * A following `>` completes a self-closing tag. Whitespace is skipped. Any
 * other character means the `/` was stray, and the character is reconsumed
 * by the before-attribute-name state.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateInSelfClosingTag = function (c) {
    if (c === CharCodes.Gt) {
        this.cbs.onselfclosingtag(this.index);
        this.state = this.baseState = State.Text;
        this.sectionStart = this.index + 1;
        // A self-closed special tag has no body to look for an end tag in.
        this.isSpecial = false;
        return;
    }
    if (isWhitespace(c)) {
        return;
    }
    this.state = State.BeforeAttributeName;
    this.stateBeforeAttributeName(c);
};
/**
 * Inside an attribute name. The name ends at `=` or at any character that
 * ends a tag section; it is then reported and the character is reconsumed
 * by the after-attribute-name state.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateInAttributeName = function (c) {
    var nameEnded = c === CharCodes.Eq || isEndOfTagSection(c);
    if (!nameEnded) {
        return;
    }
    this.cbs.onattribname(this.sectionStart, this.index);
    this.sectionStart = -1;
    this.state = State.AfterAttributeName;
    this.stateAfterAttributeName(c);
};
/**
 * After an attribute name.
 *
 * - `=` means a value follows.
 * - `/` or `>` ends the attribute without a value; the character is then
 *   reconsumed by the before-attribute-name state.
 * - Any other non-whitespace character ends the attribute without a value
 *   and starts the next attribute name.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateAfterAttributeName = function (c) {
    if (c === CharCodes.Eq) {
        this.state = State.BeforeAttributeValue;
        return;
    }
    if (c === CharCodes.Slash || c === CharCodes.Gt) {
        this.cbs.onattribend(QuoteType.NoValue, this.index);
        this.state = State.BeforeAttributeName;
        this.stateBeforeAttributeName(c);
        return;
    }
    if (isWhitespace(c)) {
        return;
    }
    this.cbs.onattribend(QuoteType.NoValue, this.index);
    this.state = State.InAttributeName;
    this.sectionStart = this.index;
};
/**
 * After the `=` of an attribute: determine how the value is quoted.
 *
 * A quoted value starts after the quote character. An unquoted value starts
 * at the current character, which is reconsumed by the unquoted-value state.
 * Whitespace is skipped.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateBeforeAttributeValue = function (c) {
    switch (c) {
        case CharCodes.DoubleQuote: {
            this.state = State.InAttributeValueDq;
            this.sectionStart = this.index + 1;
            return;
        }
        case CharCodes.SingleQuote: {
            this.state = State.InAttributeValueSq;
            this.sectionStart = this.index + 1;
            return;
        }
        default: {
            if (isWhitespace(c)) {
                return;
            }
            this.sectionStart = this.index;
            this.state = State.InAttributeValueNq;
            this.stateInAttributeValueNoQuotes(c); // Reconsume token
        }
    }
};
/**
 * Shared implementation of the two quoted attribute value states.
 *
 * The value ends at the closing quote. When entities are not decoded the
 * tokenizer skips straight to that quote; otherwise an `&` switches to
 * entity parsing, remembering the current state as the base state.
 *
 * @param c Char code of the current character.
 * @param quote Char code of the quote that terminates the value.
 */
Tokenizer.prototype.handleInAttributeValue = function (c, quote) {
    var valueEnded = c === quote ||
        (!this.decodeEntities && this.fastForwardTo(quote));
    if (valueEnded) {
        this.cbs.onattribdata(this.sectionStart, this.index);
        this.sectionStart = -1;
        var quoteType = quote === CharCodes.DoubleQuote
            ? QuoteType.Double
            : QuoteType.Single;
        this.cbs.onattribend(quoteType, this.index);
        this.state = State.BeforeAttributeName;
        return;
    }
    if (this.decodeEntities && c === CharCodes.Amp) {
        this.baseState = this.state;
        this.state = State.BeforeEntity;
    }
};
/**
 * Inside a double-quoted attribute value.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateInAttributeValueDoubleQuotes = function (c) {
    var closingQuote = CharCodes.DoubleQuote;
    this.handleInAttributeValue(c, closingQuote);
};
/**
 * Inside a single-quoted attribute value.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateInAttributeValueSingleQuotes = function (c) {
    var closingQuote = CharCodes.SingleQuote;
    this.handleInAttributeValue(c, closingQuote);
};
/**
 * Inside an unquoted attribute value, which ends at whitespace or `>`.
 * The terminating character is reconsumed by the before-attribute-name
 * state. When entities are decoded, `&` switches to entity parsing.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateInAttributeValueNoQuotes = function (c) {
    var valueEnded = isWhitespace(c) || c === CharCodes.Gt;
    if (valueEnded) {
        this.cbs.onattribdata(this.sectionStart, this.index);
        this.sectionStart = -1;
        this.cbs.onattribend(QuoteType.Unquoted, this.index);
        this.state = State.BeforeAttributeName;
        this.stateBeforeAttributeName(c);
        return;
    }
    if (this.decodeEntities && c === CharCodes.Amp) {
        // Remember where to return to once the entity has been handled.
        this.baseState = this.state;
        this.state = State.BeforeEntity;
    }
};
/**
 * State after `<!`: `[` may start a CDATA section, `-` may start a
 * comment, anything else is a declaration.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateBeforeDeclaration = function (c) {
    if (c === CharCodes.OpeningSquareBracket) {
        this.state = State.CDATASequence;
        this.sequenceIndex = 0;
        return;
    }
    if (c === CharCodes.Dash) {
        this.state = State.BeforeComment;
    }
    else {
        this.state = State.InDeclaration;
    }
};
/**
 * Inside a declaration (`<!...`): skip to the closing `>` and report it.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateInDeclaration = function (c) {
    var reachedEnd = c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt);
    if (!reachedEnd) {
        return;
    }
    this.cbs.ondeclaration(this.sectionStart, this.index);
    this.state = State.Text;
    this.sectionStart = this.index + 1;
};
/**
 * Inside a processing instruction (`<?...`): skip to the closing `>` and
 * report it.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateInProcessingInstruction = function (c) {
    var reachedEnd = c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt);
    if (!reachedEnd) {
        return;
    }
    this.cbs.onprocessinginstruction(this.sectionStart, this.index);
    this.state = State.Text;
    this.sectionStart = this.index + 1;
};
/**
 * State after `<!-`: a second `-` opens a comment, anything else makes
 * this a declaration.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateBeforeComment = function (c) {
    if (c !== CharCodes.Dash) {
        this.state = State.InDeclaration;
        return;
    }
    this.state = State.InCommentLike;
    this.currentSequence = Sequences.CommentEnd;
    // Start as if `--` had already matched, to allow short comments (eg. <!-->)
    this.sequenceIndex = 2;
    this.sectionStart = this.index + 1;
};
/**
 * Inside a special comment (a closing tag whose name does not begin with a
 * tag start character): skip to the next `>` and report the contents as a
 * comment.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateInSpecialComment = function (c) {
    var reachedEnd = c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt);
    if (!reachedEnd) {
        return;
    }
    this.cbs.oncomment(this.sectionStart, this.index, 0);
    this.state = State.Text;
    this.sectionStart = this.index + 1;
};
/**
 * Second character of a tag name whose first character matched the script
 * end sequence: decide whether the script or style sequence continues to
 * match, and otherwise fall back to a regular tag name.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateBeforeSpecialS = function (c) {
    var lowered = c | 0x20;
    if (lowered === Sequences.ScriptEnd[3]) {
        this.startSpecial(Sequences.ScriptEnd, 4);
        return;
    }
    if (lowered === Sequences.StyleEnd[3]) {
        this.startSpecial(Sequences.StyleEnd, 4);
        return;
    }
    this.state = State.InTagName;
    this.stateInTagName(c); // Consume the token again
};
/**
 * State directly after an `&`: reset the entity bookkeeping and decide
 * between a numeric entity (`#`), another `&`, and a named entity.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateBeforeEntity = function (c) {
    // The excess starts at 1 so that it includes the '&'.
    this.entityExcess = 1;
    this.entityResult = 0;
    if (c === CharCodes.Number) {
        this.state = State.BeforeNumericEntity;
        return;
    }
    if (c === CharCodes.Amp) {
        // Two `&` characters in a row: remain in the current state.
        return;
    }
    // Start walking the entity trie from its root with this character.
    this.trieIndex = 0;
    this.trieCurrent = this.entityTrie[0];
    this.state = State.InNamedEntity;
    this.stateInNamedEntity(c);
};
/**
 * Inside a named entity: advance through the entity trie by one character.
 *
 * `entityExcess` counts the characters consumed since the last point at
 * which output was emitted (it is reset to 0 whenever a value is stored).
 * If the trie has no branch for `c`, whatever result was stored so far is
 * emitted and the index is decremented so that `c` is processed again in
 * the restored base state.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateInNamedEntity = function (c) {
this.entityExcess += 1;
this.trieIndex = (0, decode_js_1.determineBranch)(this.entityTrie, this.trieCurrent, this.trieIndex + 1, c);
// A negative index means there is no branch for this character.
if (this.trieIndex < 0) {
this.emitNamedEntity();
// Step back one character; the parse loop increments the index again.
this.index--;
return;
}
this.trieCurrent = this.entityTrie[this.trieIndex];
var masked = this.trieCurrent & decode_js_1.BinTrieFlags.VALUE_LENGTH;
// If the branch is a value, store it and continue
if (masked) {
// The mask is the number of bytes of the value, including the current byte.
var valueLength = (masked >> 14) - 1;
// If we have a legacy entity while parsing strictly, just skip the number of bytes
if (!this.allowLegacyEntity() && c !== CharCodes.Semi) {
this.trieIndex += valueLength;
}
else {
// Add 1 as we have already incremented the excess
var entityStart = this.index - this.entityExcess + 1;
// Flush any data that precedes the entity.
if (entityStart > this.sectionStart) {
this.emitPartial(this.sectionStart, entityStart);
}
// If this is a surrogate pair, consume the next two bytes
this.entityResult = this.trieIndex;
this.trieIndex += valueLength;
this.entityExcess = 0;
this.sectionStart = this.index + 1;
// A value length of 0 means the value is stored in the node itself; emit right away.
if (valueLength === 0) {
this.emitNamedEntity();
}
}
}
};
/**
 * Return to the base state and emit the code point(s) of the named entity
 * recorded in `entityResult`, if there is one.
 *
 * The value-length bits of the trie node decide where the value lives:
 * 1 means it is packed into the node itself, 2 means it is in the next
 * slot, and 3 means it occupies the next two slots.
 */
Tokenizer.prototype.emitNamedEntity = function () {
    this.state = this.baseState;
    var resultIndex = this.entityResult;
    if (resultIndex === 0) {
        // No entity was matched.
        return;
    }
    var trie = this.entityTrie;
    var lengthMask = decode_js_1.BinTrieFlags.VALUE_LENGTH;
    var valueLength = (trie[resultIndex] & lengthMask) >> 14;
    if (valueLength === 1) {
        this.emitCodePoint(trie[resultIndex] & ~lengthMask);
    }
    else if (valueLength === 2) {
        this.emitCodePoint(trie[resultIndex + 1]);
    }
    else if (valueLength === 3) {
        this.emitCodePoint(trie[resultIndex + 1]);
        this.emitCodePoint(trie[resultIndex + 2]);
    }
};
/**
 * State after `&#`: an `x` (either case) selects hexadecimal notation.
 * Any other character is reconsumed as part of a decimal entity.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateBeforeNumericEntity = function (c) {
    var isHexPrefix = (c | 0x20) === CharCodes.LowerX;
    if (!isHexPrefix) {
        this.state = State.InNumericEntity;
        this.stateInNumericEntity(c);
        return;
    }
    this.entityExcess++;
    this.state = State.InHexEntity;
};
/**
 * Emit the numeric entity accumulated in `entityResult` and return to the
 * base state. Nothing is emitted if no digits were consumed.
 *
 * @param strict Whether the entity was terminated by a `;`. If it was, the
 *     next section starts after the `;`, otherwise at the current character.
 */
Tokenizer.prototype.emitNumericEntity = function (strict) {
    var entityStart = this.index - this.entityExcess - 1;
    // The digits begin after `&#`, or after `&#x` for hexadecimal entities.
    var prefixLength = 2 + Number(this.state === State.InHexEntity);
    var numberStart = entityStart + prefixLength;
    var hasDigits = numberStart !== this.index;
    if (hasDigits) {
        // Emit leading data if any
        if (entityStart > this.sectionStart) {
            this.emitPartial(this.sectionStart, entityStart);
        }
        this.sectionStart = this.index + Number(strict);
        var codePoint = (0, decode_js_1.replaceCodePoint)(this.entityResult);
        this.emitCodePoint(codePoint);
    }
    this.state = this.baseState;
};
/**
 * Inside a decimal numeric entity (`&#...`).
 *
 * Digits are accumulated and `;` ends the entity. Any other character ends
 * it as well: the entity is emitted only where legacy entities are allowed,
 * and the character is processed again in the base state.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateInNumericEntity = function (c) {
    if (c === CharCodes.Semi) {
        this.emitNumericEntity(true);
        return;
    }
    if (isNumber(c)) {
        var digit = c - CharCodes.Zero;
        this.entityResult = this.entityResult * 10 + digit;
        this.entityExcess++;
        return;
    }
    if (this.allowLegacyEntity()) {
        this.emitNumericEntity(false);
    }
    else {
        this.state = this.baseState;
    }
    // Step back so the parse loop hands this character to the new state.
    this.index--;
};
/**
 * Inside a hexadecimal numeric entity (`&#x...`).
 *
 * Hex digits are accumulated and `;` ends the entity. Any other character
 * ends it as well: the entity is emitted only where legacy entities are
 * allowed, and the character is processed again in the base state.
 *
 * @param c Char code of the current character.
 */
Tokenizer.prototype.stateInHexEntity = function (c) {
    if (c === CharCodes.Semi) {
        this.emitNumericEntity(true);
        return;
    }
    var digitValue;
    if (isNumber(c)) {
        digitValue = c - CharCodes.Zero;
    }
    else if (isHexDigit(c)) {
        // Lower-case the letter, then map a-f onto 10-15.
        digitValue = (c | 0x20) - CharCodes.LowerA + 10;
    }
    else {
        if (this.allowLegacyEntity()) {
            this.emitNumericEntity(false);
        }
        else {
            this.state = this.baseState;
        }
        // Step back so the parse loop hands this character to the new state.
        this.index--;
        return;
    }
    this.entityResult = this.entityResult * 16 + digitValue;
    this.entityExcess++;
};
/**
 * Whether entities without a terminating `;` are accepted at this point:
 * never in XML mode, and otherwise only when the base state is text or a
 * special tag.
 *
 * @returns Whether legacy entities are allowed.
 */
Tokenizer.prototype.allowLegacyEntity = function () {
    if (this.xmlMode) {
        return false;
    }
    return (this.baseState === State.Text ||
        this.baseState === State.InSpecialTag);
};
/**
 * Flush data that has already been consumed from the buffer.
 *
 * While running inside text or an attribute value, whatever has been read
 * of the current section is emitted and the section restarts at the
 * current index.
 */
Tokenizer.prototype.cleanup = function () {
    if (!this.running || this.sectionStart === this.index) {
        return;
    }
    var current = this.state;
    var insideText = current === State.Text ||
        (current === State.InSpecialTag && this.sequenceIndex === 0);
    if (insideText) {
        this.cbs.ontext(this.sectionStart, this.index);
        this.sectionStart = this.index;
        return;
    }
    var insideAttributeValue = current === State.InAttributeValueDq ||
        current === State.InAttributeValueSq ||
        current === State.InAttributeValueNq;
    if (insideAttributeValue) {
        this.cbs.onattribdata(this.sectionStart, this.index);
        this.sectionStart = this.index;
    }
};
/**
 * Loop condition of `parse`: there is unread input in the current buffer
 * and the tokenizer has not been paused.
 *
 * @returns `false` when the buffer is exhausted, otherwise the value of
 *     the `running` flag.
 */
Tokenizer.prototype.shouldContinue = function () {
    var bufferEnd = this.buffer.length + this.offset;
    if (!(this.index < bufferEnd)) {
        return false;
    }
    return this.running;
};
/**
 * Iterates through the buffer, calling the function corresponding to the current state.
 *
 * States that are more likely to be hit are higher up, as a performance improvement.
 *
 * Every iteration reads the char code at `index - offset` (the position
 * inside the current buffer), dispatches on `state`, and then increments
 * `index`. The loop ends when `shouldContinue()` returns a falsy value;
 * `cleanup()` runs afterwards.
 */
Tokenizer.prototype.parse = function () {
while (this.shouldContinue()) {
// `index` is absolute; subtracting `offset` gives the position in the current buffer.
var c = this.buffer.charCodeAt(this.index - this.offset);
switch (this.state) {
case State.Text: {
this.stateText(c);
break;
}
case State.SpecialStartSequence: {
this.stateSpecialStartSequence(c);
break;
}
case State.InSpecialTag: {
this.stateInSpecialTag(c);
break;
}
case State.CDATASequence: {
this.stateCDATASequence(c);
break;
}
case State.InAttributeValueDq: {
this.stateInAttributeValueDoubleQuotes(c);
break;
}
case State.InAttributeName: {
this.stateInAttributeName(c);
break;
}
case State.InCommentLike: {
this.stateInCommentLike(c);
break;
}
case State.InSpecialComment: {
this.stateInSpecialComment(c);
break;
}
case State.BeforeAttributeName: {
this.stateBeforeAttributeName(c);
break;
}
case State.InTagName: {
this.stateInTagName(c);
break;
}
case State.InClosingTagName: {
this.stateInClosingTagName(c);
break;
}
case State.BeforeTagName: {
this.stateBeforeTagName(c);
break;
}
case State.AfterAttributeName: {
this.stateAfterAttributeName(c);
break;
}
case State.InAttributeValueSq: {
this.stateInAttributeValueSingleQuotes(c);
break;
}
case State.BeforeAttributeValue: {
this.stateBeforeAttributeValue(c);
break;
}
case State.BeforeClosingTagName: {
this.stateBeforeClosingTagName(c);
break;
}
case State.AfterClosingTagName: {
this.stateAfterClosingTagName(c);
break;
}
case State.BeforeSpecialS: {
this.stateBeforeSpecialS(c);
break;
}
case State.InAttributeValueNq: {
this.stateInAttributeValueNoQuotes(c);
break;
}
case State.InSelfClosingTag: {
this.stateInSelfClosingTag(c);
break;
}
case State.InDeclaration: {
this.stateInDeclaration(c);
break;
}
case State.BeforeDeclaration: {
this.stateBeforeDeclaration(c);
break;
}
case State.BeforeComment: {
this.stateBeforeComment(c);
break;
}
case State.InProcessingInstruction: {
this.stateInProcessingInstruction(c);
break;
}
case State.InNamedEntity: {
this.stateInNamedEntity(c);
break;
}
case State.BeforeEntity: {
this.stateBeforeEntity(c);
break;
}
case State.InHexEntity: {
this.stateInHexEntity(c);
break;
}
case State.InNumericEntity: {
this.stateInNumericEntity(c);
break;
}
default: {
// The only state without a case above: `this.state === State.BeforeNumericEntity`
this.stateBeforeNumericEntity(c);
}
}
// State handlers that need the character processed again decrement `index` themselves.
this.index++;
}
this.cleanup();
};
/**
 * Finish tokenizing: emit a pending named entity, deal with whatever data
 * is left in the current section, and signal the end of input.
 */
Tokenizer.prototype.finish = function () {
    var insideNamedEntity = this.state === State.InNamedEntity;
    if (insideNamedEntity) {
        this.emitNamedEntity();
    }
    // If there is remaining data, emit it in a reasonable way
    var hasTrailingData = this.sectionStart < this.index;
    if (hasTrailingData) {
        this.handleTrailingData();
    }
    this.cbs.onend();
};
/**
 * Handle data that is left over when the input ends.
 *
 * - Unterminated comments and CDATA sections are emitted as they are.
 * - A numeric entity is emitted where legacy entities are allowed.
 * - Unfinished opening or closing tags are dropped silently.
 * - Everything else is emitted as text.
 */
Tokenizer.prototype.handleTrailingData = function () {
    var endIndex = this.buffer.length + this.offset;
    var current = this.state;
    if (current === State.InCommentLike) {
        if (this.currentSequence === Sequences.CdataEnd) {
            this.cbs.oncdata(this.sectionStart, endIndex, 0);
        }
        else {
            this.cbs.oncomment(this.sectionStart, endIndex, 0);
        }
        return;
    }
    var insideNumericEntity = current === State.InNumericEntity ||
        current === State.InHexEntity;
    if (insideNumericEntity && this.allowLegacyEntity()) {
        this.emitNumericEntity(false);
        // All trailing data will have been consumed
        return;
    }
    switch (current) {
        case State.InTagName:
        case State.BeforeAttributeName:
        case State.BeforeAttributeValue:
        case State.AfterAttributeName:
        case State.InAttributeName:
        case State.InAttributeValueSq:
        case State.InAttributeValueDq:
        case State.InAttributeValueNq:
        case State.InClosingTagName: {
            /*
             * If we are currently in an opening or closing tag, us not calling the
             * respective callback signals that the tag should be ignored.
             */
            return;
        }
        default: {
            this.cbs.ontext(this.sectionStart, endIndex);
        }
    }
};
/**
 * Emit a slice of data that precedes an entity. It is reported as text
 * when the base state is text or a special tag, and as attribute data
 * otherwise.
 *
 * @param start Start index of the slice.
 * @param endIndex End index of the slice.
 */
Tokenizer.prototype.emitPartial = function (start, endIndex) {
    var inTextContext = this.baseState === State.Text ||
        this.baseState === State.InSpecialTag;
    if (inTextContext) {
        this.cbs.ontext(start, endIndex);
    }
    else {
        this.cbs.onattribdata(start, endIndex);
    }
};
/**
 * Emit a decoded entity. It is reported as a text entity when the base
 * state is text or a special tag, and as an attribute entity otherwise.
 *
 * @param cp Code point of the decoded entity.
 */
Tokenizer.prototype.emitCodePoint = function (cp) {
    var inTextContext = this.baseState === State.Text ||
        this.baseState === State.InSpecialTag;
    if (inTextContext) {
        this.cbs.ontextentity(cp);
    }
    else {
        this.cbs.onattribentity(cp);
    }
};
return Tokenizer;
}());
exports.default = Tokenizer;
//# sourceMappingURL=Tokenizer.js.map

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
{"version":3,"file":"WritableStream.d.ts","sourceRoot":"https://raw.githubusercontent.com/fb55/htmlparser2/c123610e003a1eaebc61febed01cabb6e41eb658/src/","sources":["WritableStream.ts"],"names":[],"mappings":";;AAAA,OAAO,EAAU,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAK7D,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAQvC;;;;GAIG;AACH,qBAAa,cAAe,SAAQ,QAAQ;IACxC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAuB;gBAEpC,GAAG,EAAE,OAAO,CAAC,OAAO,CAAC,EAAE,OAAO,CAAC,EAAE,aAAa;IAKjD,MAAM,CACX,KAAK,EAAE,MAAM,GAAG,MAAM,EACtB,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,MAAM,IAAI,GACrB,IAAI;IAOE,MAAM,CAAC,QAAQ,EAAE,MAAM,IAAI,GAAG,IAAI;CAI9C"}

View File

@@ -0,0 +1,54 @@
"use strict";
/*
 * Class-inheritance helper. An `__extends` already present on `this` is
 * reused; otherwise a local implementation is created.
 *
 * `__extends(d, b)` makes constructor `d` inherit from `b`: the static side
 * is linked through `extendStatics`, the instance side through an
 * intermediate constructor whose prototype is `b.prototype`.
 */
var __extends = (this && this.__extends) || (function () {
// Picks a strategy on first use, caches it by overwriting `extendStatics`, then applies it.
var extendStatics = function (d, b) {
// Preference order: Object.setPrototypeOf, then `__proto__` assignment, then copying own properties.
extendStatics = Object.setPrototypeOf ||
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };
return extendStatics(d, b);
};
return function (d, b) {
// Only functions and `null` are accepted as a base.
if (typeof b !== "function" && b !== null)
throw new TypeError("Class extends value " + String(b) + " is not a constructor or null");
extendStatics(d, b);
// Intermediate constructor, so `d.prototype` is created without calling `b`.
function __() { this.constructor = d; }
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
};
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.WritableStream = void 0;
var Parser_js_1 = require("./Parser.js");
/*
* NOTE: If either of these two imports produces a type error,
* please update your @types/node dependency!
*/
var node_stream_1 = require("node:stream");
var node_string_decoder_1 = require("node:string_decoder");
// Following the example in https://nodejs.org/api/stream.html#stream_decoding_buffers_in_a_writable_stream
/**
 * Tell whether a chunk handed to `_write` is a Buffer. Only the reported
 * encoding is inspected; the chunk itself is not looked at.
 */
function isBuffer(_chunk, encoding) {
    var isBufferEncoding = encoding === "buffer";
    return isBufferEncoding;
}
/**
 * WritableStream exposes the `Parser` interface as a NodeJS writable stream.
 *
 * Buffer chunks are decoded to strings with a `StringDecoder` before they
 * are handed to the parser; string chunks are passed through unchanged.
 *
 * @see Parser
 */
var WritableStream = /** @class */ (function (_super) {
    __extends(WritableStream, _super);
    function WritableStream(cbs, options) {
        // `decodeStrings: false` keeps string chunks as strings.
        var _this = _super.call(this, { decodeStrings: false }) || this;
        _this._decoder = new node_string_decoder_1.StringDecoder();
        _this._parser = new Parser_js_1.Parser(cbs, options);
        return _this;
    }
    WritableStream.prototype._write = function (chunk, encoding, callback) {
        var text = isBuffer(chunk, encoding)
            ? this._decoder.write(chunk)
            : chunk;
        this._parser.write(text);
        callback();
    };
    WritableStream.prototype._final = function (callback) {
        // Hand over whatever the decoder still holds back.
        var remainder = this._decoder.end();
        this._parser.end(remainder);
        callback();
    };
    return WritableStream;
}(node_stream_1.Writable));
exports.WritableStream = WritableStream;
//# sourceMappingURL=WritableStream.js.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"WritableStream.js","sourceRoot":"https://raw.githubusercontent.com/fb55/htmlparser2/c123610e003a1eaebc61febed01cabb6e41eb658/src/","sources":["WritableStream.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;AAAA,yCAA6D;AAC7D;;;GAGG;AACH,2CAAuC;AACvC,2DAAoD;AAEpD,2GAA2G;AAC3G,SAAS,QAAQ,CAAC,MAAuB,EAAE,QAAgB;IACvD,OAAO,QAAQ,KAAK,QAAQ,CAAC;AACjC,CAAC;AAED;;;;GAIG;AACH;IAAoC,kCAAQ;IAIxC,wBAAY,GAAqB,EAAE,OAAuB;QAA1D,YACI,kBAAM,EAAE,aAAa,EAAE,KAAK,EAAE,CAAC,SAElC;QALgB,cAAQ,GAAG,IAAI,mCAAa,EAAE,CAAC;QAI5C,KAAI,CAAC,OAAO,GAAG,IAAI,kBAAM,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;;IAC5C,CAAC;IAEQ,+BAAM,GAAf,UACI,KAAsB,EACtB,QAAgB,EAChB,QAAoB;QAEpB,IAAI,CAAC,OAAO,CAAC,KAAK,CACd,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CACjE,CAAC;QACF,QAAQ,EAAE,CAAC;IACf,CAAC;IAEQ,+BAAM,GAAf,UAAgB,QAAoB;QAChC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE,CAAC,CAAC;QACtC,QAAQ,EAAE,CAAC;IACf,CAAC;IACL,qBAAC;AAAD,CAAC,AAxBD,CAAoC,sBAAQ,GAwB3C;AAxBY,wCAAc"}

View File

@@ -0,0 +1 @@
{"version":3,"file":"Parser.d.ts","sourceRoot":"https://raw.githubusercontent.com/fb55/htmlparser2/c123610e003a1eaebc61febed01cabb6e41eb658/src/","sources":["Parser.ts"],"names":[],"mappings":"AAAA,OAAO,SAAS,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAuGjE,MAAM,WAAW,aAAa;IAC1B;;;;;;;OAOG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAElB;;;;OAIG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB;;;;OAIG;IACH,aAAa,CAAC,EAAE,OAAO,CAAC;IAExB;;;;OAIG;IACH,uBAAuB,CAAC,EAAE,OAAO,CAAC;IAElC;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB;;;;;OAKG;IACH,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAE/B;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,SAAS,CAAC;CAChC;AAED,MAAM,WAAW,OAAO;IACpB,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IAEnC;;OAEG;IACH,OAAO,IAAI,IAAI,CAAC;IAEhB;;OAEG;IACH,KAAK,IAAI,IAAI,CAAC;IACd,OAAO,CAAC,KAAK,EAAE,KAAK,GAAG,IAAI,CAAC;IAC5B,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,GAAG,IAAI,CAAC;IACnD,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC;;;;;OAKG;IACH,WAAW,CACP,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,EACb,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,GAAG,IAAI,GAClC,IAAI,CAAC;IACR,SAAS,CACL,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE;QAAE,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAAA;KAAE,EAChC,SAAS,EAAE,OAAO,GACnB,IAAI,CAAC;IACR,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,YAAY,IAAI,IAAI,CAAC;IACrB,UAAU,IAAI,IAAI,CAAC;IACnB,YAAY,IAAI,IAAI,CAAC;IACrB,uBAAuB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7D;AAID,qBAAa,MAAO,YAAW,SAAS;IA+BhC,OAAO,CAAC,QAAQ,CAAC,OAAO;IA9B5B,yCAAyC;IAClC,UAAU,SAAK;IACtB,uCAAuC;IAChC,QAAQ,SAAK;IACpB;;;OAGG;IACH,OAAO,CAAC,YAAY,CAAK;IAEzB,OAAO,CAAC,OAAO,CAAM;IACrB,OAAO,CAAC,UAAU,CAAM;IACxB,OAAO,CAAC,WAAW,CAAM;IACzB,OAAO,CAAC,OAAO,CAA0C;IACzD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAgB;IACtC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAiB;IAChD,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAmB;IACvC,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAU;IAC5C,OAAO,CAAC,QAAQ,CAAC,uBAAuB,CAAU;IAClD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAY;IAEtC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAgB;IACxC,OAAO,CAAC,YAAY,CAAK;IACzB,kFAAkF;IAClF,OAAO,CAAC,UAAU,CAAK;IACvB,kFAAkF;IAClF,
OAAO,CAAC,KAAK,CAAS;gBAGlB,GAAG,CAAC,EAAE,OAAO,CAAC,OAAO,CAAC,GAAG,IAAI,EACZ,OAAO,GAAE,aAAkB;IAehD,gBAAgB;IAChB,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IAO7C,gBAAgB;IAChB,YAAY,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAW9B,SAAS,CAAC,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO;IAI9C,gBAAgB;IAChB,aAAa,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IAYpD,OAAO,CAAC,WAAW;IA4BnB,OAAO,CAAC,UAAU;IAclB,gBAAgB;IAChB,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI;IAQpC,gBAAgB;IAChB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IA0CjD,gBAAgB;IAChB,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI;IAiBxC,OAAO,CAAC,eAAe;IAYvB,gBAAgB;IAChB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IASnD,gBAAgB;IAChB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IAInD,gBAAgB;IAChB,cAAc,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAIhC,gBAAgB;IAChB,WAAW,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IAwBrD,OAAO,CAAC,kBAAkB;IAW1B,gBAAgB;IAChB,aAAa,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IAapD,gBAAgB;IAChB,uBAAuB,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IAa9D,gBAAgB;IAChB,SAAS,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI;IAUhE,gBAAgB;IAChB,OAAO,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI;IAiB9D,gBAAgB;IAChB,KAAK,IAAI,IAAI;IAab;;OAEG;IACI,KAAK,IAAI,IAAI;IAgBpB;;;;;OAKG;IACI,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAKxC,OAAO,CAAC,QAAQ;IAkBhB,OAAO,CAAC,WAAW;IAMnB;;;;OAIG;IACI,KAAK,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAajC;;;;OAIG;IACI,GAAG,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI;IAWhC;;OAEG;IACI,KAAK,IAAI,IAAI;IAIpB;;OAEG;IACI,MAAM,IAAI,IAAI;IAarB;;;;;OAKG;IACI,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAGtC;;;;;OAKG;IACI,IAAI,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI;CAGpC"}

View File

@@ -0,0 +1,489 @@
import Tokenizer, { QuoteType } from "./Tokenizer.js";
import { fromCodePoint } from "entities/lib/decode.js";
// Tag-name sets that are shared between several entries of `openImpliesClose` below.
const formTags = new Set([
"input",
"option",
"optgroup",
"select",
"button",
"datalist",
"textarea",
]);
const pTag = new Set(["p"]);
const tableSectionTags = new Set(["thead", "tbody"]);
const ddtTags = new Set(["dd", "dt"]);
const rtpTags = new Set(["rt", "rp"]);
/*
 * Maps a tag name to the set of tag names that it implicitly closes.
 * Outside of XML mode, opening a tag with a key from this map pops elements
 * off the top of the open-element stack for as long as the topmost element
 * is a member of the mapped set.
 */
const openImpliesClose = new Map([
["tr", new Set(["tr", "th", "td"])],
["th", new Set(["th"])],
["td", new Set(["thead", "th", "td"])],
["body", new Set(["head", "link", "script"])],
["li", new Set(["li"])],
["p", pTag],
["h1", pTag],
["h2", pTag],
["h3", pTag],
["h4", pTag],
["h5", pTag],
["h6", pTag],
["select", formTags],
["input", formTags],
["output", formTags],
["button", formTags],
["datalist", formTags],
["textarea", formTags],
["option", new Set(["option"])],
["optgroup", new Set(["optgroup", "option"])],
["dd", ddtTags],
["dt", ddtTags],
["address", pTag],
["article", pTag],
["aside", pTag],
["blockquote", pTag],
["details", pTag],
["div", pTag],
["dl", pTag],
["fieldset", pTag],
["figcaption", pTag],
["figure", pTag],
["footer", pTag],
["form", pTag],
["header", pTag],
["hr", pTag],
["main", pTag],
["nav", pTag],
["ol", pTag],
["pre", pTag],
["section", pTag],
["table", pTag],
["ul", pTag],
["rt", rtpTags],
["rp", rtpTags],
["tbody", tableSectionTags],
["tfoot", tableSectionTags],
]);
/*
 * Tag names that `isVoidElement` reports as void outside of XML mode. Void
 * elements are never pushed onto the open-element stack.
 */
const voidElements = new Set([
"area",
"base",
"basefont",
"br",
"col",
"command",
"embed",
"frame",
"hr",
"img",
"input",
"isindex",
"keygen",
"link",
"meta",
"param",
"source",
"track",
"wbr",
]);
// Opening one of these tags pushes `true` onto the parser's `foreignContext` stack.
const foreignContextElements = new Set(["math", "svg"]);
// Opening one of these tags pushes `false` onto the parser's `foreignContext` stack.
const htmlIntegrationElements = new Set([
"mi",
"mo",
"mn",
"ms",
"mtext",
"annotation-xml",
"foreignobject",
"desc",
"title",
]);
// Matches a whitespace character or a `/`.
// NOTE(review): presumably used to find the end of a name; the usage is not in this part of the file.
const reNameEnd = /\s|\//;
export class Parser {
/**
 * Create a parser.
 *
 * @param cbs Object holding the event callbacks; an empty object is used
 *     when it is `null` or `undefined`.
 * @param options Parser options; also handed to the tokenizer.
 */
constructor(cbs, options = {}) {
// Temporaries emitted by the compiler for `??` and `?.`.
var _a, _b, _c, _d, _e;
this.options = options;
/** The start index of the last event. */
this.startIndex = 0;
/** The end index of the last event. */
this.endIndex = 0;
/**
 * Store the start index of the current open tag,
 * so we can update the start index for attributes.
 */
this.openTagStart = 0;
this.tagname = "";
this.attribname = "";
this.attribvalue = "";
this.attribs = null;
this.stack = [];
this.foreignContext = [];
this.buffers = [];
this.bufferOffset = 0;
/** The index of the last written buffer. Used when resuming after a `pause()`. */
this.writeIndex = 0;
/** Indicates whether the parser has finished running / `.end` has been called. */
this.ended = false;
this.cbs = cbs !== null && cbs !== void 0 ? cbs : {};
// Both lower-casing settings default to "on" unless `xmlMode` is set.
this.lowerCaseTagNames = (_a = options.lowerCaseTags) !== null && _a !== void 0 ? _a : !options.xmlMode;
this.lowerCaseAttributeNames =
(_b = options.lowerCaseAttributeNames) !== null && _b !== void 0 ? _b : !options.xmlMode;
// `options.Tokenizer` can supply a different tokenizer class; this parser receives the tokenizer's callbacks.
this.tokenizer = new ((_c = options.Tokenizer) !== null && _c !== void 0 ? _c : Tokenizer)(this.options, this);
// Notify the handler, if it implements `onparserinit`.
(_e = (_d = this.cbs).onparserinit) === null || _e === void 0 ? void 0 : _e.call(_d, this);
}
// Tokenizer event handlers
/** @internal */
ontext(start, endIndex) {
var _a, _b;
const data = this.getSlice(start, endIndex);
this.endIndex = endIndex - 1;
(_b = (_a = this.cbs).ontext) === null || _b === void 0 ? void 0 : _b.call(_a, data);
this.startIndex = endIndex;
}
/** @internal */
ontextentity(cp) {
var _a, _b;
/*
* Entities can be emitted on the character, or directly after.
* We use the section start here to get accurate indices.
*/
const index = this.tokenizer.getSectionStart();
this.endIndex = index - 1;
(_b = (_a = this.cbs).ontext) === null || _b === void 0 ? void 0 : _b.call(_a, fromCodePoint(cp));
this.startIndex = index;
}
isVoidElement(name) {
return !this.options.xmlMode && voidElements.has(name);
}
/** @internal */
onopentagname(start, endIndex) {
this.endIndex = endIndex;
let name = this.getSlice(start, endIndex);
if (this.lowerCaseTagNames) {
name = name.toLowerCase();
}
this.emitOpenTag(name);
}
emitOpenTag(name) {
var _a, _b, _c, _d;
this.openTagStart = this.startIndex;
this.tagname = name;
const impliesClose = !this.options.xmlMode && openImpliesClose.get(name);
if (impliesClose) {
while (this.stack.length > 0 &&
impliesClose.has(this.stack[this.stack.length - 1])) {
const element = this.stack.pop();
(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, element, true);
}
}
if (!this.isVoidElement(name)) {
this.stack.push(name);
if (foreignContextElements.has(name)) {
this.foreignContext.push(true);
}
else if (htmlIntegrationElements.has(name)) {
this.foreignContext.push(false);
}
}
(_d = (_c = this.cbs).onopentagname) === null || _d === void 0 ? void 0 : _d.call(_c, name);
if (this.cbs.onopentag)
this.attribs = {};
}
endOpenTag(isImplied) {
var _a, _b;
this.startIndex = this.openTagStart;
if (this.attribs) {
(_b = (_a = this.cbs).onopentag) === null || _b === void 0 ? void 0 : _b.call(_a, this.tagname, this.attribs, isImplied);
this.attribs = null;
}
if (this.cbs.onclosetag && this.isVoidElement(this.tagname)) {
this.cbs.onclosetag(this.tagname, true);
}
this.tagname = "";
}
/** @internal */
onopentagend(endIndex) {
this.endIndex = endIndex;
this.endOpenTag(false);
// Set `startIndex` for next node
this.startIndex = endIndex + 1;
}
/** @internal */
onclosetag(start, endIndex) {
// Handles a closing tag whose name spans [start, endIndex).
var _a, _b, _c, _d, _e, _f;
this.endIndex = endIndex;
let name = this.getSlice(start, endIndex);
if (this.lowerCaseTagNames) {
name = name.toLowerCase();
}
// NOTE(review): the foreign-context entry is popped even when no matching
// open element is on the stack — confirm this is intended.
if (foreignContextElements.has(name) ||
htmlIntegrationElements.has(name)) {
this.foreignContext.pop();
}
if (!this.isVoidElement(name)) {
const pos = this.stack.lastIndexOf(name);
if (pos !== -1) {
if (this.cbs.onclosetag) {
// Pop everything above and including the matching element; the second
// argument is true for every element except the last one popped.
let count = this.stack.length - pos;
while (count--) {
// We know the stack has sufficient elements.
this.cbs.onclosetag(this.stack.pop(), count !== 0);
}
}
else
// No handler: drop the same elements without reporting them.
this.stack.length = pos;
}
else if (!this.options.xmlMode && name === "p") {
// Implicit open before close
this.emitOpenTag("p");
this.closeCurrentTag(true);
}
}
else if (!this.options.xmlMode && name === "br") {
// We can't use `emitOpenTag` for implicit open, as `br` would be implicitly closed.
(_b = (_a = this.cbs).onopentagname) === null || _b === void 0 ? void 0 : _b.call(_a, "br");
(_d = (_c = this.cbs).onopentag) === null || _d === void 0 ? void 0 : _d.call(_c, "br", {}, true);
(_f = (_e = this.cbs).onclosetag) === null || _f === void 0 ? void 0 : _f.call(_e, "br", false);
}
// Set `startIndex` for next node
this.startIndex = endIndex + 1;
}
/** @internal */
onselfclosingtag(endIndex) {
this.endIndex = endIndex;
if (this.options.xmlMode ||
this.options.recognizeSelfClosing ||
this.foreignContext[this.foreignContext.length - 1]) {
this.closeCurrentTag(false);
// Set `startIndex` for next node
this.startIndex = endIndex + 1;
}
else {
// Ignore the fact that the tag is self-closing.
this.onopentagend(endIndex);
}
}
closeCurrentTag(isOpenImplied) {
var _a, _b;
const name = this.tagname;
this.endOpenTag(isOpenImplied);
// Self-closing tags will be on the top of the stack
if (this.stack[this.stack.length - 1] === name) {
// If the opening tag isn't implied, the closing tag has to be implied.
(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, name, !isOpenImplied);
this.stack.pop();
}
}
/** @internal */
onattribname(start, endIndex) {
this.startIndex = start;
const name = this.getSlice(start, endIndex);
this.attribname = this.lowerCaseAttributeNames
? name.toLowerCase()
: name;
}
/** @internal */
onattribdata(start, endIndex) {
this.attribvalue += this.getSlice(start, endIndex);
}
/** @internal */
onattribentity(cp) {
this.attribvalue += fromCodePoint(cp);
}
/** @internal */
onattribend(quote, endIndex) {
var _a, _b;
this.endIndex = endIndex;
(_b = (_a = this.cbs).onattribute) === null || _b === void 0 ? void 0 : _b.call(_a, this.attribname, this.attribvalue, quote === QuoteType.Double
? '"'
: quote === QuoteType.Single
? "'"
: quote === QuoteType.NoValue
? undefined
: null);
if (this.attribs &&
!Object.prototype.hasOwnProperty.call(this.attribs, this.attribname)) {
this.attribs[this.attribname] = this.attribvalue;
}
this.attribvalue = "";
}
getInstructionName(value) {
const index = value.search(reNameEnd);
let name = index < 0 ? value : value.substr(0, index);
if (this.lowerCaseTagNames) {
name = name.toLowerCase();
}
return name;
}
/** @internal */
ondeclaration(start, endIndex) {
this.endIndex = endIndex;
const value = this.getSlice(start, endIndex);
if (this.cbs.onprocessinginstruction) {
const name = this.getInstructionName(value);
this.cbs.onprocessinginstruction(`!${name}`, `!${value}`);
}
// Set `startIndex` for next node
this.startIndex = endIndex + 1;
}
/** @internal */
onprocessinginstruction(start, endIndex) {
this.endIndex = endIndex;
const value = this.getSlice(start, endIndex);
if (this.cbs.onprocessinginstruction) {
const name = this.getInstructionName(value);
this.cbs.onprocessinginstruction(`?${name}`, `?${value}`);
}
// Set `startIndex` for next node
this.startIndex = endIndex + 1;
}
/** @internal */
oncomment(start, endIndex, offset) {
var _a, _b, _c, _d;
this.endIndex = endIndex;
(_b = (_a = this.cbs).oncomment) === null || _b === void 0 ? void 0 : _b.call(_a, this.getSlice(start, endIndex - offset));
(_d = (_c = this.cbs).oncommentend) === null || _d === void 0 ? void 0 : _d.call(_c);
// Set `startIndex` for next node
this.startIndex = endIndex + 1;
}
/** @internal */
oncdata(start, endIndex, offset) {
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k;
this.endIndex = endIndex;
const value = this.getSlice(start, endIndex - offset);
if (this.options.xmlMode || this.options.recognizeCDATA) {
(_b = (_a = this.cbs).oncdatastart) === null || _b === void 0 ? void 0 : _b.call(_a);
(_d = (_c = this.cbs).ontext) === null || _d === void 0 ? void 0 : _d.call(_c, value);
(_f = (_e = this.cbs).oncdataend) === null || _f === void 0 ? void 0 : _f.call(_e);
}
else {
(_h = (_g = this.cbs).oncomment) === null || _h === void 0 ? void 0 : _h.call(_g, `[CDATA[${value}]]`);
(_k = (_j = this.cbs).oncommentend) === null || _k === void 0 ? void 0 : _k.call(_j);
}
// Set `startIndex` for next node
this.startIndex = endIndex + 1;
}
/** @internal */
onend() {
var _a, _b;
if (this.cbs.onclosetag) {
// Set the end index for all remaining tags
this.endIndex = this.startIndex;
for (let index = this.stack.length; index > 0; this.cbs.onclosetag(this.stack[--index], true))
;
}
(_b = (_a = this.cbs).onend) === null || _b === void 0 ? void 0 : _b.call(_a);
}
/**
* Resets the parser to a blank state, ready to parse a new HTML document
*/
reset() {
var _a, _b, _c, _d;
(_b = (_a = this.cbs).onreset) === null || _b === void 0 ? void 0 : _b.call(_a);
this.tokenizer.reset();
this.tagname = "";
this.attribname = "";
this.attribs = null;
this.stack.length = 0;
this.startIndex = 0;
this.endIndex = 0;
(_d = (_c = this.cbs).onparserinit) === null || _d === void 0 ? void 0 : _d.call(_c, this);
this.buffers.length = 0;
this.bufferOffset = 0;
this.writeIndex = 0;
this.ended = false;
}
/**
* Resets the parser, then parses a complete document and
* pushes it to the handler.
*
* @param data Document to parse.
*/
parseComplete(data) {
this.reset();
this.end(data);
}
/**
 * Returns the text between the absolute positions `start` (inclusive) and
 * `end` (exclusive), joining consecutive entries of `this.buffers` as needed.
 *
 * Buffers that are fully consumed along the way are dropped via `shiftBuffer`.
 * NOTE(review): assumes `this.buffers[0]` exists whenever it is read — confirm against callers.
 *
 * @param start Absolute start position.
 * @param end Absolute end position.
 * @returns The requested text.
 */
getSlice(start, end) {
// Drop buffers that lie entirely before `start`.
while (start - this.bufferOffset >= this.buffers[0].length) {
this.shiftBuffer();
}
let slice = this.buffers[0].slice(start - this.bufferOffset, end - this.bufferOffset);
// The range continues into following buffers: append their leading parts.
while (end - this.bufferOffset > this.buffers[0].length) {
this.shiftBuffer();
slice += this.buffers[0].slice(0, end - this.bufferOffset);
}
return slice;
}
shiftBuffer() {
this.bufferOffset += this.buffers[0].length;
this.writeIndex--;
this.buffers.shift();
}
/**
* Parses a chunk of data and calls the corresponding callbacks.
*
* @param chunk Chunk to parse.
*/
write(chunk) {
var _a, _b;
if (this.ended) {
(_b = (_a = this.cbs).onerror) === null || _b === void 0 ? void 0 : _b.call(_a, new Error(".write() after done!"));
return;
}
this.buffers.push(chunk);
if (this.tokenizer.running) {
this.tokenizer.write(chunk);
this.writeIndex++;
}
}
/**
* Parses the end of the buffer and clears the stack, calls onend.
*
* @param chunk Optional final chunk to parse.
*/
end(chunk) {
var _a, _b;
if (this.ended) {
(_b = (_a = this.cbs).onerror) === null || _b === void 0 ? void 0 : _b.call(_a, new Error(".end() after done!"));
return;
}
if (chunk)
this.write(chunk);
this.ended = true;
this.tokenizer.end();
}
/**
* Pauses parsing. The parser won't emit events until `resume` is called.
*/
pause() {
this.tokenizer.pause();
}
/**
* Resumes parsing after `pause` was called.
*/
resume() {
this.tokenizer.resume();
while (this.tokenizer.running &&
this.writeIndex < this.buffers.length) {
this.tokenizer.write(this.buffers[this.writeIndex++]);
}
if (this.ended)
this.tokenizer.end();
}
/**
* Alias of `write`, for backwards compatibility.
*
* @param chunk Chunk to parse.
* @deprecated
*/
parseChunk(chunk) {
this.write(chunk);
}
/**
* Alias of `end`, for backwards compatibility.
*
* @param chunk Optional final chunk to parse.
* @deprecated
*/
done(chunk) {
this.end(chunk);
}
}
//# sourceMappingURL=Parser.js.map

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
{"version":3,"file":"Tokenizer.d.ts","sourceRoot":"https://raw.githubusercontent.com/fb55/htmlparser2/c123610e003a1eaebc61febed01cabb6e41eb658/src/","sources":["Tokenizer.ts"],"names":[],"mappings":"AAkHA,oBAAY,SAAS;IACjB,OAAO,IAAI;IACX,QAAQ,IAAI;IACZ,MAAM,IAAI;IACV,MAAM,IAAI;CACb;AAED,MAAM,WAAW,SAAS;IACtB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACpD,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACxC,WAAW,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACtD,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACpD,OAAO,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IAClE,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IAClD,SAAS,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACpE,aAAa,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACrD,KAAK,IAAI,IAAI,CAAC;IACd,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACrC,aAAa,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACrD,uBAAuB,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/D,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACzC,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9C,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;CACzC;AAiBD,MAAM,CAAC,OAAO,OAAO,SAAS;IA2BtB,OAAO,CAAC,QAAQ,CAAC,GAAG;IA1BxB,6CAA6C;IAC7C,OAAO,CAAC,KAAK,CAAc;IAC3B,uBAAuB;IACvB,OAAO,CAAC,MAAM,CAAM;IACpB,iEAAiE;IACjE,OAAO,CAAC,YAAY,CAAK;IACzB,oEAAoE;IACpE,OAAO,CAAC,KAAK,CAAK;IAClB,kIAAkI;IAClI,OAAO,CAAC,SAAS,CAAc;IAC/B,oEAAoE;IACpE,OAAO,CAAC,SAAS,CAAS;IAC1B,uDAAuD;IAChD,OAAO,UAAQ;IACtB,wCAAwC;IACxC,OAAO,CAAC,MAAM,CAAK;IAEnB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAU;IAClC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAU;IACzC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAc;gBAGrC,EACI,OAAe,EACf,cAAqB,GACxB,EAAE;QAAE,OAAO,CAAC,EAAE,OAAO,CAAC;QAAC,cAAc,CAAC,EAAE,OAAO,CAAA;KAAE,EACjC,GAAG,EAAE,SAAS;IAO5B,KAAK,IAAI,IAAI;IAWb,KAAK,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAM1B,GAAG,IAAI,IAAI;IAIX,KAAK,IAAI,IAAI;IAIb,MAAM,IAAI,IAAI;IAOrB;;OAEG;IACI,QAAQ,IAAI,MAAM;IAIzB;;OAEG;IACI,eAAe,IAAI
,MAAM;IAIhC,OAAO,CAAC,SAAS;IAejB,OAAO,CAAC,eAAe,CAA0B;IACjD,OAAO,CAAC,aAAa,CAAK;IAC1B,OAAO,CAAC,yBAAyB;IAoBjC,mEAAmE;IACnE,OAAO,CAAC,iBAAiB;IAwCzB,OAAO,CAAC,kBAAkB;IAe1B;;;;;OAKG;IACH,OAAO,CAAC,aAAa;IAkBrB;;;;;;;OAOG;IACH,OAAO,CAAC,kBAAkB;IAwB1B;;;;;OAKG;IACH,OAAO,CAAC,cAAc;IAItB,OAAO,CAAC,YAAY;IAOpB,OAAO,CAAC,kBAAkB;IAyB1B,OAAO,CAAC,cAAc;IAQtB,OAAO,CAAC,yBAAyB;IAYjC,OAAO,CAAC,qBAAqB;IAQ7B,OAAO,CAAC,wBAAwB;IAQhC,OAAO,CAAC,wBAAwB;IAkBhC,OAAO,CAAC,qBAAqB;IAY7B,OAAO,CAAC,oBAAoB;IAQ5B,OAAO,CAAC,uBAAuB;IAa/B,OAAO,CAAC,yBAAyB;IAajC,OAAO,CAAC,sBAAsB;IAmB9B,OAAO,CAAC,iCAAiC;IAGzC,OAAO,CAAC,iCAAiC;IAGzC,OAAO,CAAC,6BAA6B;IAYrC,OAAO,CAAC,sBAAsB;IAW9B,OAAO,CAAC,kBAAkB;IAO1B,OAAO,CAAC,4BAA4B;IAOpC,OAAO,CAAC,kBAAkB;IAW1B,OAAO,CAAC,qBAAqB;IAO7B,OAAO,CAAC,mBAAmB;IAY3B,OAAO,CAAC,SAAS,CAAK;IACtB,OAAO,CAAC,WAAW,CAAK;IACxB,wFAAwF;IACxF,OAAO,CAAC,YAAY,CAAK;IACzB,OAAO,CAAC,YAAY,CAAK;IAEzB,OAAO,CAAC,iBAAiB;IAiBzB,OAAO,CAAC,kBAAkB;IAiD1B,OAAO,CAAC,eAAe;IA8BvB,OAAO,CAAC,wBAAwB;IAUhC,OAAO,CAAC,iBAAiB;IAgBzB,OAAO,CAAC,oBAAoB;IAe5B,OAAO,CAAC,gBAAgB;IAoBxB,OAAO,CAAC,iBAAiB;IAQzB;;OAEG;IACH,OAAO,CAAC,OAAO;IAoBf,OAAO,CAAC,cAAc;IAItB;;;;OAIG;IACH,OAAO,CAAC,KAAK;IA8Hb,OAAO,CAAC,MAAM;IAYd,gCAAgC;IAChC,OAAO,CAAC,kBAAkB;IAwC1B,OAAO,CAAC,WAAW;IAUnB,OAAO,CAAC,aAAa;CAUxB"}

View File

@@ -0,0 +1,932 @@
import { htmlDecodeTree, xmlDecodeTree, BinTrieFlags, determineBranch, replaceCodePoint, } from "entities/lib/decode.js";
/** Character codes the tokenizer compares against, mapped both ways (name to code and code to name). */
var CharCodes;
(function (codes) {
    const table = [
        ["Tab", 9],
        ["NewLine", 10],
        ["FormFeed", 12],
        ["CarriageReturn", 13],
        ["Space", 32],
        ["ExclamationMark", 33],
        ["Number", 35],
        ["Amp", 38],
        ["SingleQuote", 39],
        ["DoubleQuote", 34],
        ["Dash", 45],
        ["Slash", 47],
        ["Zero", 48],
        ["Nine", 57],
        ["Semi", 59],
        ["Lt", 60],
        ["Eq", 61],
        ["Gt", 62],
        ["Questionmark", 63],
        ["UpperA", 65],
        ["LowerA", 97],
        ["UpperF", 70],
        ["LowerF", 102],
        ["UpperZ", 90],
        ["LowerZ", 122],
        ["LowerX", 120],
        ["OpeningSquareBracket", 91],
    ];
    for (const [label, code] of table) {
        codes[label] = code;
        codes[code] = label;
    }
})(CharCodes || (CharCodes = {}));
/** All the states the tokenizer can be in. */
var State;
(function (states) {
    // Names in numbering order: the first entry is state 1, the last is state 29.
    const orderedNames = [
        "Text",
        "BeforeTagName",
        "InTagName",
        "InSelfClosingTag",
        "BeforeClosingTagName",
        "InClosingTagName",
        "AfterClosingTagName",
        // Attributes
        "BeforeAttributeName",
        "InAttributeName",
        "AfterAttributeName",
        "BeforeAttributeValue",
        "InAttributeValueDq",
        "InAttributeValueSq",
        "InAttributeValueNq",
        // Declarations
        "BeforeDeclaration",
        "InDeclaration",
        // Processing instructions
        "InProcessingInstruction",
        // Comments & CDATA
        "BeforeComment",
        "CDATASequence",
        "InSpecialComment",
        "InCommentLike",
        // Special tags
        "BeforeSpecialS",
        "SpecialStartSequence",
        "InSpecialTag",
        // Entities
        "BeforeEntity",
        "BeforeNumericEntity",
        "InNamedEntity",
        "InNumericEntity",
        "InHexEntity",
    ];
    orderedNames.forEach((label, position) => {
        const id = position + 1;
        states[label] = id;
        states[id] = label;
    });
})(State || (State = {}));
/**
 * Tells whether `c` is the code of a space, newline, tab, form feed or carriage return.
 *
 * @param c Character code to test.
 */
function isWhitespace(c) {
    switch (c) {
        case CharCodes.Space:
        case CharCodes.NewLine:
        case CharCodes.Tab:
        case CharCodes.FormFeed:
        case CharCodes.CarriageReturn:
            return true;
        default:
            return false;
    }
}
/**
 * Tells whether `c` ends a tag name or attribute name: `/`, `>` or whitespace.
 *
 * @param c Character code to test.
 */
function isEndOfTagSection(c) {
    if (c === CharCodes.Slash) {
        return true;
    }
    if (c === CharCodes.Gt) {
        return true;
    }
    return isWhitespace(c);
}
/**
 * Tells whether `c` is the code of a decimal digit (`0` to `9`).
 *
 * @param c Character code to test.
 */
function isNumber(c) {
    const atLeastZero = c >= CharCodes.Zero;
    return atLeastZero && c <= CharCodes.Nine;
}
/**
 * Tells whether `c` is the code of an ASCII letter (`a`-`z` or `A`-`Z`).
 *
 * @param c Character code to test.
 */
function isASCIIAlpha(c) {
    const isLowerCase = c >= CharCodes.LowerA && c <= CharCodes.LowerZ;
    if (isLowerCase) {
        return true;
    }
    return c >= CharCodes.UpperA && c <= CharCodes.UpperZ;
}
/**
 * Tells whether `c` is the code of a hexadecimal letter (`a`-`f` or `A`-`F`).
 * Decimal digits are not covered here; `isNumber` handles those.
 *
 * @param c Character code to test.
 */
function isHexDigit(c) {
    const isUpperHex = c >= CharCodes.UpperA && c <= CharCodes.UpperF;
    if (isUpperHex) {
        return true;
    }
    return c >= CharCodes.LowerA && c <= CharCodes.LowerF;
}
/** How an attribute value was quoted, mapped both ways (name to number and number to name). */
export var QuoteType;
(function (quoteTypes) {
    ["NoValue", "Unquoted", "Single", "Double"].forEach((label, id) => {
        quoteTypes[label] = id;
        quoteTypes[id] = label;
    });
})(QuoteType || (QuoteType = {}));
/**
* Sequences used to match longer strings.
*
* We don't have `Script`, `Style`, or `Title` here. Instead, we re-use the *End
* sequences with an increased offset.
*/
const Sequences = (() => {
    // Each sequence is stored as the character codes of its (ASCII) text.
    const codesOf = (text) => Uint8Array.from(text, (character) => character.charCodeAt(0));
    return {
        Cdata: codesOf("CDATA["),
        CdataEnd: codesOf("]]>"),
        CommentEnd: codesOf("-->"),
        ScriptEnd: codesOf("</script"),
        StyleEnd: codesOf("</style"),
        TitleEnd: codesOf("</title"),
    };
})();
export default class Tokenizer {
/**
 * Creates a tokenizer in the `Text` state with an empty buffer.
 *
 * @param options `xmlMode` (default `false`) selects the XML entity trie and
 *     XML tag-name rules; `decodeEntities` (default `true`) enables entity decoding.
 * @param cbs Object whose methods receive the tokenizer events.
 */
constructor({ xmlMode = false, decodeEntities = true, }, cbs) {
this.cbs = cbs;
/** The current state the tokenizer is in. */
this.state = State.Text;
/** The read buffer. */
this.buffer = "";
/** The beginning of the section that is currently being read. */
this.sectionStart = 0;
/** The index within the buffer that we are currently looking at. */
this.index = 0;
/** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
this.baseState = State.Text;
/** For special parsing behavior inside of script and style tags. */
this.isSpecial = false;
/** Indicates whether the tokenizer has been paused. */
this.running = true;
/** The offset of the current buffer. */
this.offset = 0;
/** The sequence of character codes currently being matched (one of `Sequences`). */
this.currentSequence = undefined;
/** Position within `currentSequence` that is matched next. */
this.sequenceIndex = 0;
/** Current position within `entityTrie` while matching a named entity. */
this.trieIndex = 0;
/** The `entityTrie` entry at the current trie position. */
this.trieCurrent = 0;
/** For named entities, the index of the value. For numeric entities, the code point. */
this.entityResult = 0;
/** Counter of entity characters consumed but not yet emitted; starts at 1 for the `&`. */
this.entityExcess = 0;
this.xmlMode = xmlMode;
this.decodeEntities = decodeEntities;
this.entityTrie = xmlMode ? xmlDecodeTree : htmlDecodeTree;
}
reset() {
this.state = State.Text;
this.buffer = "";
this.sectionStart = 0;
this.index = 0;
this.baseState = State.Text;
this.currentSequence = undefined;
this.running = true;
this.offset = 0;
}
write(chunk) {
this.offset += this.buffer.length;
this.buffer = chunk;
this.parse();
}
end() {
if (this.running)
this.finish();
}
pause() {
this.running = false;
}
resume() {
this.running = true;
if (this.index < this.buffer.length + this.offset) {
this.parse();
}
}
/**
* The current index within all of the written data.
*/
getIndex() {
return this.index;
}
/**
* The start of the current section.
*/
getSectionStart() {
return this.sectionStart;
}
stateText(c) {
if (c === CharCodes.Lt ||
(!this.decodeEntities && this.fastForwardTo(CharCodes.Lt))) {
if (this.index > this.sectionStart) {
this.cbs.ontext(this.sectionStart, this.index);
}
this.state = State.BeforeTagName;
this.sectionStart = this.index;
}
else if (this.decodeEntities && c === CharCodes.Amp) {
this.state = State.BeforeEntity;
}
}
stateSpecialStartSequence(c) {
const isEnd = this.sequenceIndex === this.currentSequence.length;
const isMatch = isEnd
? // If we are at the end of the sequence, make sure the tag name has ended
isEndOfTagSection(c)
: // Otherwise, do a case-insensitive comparison
(c | 0x20) === this.currentSequence[this.sequenceIndex];
if (!isMatch) {
this.isSpecial = false;
}
else if (!isEnd) {
this.sequenceIndex++;
return;
}
this.sequenceIndex = 0;
this.state = State.InTagName;
this.stateInTagName(c);
}
/** Look for an end tag. For <title> tags, also decode entities. */
stateInSpecialTag(c) {
// The whole end sequence has been matched: it only counts as an end tag
// when followed by `>` or whitespace.
if (this.sequenceIndex === this.currentSequence.length) {
if (c === CharCodes.Gt || isWhitespace(c)) {
const endOfText = this.index - this.currentSequence.length;
if (this.sectionStart < endOfText) {
// Spoof the index so that reported locations match up.
const actualIndex = this.index;
this.index = endOfText;
this.cbs.ontext(this.sectionStart, endOfText);
this.index = actualIndex;
}
this.isSpecial = false;
this.sectionStart = endOfText + 2; // Skip over the `</`
this.stateInClosingTagName(c);
return; // We are done; skip the rest of the function.
}
// Not an end tag after all: start matching from scratch.
this.sequenceIndex = 0;
}
// Case-insensitive comparison with the next expected character.
if ((c | 0x20) === this.currentSequence[this.sequenceIndex]) {
this.sequenceIndex += 1;
}
else if (this.sequenceIndex === 0) {
if (this.currentSequence === Sequences.TitleEnd) {
// We have to parse entities in <title> tags.
if (this.decodeEntities && c === CharCodes.Amp) {
this.state = State.BeforeEntity;
}
}
else if (this.fastForwardTo(CharCodes.Lt)) {
// Outside of <title> tags, we can fast-forward.
this.sequenceIndex = 1;
}
}
else {
// If we see a `<`, set the sequence index to 1; useful for eg. `<</script>`.
this.sequenceIndex = Number(c === CharCodes.Lt);
}
}
stateCDATASequence(c) {
if (c === Sequences.Cdata[this.sequenceIndex]) {
if (++this.sequenceIndex === Sequences.Cdata.length) {
this.state = State.InCommentLike;
this.currentSequence = Sequences.CdataEnd;
this.sequenceIndex = 0;
this.sectionStart = this.index + 1;
}
}
else {
this.sequenceIndex = 0;
this.state = State.InDeclaration;
this.stateInDeclaration(c); // Reconsume the character
}
}
/**
* When we wait for one specific character, we can speed things up
* by skipping through the buffer until we find it.
*
* @returns Whether the character was found.
*/
fastForwardTo(c) {
// The search starts at the character after the current one; `this.index` is
// an absolute position, so `this.offset` is subtracted to index the buffer.
while (++this.index < this.buffer.length + this.offset) {
if (this.buffer.charCodeAt(this.index - this.offset) === c) {
// Found: `this.index` now points at the matching character.
return true;
}
}
/*
 * We increment the index at the end of the `parse` loop,
 * so set it to `buffer.length - 1` here.
 *
 * TODO: Refactor `parse` to increment index before calling states.
 */
this.index = this.buffer.length + this.offset - 1;
return false;
}
/**
* Comments and CDATA end with `-->` and `]]>`.
*
* Their common qualities are:
* - Their end sequences have a distinct character they start with.
* - That character is then repeated, so we have to check multiple repeats.
* - All characters but the start character of the sequence can be skipped.
*/
stateInCommentLike(c) {
if (c === this.currentSequence[this.sequenceIndex]) {
if (++this.sequenceIndex === this.currentSequence.length) {
// End sequence complete. The third argument (2) is passed on as an offset;
// the Parser in this file subtracts it from the end index when slicing the content.
if (this.currentSequence === Sequences.CdataEnd) {
this.cbs.oncdata(this.sectionStart, this.index, 2);
}
else {
this.cbs.oncomment(this.sectionStart, this.index, 2);
}
this.sequenceIndex = 0;
this.sectionStart = this.index + 1;
this.state = State.Text;
}
}
else if (this.sequenceIndex === 0) {
// Fast-forward to the first character of the sequence
if (this.fastForwardTo(this.currentSequence[0])) {
this.sequenceIndex = 1;
}
}
else if (c !== this.currentSequence[this.sequenceIndex - 1]) {
// Allow long sequences, eg. --->, ]]]>
this.sequenceIndex = 0;
}
}
/**
* HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
*
* XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).
* We allow anything that wouldn't end the tag.
*/
isTagStartChar(c) {
return this.xmlMode ? !isEndOfTagSection(c) : isASCIIAlpha(c);
}
startSpecial(sequence, offset) {
this.isSpecial = true;
this.currentSequence = sequence;
this.sequenceIndex = offset;
this.state = State.SpecialStartSequence;
}
stateBeforeTagName(c) {
if (c === CharCodes.ExclamationMark) {
this.state = State.BeforeDeclaration;
this.sectionStart = this.index + 1;
}
else if (c === CharCodes.Questionmark) {
this.state = State.InProcessingInstruction;
this.sectionStart = this.index + 1;
}
else if (this.isTagStartChar(c)) {
const lower = c | 0x20;
this.sectionStart = this.index;
if (!this.xmlMode && lower === Sequences.TitleEnd[2]) {
this.startSpecial(Sequences.TitleEnd, 3);
}
else {
this.state =
!this.xmlMode && lower === Sequences.ScriptEnd[2]
? State.BeforeSpecialS
: State.InTagName;
}
}
else if (c === CharCodes.Slash) {
this.state = State.BeforeClosingTagName;
}
else {
this.state = State.Text;
this.stateText(c);
}
}
stateInTagName(c) {
if (isEndOfTagSection(c)) {
this.cbs.onopentagname(this.sectionStart, this.index);
this.sectionStart = -1;
this.state = State.BeforeAttributeName;
this.stateBeforeAttributeName(c);
}
}
stateBeforeClosingTagName(c) {
if (isWhitespace(c)) {
// Ignore
}
else if (c === CharCodes.Gt) {
this.state = State.Text;
}
else {
this.state = this.isTagStartChar(c)
? State.InClosingTagName
: State.InSpecialComment;
this.sectionStart = this.index;
}
}
stateInClosingTagName(c) {
if (c === CharCodes.Gt || isWhitespace(c)) {
this.cbs.onclosetag(this.sectionStart, this.index);
this.sectionStart = -1;
this.state = State.AfterClosingTagName;
this.stateAfterClosingTagName(c);
}
}
stateAfterClosingTagName(c) {
// Skip everything until ">"
if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
this.state = State.Text;
this.baseState = State.Text;
this.sectionStart = this.index + 1;
}
}
stateBeforeAttributeName(c) {
if (c === CharCodes.Gt) {
this.cbs.onopentagend(this.index);
if (this.isSpecial) {
this.state = State.InSpecialTag;
this.sequenceIndex = 0;
}
else {
this.state = State.Text;
}
this.baseState = this.state;
this.sectionStart = this.index + 1;
}
else if (c === CharCodes.Slash) {
this.state = State.InSelfClosingTag;
}
else if (!isWhitespace(c)) {
this.state = State.InAttributeName;
this.sectionStart = this.index;
}
}
stateInSelfClosingTag(c) {
if (c === CharCodes.Gt) {
this.cbs.onselfclosingtag(this.index);
this.state = State.Text;
this.baseState = State.Text;
this.sectionStart = this.index + 1;
this.isSpecial = false; // Reset special state, in case of self-closing special tags
}
else if (!isWhitespace(c)) {
this.state = State.BeforeAttributeName;
this.stateBeforeAttributeName(c);
}
}
stateInAttributeName(c) {
if (c === CharCodes.Eq || isEndOfTagSection(c)) {
this.cbs.onattribname(this.sectionStart, this.index);
this.sectionStart = -1;
this.state = State.AfterAttributeName;
this.stateAfterAttributeName(c);
}
}
stateAfterAttributeName(c) {
if (c === CharCodes.Eq) {
this.state = State.BeforeAttributeValue;
}
else if (c === CharCodes.Slash || c === CharCodes.Gt) {
this.cbs.onattribend(QuoteType.NoValue, this.index);
this.state = State.BeforeAttributeName;
this.stateBeforeAttributeName(c);
}
else if (!isWhitespace(c)) {
this.cbs.onattribend(QuoteType.NoValue, this.index);
this.state = State.InAttributeName;
this.sectionStart = this.index;
}
}
stateBeforeAttributeValue(c) {
if (c === CharCodes.DoubleQuote) {
this.state = State.InAttributeValueDq;
this.sectionStart = this.index + 1;
}
else if (c === CharCodes.SingleQuote) {
this.state = State.InAttributeValueSq;
this.sectionStart = this.index + 1;
}
else if (!isWhitespace(c)) {
this.sectionStart = this.index;
this.state = State.InAttributeValueNq;
this.stateInAttributeValueNoQuotes(c); // Reconsume token
}
}
handleInAttributeValue(c, quote) {
if (c === quote ||
(!this.decodeEntities && this.fastForwardTo(quote))) {
this.cbs.onattribdata(this.sectionStart, this.index);
this.sectionStart = -1;
this.cbs.onattribend(quote === CharCodes.DoubleQuote
? QuoteType.Double
: QuoteType.Single, this.index);
this.state = State.BeforeAttributeName;
}
else if (this.decodeEntities && c === CharCodes.Amp) {
this.baseState = this.state;
this.state = State.BeforeEntity;
}
}
stateInAttributeValueDoubleQuotes(c) {
this.handleInAttributeValue(c, CharCodes.DoubleQuote);
}
stateInAttributeValueSingleQuotes(c) {
this.handleInAttributeValue(c, CharCodes.SingleQuote);
}
stateInAttributeValueNoQuotes(c) {
if (isWhitespace(c) || c === CharCodes.Gt) {
this.cbs.onattribdata(this.sectionStart, this.index);
this.sectionStart = -1;
this.cbs.onattribend(QuoteType.Unquoted, this.index);
this.state = State.BeforeAttributeName;
this.stateBeforeAttributeName(c);
}
else if (this.decodeEntities && c === CharCodes.Amp) {
this.baseState = this.state;
this.state = State.BeforeEntity;
}
}
stateBeforeDeclaration(c) {
if (c === CharCodes.OpeningSquareBracket) {
this.state = State.CDATASequence;
this.sequenceIndex = 0;
}
else {
this.state =
c === CharCodes.Dash
? State.BeforeComment
: State.InDeclaration;
}
}
stateInDeclaration(c) {
if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
this.cbs.ondeclaration(this.sectionStart, this.index);
this.state = State.Text;
this.sectionStart = this.index + 1;
}
}
stateInProcessingInstruction(c) {
if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
this.cbs.onprocessinginstruction(this.sectionStart, this.index);
this.state = State.Text;
this.sectionStart = this.index + 1;
}
}
stateBeforeComment(c) {
if (c === CharCodes.Dash) {
this.state = State.InCommentLike;
this.currentSequence = Sequences.CommentEnd;
// Allow short comments (eg. <!-->)
this.sequenceIndex = 2;
this.sectionStart = this.index + 1;
}
else {
this.state = State.InDeclaration;
}
}
stateInSpecialComment(c) {
if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
this.cbs.oncomment(this.sectionStart, this.index, 0);
this.state = State.Text;
this.sectionStart = this.index + 1;
}
}
stateBeforeSpecialS(c) {
const lower = c | 0x20;
if (lower === Sequences.ScriptEnd[3]) {
this.startSpecial(Sequences.ScriptEnd, 4);
}
else if (lower === Sequences.StyleEnd[3]) {
this.startSpecial(Sequences.StyleEnd, 4);
}
else {
this.state = State.InTagName;
this.stateInTagName(c); // Consume the token again
}
}
stateBeforeEntity(c) {
// Start excess with 1 to include the '&'
this.entityExcess = 1;
this.entityResult = 0;
if (c === CharCodes.Number) {
this.state = State.BeforeNumericEntity;
}
else if (c === CharCodes.Amp) {
// We have two `&` characters in a row. Stay in the current state.
}
else {
this.trieIndex = 0;
this.trieCurrent = this.entityTrie[0];
this.state = State.InNamedEntity;
this.stateInNamedEntity(c);
}
}
/**
 * Consumes one character of a named entity by advancing through `entityTrie`.
 *
 * @param c Current character code.
 */
stateInNamedEntity(c) {
this.entityExcess += 1;
this.trieIndex = determineBranch(this.entityTrie, this.trieCurrent, this.trieIndex + 1, c);
// No branch for this character: the entity name ends here.
if (this.trieIndex < 0) {
this.emitNamedEntity();
// Step back so that this character is looked at again in the restored state.
this.index--;
return;
}
this.trieCurrent = this.entityTrie[this.trieIndex];
const masked = this.trieCurrent & BinTrieFlags.VALUE_LENGTH;
// If the branch is a value, store it and continue
if (masked) {
// The mask is the number of bytes of the value, including the current byte.
const valueLength = (masked >> 14) - 1;
// If we have a legacy entity while parsing strictly, just skip the number of bytes
if (!this.allowLegacyEntity() && c !== CharCodes.Semi) {
this.trieIndex += valueLength;
}
else {
// Add 1 as we have already incremented the excess
const entityStart = this.index - this.entityExcess + 1;
// Emit whatever precedes the entity before recording the match.
if (entityStart > this.sectionStart) {
this.emitPartial(this.sectionStart, entityStart);
}
// If this is a surrogate pair, consume the next two bytes
this.entityResult = this.trieIndex;
this.trieIndex += valueLength;
this.entityExcess = 0;
this.sectionStart = this.index + 1;
if (valueLength === 0) {
this.emitNamedEntity();
}
}
}
}
/**
 * Returns to `baseState` and emits the code point(s) of the named entity
 * recorded in `entityResult`, if one was recorded.
 */
emitNamedEntity() {
this.state = this.baseState;
// Zero means no entity value was recorded; nothing to emit.
if (this.entityResult === 0) {
return;
}
// The value length is read from the `VALUE_LENGTH` bits of the trie entry.
const valueLength = (this.entityTrie[this.entityResult] & BinTrieFlags.VALUE_LENGTH) >>
14;
switch (valueLength) {
case 1: {
// The code point is stored in the same entry, outside the length bits.
this.emitCodePoint(this.entityTrie[this.entityResult] &
~BinTrieFlags.VALUE_LENGTH);
break;
}
case 2: {
// The code point is stored in the following entry.
this.emitCodePoint(this.entityTrie[this.entityResult + 1]);
break;
}
case 3: {
// Two values are stored in the two following entries.
this.emitCodePoint(this.entityTrie[this.entityResult + 1]);
this.emitCodePoint(this.entityTrie[this.entityResult + 2]);
}
}
}
stateBeforeNumericEntity(c) {
if ((c | 0x20) === CharCodes.LowerX) {
this.entityExcess++;
this.state = State.InHexEntity;
}
else {
this.state = State.InNumericEntity;
this.stateInNumericEntity(c);
}
}
/**
 * Emits the numeric entity accumulated in `entityResult`, provided at least
 * one digit was read, then returns to `baseState`.
 *
 * @param strict Whether the entity was terminated by `;`.
 */
emitNumericEntity(strict) {
const entityStart = this.index - this.entityExcess - 1;
// Digits start after `&#`, or after `&#x` for hexadecimal entities.
const numberStart = entityStart + 2 + Number(this.state === State.InHexEntity);
// Equal positions mean no digit was consumed; then nothing is emitted.
if (numberStart !== this.index) {
// Emit leading data if any
if (entityStart > this.sectionStart) {
this.emitPartial(this.sectionStart, entityStart);
}
// With `strict`, the next section starts one character further on.
this.sectionStart = this.index + Number(strict);
this.emitCodePoint(replaceCodePoint(this.entityResult));
}
this.state = this.baseState;
}
stateInNumericEntity(c) {
if (c === CharCodes.Semi) {
this.emitNumericEntity(true);
}
else if (isNumber(c)) {
this.entityResult = this.entityResult * 10 + (c - CharCodes.Zero);
this.entityExcess++;
}
else {
if (this.allowLegacyEntity()) {
this.emitNumericEntity(false);
}
else {
this.state = this.baseState;
}
this.index--;
}
}
stateInHexEntity(c) {
if (c === CharCodes.Semi) {
this.emitNumericEntity(true);
}
else if (isNumber(c)) {
this.entityResult = this.entityResult * 16 + (c - CharCodes.Zero);
this.entityExcess++;
}
else if (isHexDigit(c)) {
this.entityResult =
this.entityResult * 16 + ((c | 0x20) - CharCodes.LowerA + 10);
this.entityExcess++;
}
else {
if (this.allowLegacyEntity()) {
this.emitNumericEntity(false);
}
else {
this.state = this.baseState;
}
this.index--;
}
}
allowLegacyEntity() {
return (!this.xmlMode &&
(this.baseState === State.Text ||
this.baseState === State.InSpecialTag));
}
/**
* Remove data that has already been consumed from the buffer.
*/
cleanup() {
// If we are inside of text or attributes, emit what we already have.
if (this.running && this.sectionStart !== this.index) {
if (this.state === State.Text ||
(this.state === State.InSpecialTag && this.sequenceIndex === 0)) {
this.cbs.ontext(this.sectionStart, this.index);
this.sectionStart = this.index;
}
else if (this.state === State.InAttributeValueDq ||
this.state === State.InAttributeValueSq ||
this.state === State.InAttributeValueNq) {
this.cbs.onattribdata(this.sectionStart, this.index);
this.sectionStart = this.index;
}
}
}
shouldContinue() {
return this.index < this.buffer.length + this.offset && this.running;
}
/**
 * Iterates through the buffer, calling the function corresponding to the current state.
 *
 * Each iteration reads the char code at `index - offset` (so `index` is a
 * position that is shifted by `offset` relative to `buffer`), hands it to the
 * handler of the current state and then advances `index` by one. The loop
 * runs for as long as `shouldContinue()` holds; afterwards `cleanup()` is
 * called once.
 *
 * States that are more likely to be hit are higher up, as a performance improvement.
 */
parse() {
while (this.shouldContinue()) {
const c = this.buffer.charCodeAt(this.index - this.offset);
switch (this.state) {
case State.Text: {
this.stateText(c);
break;
}
case State.SpecialStartSequence: {
this.stateSpecialStartSequence(c);
break;
}
case State.InSpecialTag: {
this.stateInSpecialTag(c);
break;
}
case State.CDATASequence: {
this.stateCDATASequence(c);
break;
}
case State.InAttributeValueDq: {
this.stateInAttributeValueDoubleQuotes(c);
break;
}
case State.InAttributeName: {
this.stateInAttributeName(c);
break;
}
case State.InCommentLike: {
this.stateInCommentLike(c);
break;
}
case State.InSpecialComment: {
this.stateInSpecialComment(c);
break;
}
case State.BeforeAttributeName: {
this.stateBeforeAttributeName(c);
break;
}
case State.InTagName: {
this.stateInTagName(c);
break;
}
case State.InClosingTagName: {
this.stateInClosingTagName(c);
break;
}
case State.BeforeTagName: {
this.stateBeforeTagName(c);
break;
}
case State.AfterAttributeName: {
this.stateAfterAttributeName(c);
break;
}
case State.InAttributeValueSq: {
this.stateInAttributeValueSingleQuotes(c);
break;
}
case State.BeforeAttributeValue: {
this.stateBeforeAttributeValue(c);
break;
}
case State.BeforeClosingTagName: {
this.stateBeforeClosingTagName(c);
break;
}
case State.AfterClosingTagName: {
this.stateAfterClosingTagName(c);
break;
}
case State.BeforeSpecialS: {
this.stateBeforeSpecialS(c);
break;
}
case State.InAttributeValueNq: {
this.stateInAttributeValueNoQuotes(c);
break;
}
case State.InSelfClosingTag: {
this.stateInSelfClosingTag(c);
break;
}
case State.InDeclaration: {
this.stateInDeclaration(c);
break;
}
case State.BeforeDeclaration: {
this.stateBeforeDeclaration(c);
break;
}
case State.BeforeComment: {
this.stateBeforeComment(c);
break;
}
case State.InProcessingInstruction: {
this.stateInProcessingInstruction(c);
break;
}
case State.InNamedEntity: {
this.stateInNamedEntity(c);
break;
}
case State.BeforeEntity: {
this.stateBeforeEntity(c);
break;
}
case State.InHexEntity: {
this.stateInHexEntity(c);
break;
}
case State.InNumericEntity: {
this.stateInNumericEntity(c);
break;
}
default: {
// Only state without an explicit case: `this.state === State.BeforeNumericEntity`
this.stateBeforeNumericEntity(c);
}
}
// Handlers that want the same character processed again (e.g. the numeric
// entity states) decrement `index` themselves before we get here.
this.index++;
}
// Emit whatever text / attribute data has been read so far.
this.cleanup();
}
finish() {
if (this.state === State.InNamedEntity) {
this.emitNamedEntity();
}
// If there is remaining data, emit it in a reasonable way
if (this.sectionStart < this.index) {
this.handleTrailingData();
}
this.cbs.onend();
}
/** Handle any trailing data. */
handleTrailingData() {
const endIndex = this.buffer.length + this.offset;
if (this.state === State.InCommentLike) {
if (this.currentSequence === Sequences.CdataEnd) {
this.cbs.oncdata(this.sectionStart, endIndex, 0);
}
else {
this.cbs.oncomment(this.sectionStart, endIndex, 0);
}
}
else if (this.state === State.InNumericEntity &&
this.allowLegacyEntity()) {
this.emitNumericEntity(false);
// All trailing data will have been consumed
}
else if (this.state === State.InHexEntity &&
this.allowLegacyEntity()) {
this.emitNumericEntity(false);
// All trailing data will have been consumed
}
else if (this.state === State.InTagName ||
this.state === State.BeforeAttributeName ||
this.state === State.BeforeAttributeValue ||
this.state === State.AfterAttributeName ||
this.state === State.InAttributeName ||
this.state === State.InAttributeValueSq ||
this.state === State.InAttributeValueDq ||
this.state === State.InAttributeValueNq ||
this.state === State.InClosingTagName) {
/*
* If we are currently in an opening or closing tag, us not calling the
* respective callback signals that the tag should be ignored.
*/
}
else {
this.cbs.ontext(this.sectionStart, endIndex);
}
}
emitPartial(start, endIndex) {
if (this.baseState !== State.Text &&
this.baseState !== State.InSpecialTag) {
this.cbs.onattribdata(start, endIndex);
}
else {
this.cbs.ontext(start, endIndex);
}
}
emitCodePoint(cp) {
if (this.baseState !== State.Text &&
this.baseState !== State.InSpecialTag) {
this.cbs.onattribentity(cp);
}
else {
this.cbs.ontextentity(cp);
}
}
}
//# sourceMappingURL=Tokenizer.js.map

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
{"version":3,"file":"WritableStream.d.ts","sourceRoot":"https://raw.githubusercontent.com/fb55/htmlparser2/c123610e003a1eaebc61febed01cabb6e41eb658/src/","sources":["WritableStream.ts"],"names":[],"mappings":";;AAAA,OAAO,EAAU,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAK7D,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAQvC;;;;GAIG;AACH,qBAAa,cAAe,SAAQ,QAAQ;IACxC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAuB;gBAEpC,GAAG,EAAE,OAAO,CAAC,OAAO,CAAC,EAAE,OAAO,CAAC,EAAE,aAAa;IAKjD,MAAM,CACX,KAAK,EAAE,MAAM,GAAG,MAAM,EACtB,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,MAAM,IAAI,GACrB,IAAI;IAOE,MAAM,CAAC,QAAQ,EAAE,MAAM,IAAI,GAAG,IAAI;CAI9C"}

View File

@@ -0,0 +1,32 @@
import { Parser } from "./Parser.js";
/*
* NOTE: If either of these two imports produces a type error,
* please update your @types/node dependency!
*/
import { Writable } from "node:stream";
import { StringDecoder } from "node:string_decoder";
// Following the example in https://nodejs.org/api/stream.html#stream_decoding_buffers_in_a_writable_stream
/**
 * Tells whether a chunk handed to `_write` is a buffer.
 *
 * Only the reported encoding is inspected; the chunk itself is ignored.
 *
 * @param _chunk The chunk (unused).
 * @param encoding The encoding reported for the chunk.
 * @returns `true` exactly when `encoding` is the string `"buffer"`.
 */
function isBuffer(_chunk, encoding) {
    const bufferEncoding = "buffer";
    return bufferEncoding === encoding;
}
/**
 * Exposes the `Parser` interface as a NodeJS writable stream.
 *
 * The stream is created with `decodeStrings: false`; buffer chunks are
 * decoded with a `StringDecoder` before they are passed to the parser,
 * string chunks are forwarded untouched.
 *
 * @see Parser
 */
export class WritableStream extends Writable {
    /**
     * @param cbs Callbacks forwarded to the underlying `Parser`.
     * @param options Options forwarded to the underlying `Parser`.
     */
    constructor(cbs, options) {
        super({ decodeStrings: false });
        this._decoder = new StringDecoder();
        this._parser = new Parser(cbs, options);
    }
    /**
     * Feeds one chunk to the parser, then signals completion.
     */
    _write(chunk, encoding, callback) {
        let text = chunk;
        if (isBuffer(chunk, encoding)) {
            // The decoder holds back incomplete multi-byte sequences.
            text = this._decoder.write(chunk);
        }
        this._parser.write(text);
        callback();
    }
    /**
     * Flushes whatever the decoder still holds and ends the parser.
     */
    _final(callback) {
        const remainder = this._decoder.end();
        this._parser.end(remainder);
        callback();
    }
}
//# sourceMappingURL=WritableStream.js.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"WritableStream.js","sourceRoot":"https://raw.githubusercontent.com/fb55/htmlparser2/c123610e003a1eaebc61febed01cabb6e41eb658/src/","sources":["WritableStream.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAA0B,MAAM,aAAa,CAAC;AAC7D;;;GAGG;AACH,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEpD,2GAA2G;AAC3G,SAAS,QAAQ,CAAC,MAAuB,EAAE,QAAgB;IACvD,OAAO,QAAQ,KAAK,QAAQ,CAAC;AACjC,CAAC;AAED;;;;GAIG;AACH,MAAM,OAAO,cAAe,SAAQ,QAAQ;IAIxC,YAAY,GAAqB,EAAE,OAAuB;QACtD,KAAK,CAAC,EAAE,aAAa,EAAE,KAAK,EAAE,CAAC,CAAC;QAHnB,aAAQ,GAAG,IAAI,aAAa,EAAE,CAAC;QAI5C,IAAI,CAAC,OAAO,GAAG,IAAI,MAAM,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IAC5C,CAAC;IAEQ,MAAM,CACX,KAAsB,EACtB,QAAgB,EAChB,QAAoB;QAEpB,IAAI,CAAC,OAAO,CAAC,KAAK,CACd,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CACjE,CAAC;QACF,QAAQ,EAAE,CAAC;IACf,CAAC;IAEQ,MAAM,CAAC,QAAoB;QAChC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE,CAAC,CAAC;QACtC,QAAQ,EAAE,CAAC;IACf,CAAC;CACJ"}

View File

@@ -0,0 +1 @@
{"version":3,"file":"index.d.ts","sourceRoot":"https://raw.githubusercontent.com/fb55/htmlparser2/c123610e003a1eaebc61febed01cabb6e41eb658/src/","sources":["index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACpD,OAAO,EAAE,MAAM,EAAE,KAAK,aAAa,EAAE,MAAM,aAAa,CAAC;AAEzD,OAAO,EAEH,iBAAiB,EACjB,SAAS,EACT,OAAO,EACP,QAAQ,EACX,MAAM,YAAY,CAAC;AAEpB,OAAO,EACH,UAAU,EAEV,UAAU,IAAI,cAAc,EAC5B,KAAK,iBAAiB,GACzB,MAAM,YAAY,CAAC;AAEpB,MAAM,MAAM,OAAO,GAAG,aAAa,GAAG,iBAAiB,CAAC;AAIxD;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,QAAQ,CAIvE;AACD;;;;;;;;;GASG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,SAAS,EAAE,CAErE;AACD;;;;;;GAMG;AACH,wBAAgB,eAAe,CAC3B,QAAQ,EAAE,CAAC,KAAK,EAAE,KAAK,GAAG,IAAI,EAAE,GAAG,EAAE,SAAS,EAAE,KAAK,IAAI,EACzD,OAAO,CAAC,EAAE,OAAO,EACjB,eAAe,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI,GAC7C,MAAM,CAGR;AAED,OAAO,EACH,OAAO,IAAI,SAAS,EACpB,KAAK,SAAS,IAAI,kBAAkB,GACvC,MAAM,gBAAgB,CAAC;AAMxB,OAAO,KAAK,WAAW,MAAM,gBAAgB,CAAC;AAE9C,OAAO,EAAW,IAAI,EAAE,MAAM,UAAU,CAAC;AAEzC,OAAO,EAAE,OAAO,EAAE,MAAM,UAAU,CAAC;AAInC;;;;;GAKG;AACH,wBAAgB,SAAS,CACrB,IAAI,EAAE,MAAM,EACZ,OAAO,GAAE,OAAiC,GAC3C,IAAI,GAAG,IAAI,CAEb;AAED,OAAO,KAAK,QAAQ,MAAM,UAAU,CAAC"}

View File

@@ -0,0 +1,62 @@
import { Parser } from "./Parser.js";
export { Parser } from "./Parser.js";
import { DomHandler, } from "domhandler";
export { DomHandler,
// Old name for DomHandler
DomHandler as DefaultHandler, } from "domhandler";
// Helper methods
/**
 * Parses `data` in one go and returns the document that was built.
 *
 * A fresh `DomHandler` (without a completion callback) and a fresh `Parser`
 * are created for every call; both receive the same `options`.
 *
 * @param data The data that should be parsed.
 * @param options Optional options for the parser and DOM builder.
 * @returns The handler's `root` node after parsing has ended.
 */
export function parseDocument(data, options) {
    const domBuilder = new DomHandler(undefined, options);
    const parser = new Parser(domBuilder, options);
    parser.end(data);
    return domBuilder.root;
}
/**
 * Parses `data` and returns the top-level nodes of the resulting document.
 *
 * The returned nodes are the `children` of the document produced by
 * `parseDocument`, so they still have that `Document` node as their parent.
 * Call `parseDocument` directly to obtain the `Document` node itself.
 *
 * @param data The data that should be parsed.
 * @param options Optional options for the parser and DOM builder.
 * @deprecated Use `parseDocument` instead.
 */
export function parseDOM(data, options) {
    const { children } = parseDocument(data, options);
    return children;
}
/**
 * Builds a parser that feeds a newly created DOM handler.
 *
 * @param callback A callback that will be called once parsing has been completed.
 * @param options Optional options, passed to both the parser and the DOM builder.
 * @param elementCallback An optional callback that will be called every time a tag has been completed inside of the DOM.
 * @returns The new `Parser` instance.
 */
export function createDomStream(callback, options, elementCallback) {
    const domHandler = new DomHandler(callback, options, elementCallback);
    const parser = new Parser(domHandler, options);
    return parser;
}
export { default as Tokenizer, } from "./Tokenizer.js";
/*
* All of the following exports exist for backwards-compatibility.
* They should probably be removed eventually.
*/
export * as ElementType from "domelementtype";
import { getFeed } from "domutils";
export { getFeed } from "domutils";
// Options used by `parseFeed` when the caller does not supply any.
const parseFeedDefaultOptions = { xmlMode: true };
/**
 * Parses a feed by building its DOM with `parseDOM` and handing the root
 * nodes to `getFeed`.
 *
 * @param feed The feed that should be parsed, as a string.
 * @param options Optionally, options for parsing. They replace the defaults entirely, so you should set `xmlMode` to `true` when passing your own.
 * @returns Whatever `getFeed` returns for the parsed nodes.
 */
export function parseFeed(feed, options = parseFeedDefaultOptions) {
    const rootNodes = parseDOM(feed, options);
    return getFeed(rootNodes);
}
export * as DomUtils from "domutils";
//# sourceMappingURL=index.js.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"index.js","sourceRoot":"https://raw.githubusercontent.com/fb55/htmlparser2/c123610e003a1eaebc61febed01cabb6e41eb658/src/","sources":["index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAiB,MAAM,aAAa,CAAC;AACpD,OAAO,EAAE,MAAM,EAAsB,MAAM,aAAa,CAAC;AAEzD,OAAO,EACH,UAAU,GAKb,MAAM,YAAY,CAAC;AAEpB,OAAO,EACH,UAAU;AACV,0BAA0B;AAC1B,UAAU,IAAI,cAAc,GAE/B,MAAM,YAAY,CAAC;AAIpB,iBAAiB;AAEjB;;;;;GAKG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY,EAAE,OAAiB;IACzD,MAAM,OAAO,GAAG,IAAI,UAAU,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;IACnD,IAAI,MAAM,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACvC,OAAO,OAAO,CAAC,IAAI,CAAC;AACxB,CAAC;AACD;;;;;;;;;GASG;AACH,MAAM,UAAU,QAAQ,CAAC,IAAY,EAAE,OAAiB;IACpD,OAAO,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,QAAQ,CAAC;AACjD,CAAC;AACD;;;;;;GAMG;AACH,MAAM,UAAU,eAAe,CAC3B,QAAyD,EACzD,OAAiB,EACjB,eAA4C;IAE5C,MAAM,OAAO,GAAG,IAAI,UAAU,CAAC,QAAQ,EAAE,OAAO,EAAE,eAAe,CAAC,CAAC;IACnE,OAAO,IAAI,MAAM,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;AACxC,CAAC;AAED,OAAO,EACH,OAAO,IAAI,SAAS,GAEvB,MAAM,gBAAgB,CAAC;AAExB;;;GAGG;AACH,OAAO,KAAK,WAAW,MAAM,gBAAgB,CAAC;AAE9C,OAAO,EAAE,OAAO,EAAQ,MAAM,UAAU,CAAC;AAEzC,OAAO,EAAE,OAAO,EAAE,MAAM,UAAU,CAAC;AAEnC,MAAM,uBAAuB,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;AAElD;;;;;GAKG;AACH,MAAM,UAAU,SAAS,CACrB,IAAY,EACZ,UAAmB,uBAAuB;IAE1C,OAAO,OAAO,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC;AAC5C,CAAC;AAED,OAAO,KAAK,QAAQ,MAAM,UAAU,CAAC"}

View File

@@ -0,0 +1 @@
{"type":"module"}

View File

@@ -0,0 +1 @@
{"version":3,"file":"index.d.ts","sourceRoot":"https://raw.githubusercontent.com/fb55/htmlparser2/c123610e003a1eaebc61febed01cabb6e41eb658/src/","sources":["index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACpD,OAAO,EAAE,MAAM,EAAE,KAAK,aAAa,EAAE,MAAM,aAAa,CAAC;AAEzD,OAAO,EAEH,iBAAiB,EACjB,SAAS,EACT,OAAO,EACP,QAAQ,EACX,MAAM,YAAY,CAAC;AAEpB,OAAO,EACH,UAAU,EAEV,UAAU,IAAI,cAAc,EAC5B,KAAK,iBAAiB,GACzB,MAAM,YAAY,CAAC;AAEpB,MAAM,MAAM,OAAO,GAAG,aAAa,GAAG,iBAAiB,CAAC;AAIxD;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,QAAQ,CAIvE;AACD;;;;;;;;;GASG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,SAAS,EAAE,CAErE;AACD;;;;;;GAMG;AACH,wBAAgB,eAAe,CAC3B,QAAQ,EAAE,CAAC,KAAK,EAAE,KAAK,GAAG,IAAI,EAAE,GAAG,EAAE,SAAS,EAAE,KAAK,IAAI,EACzD,OAAO,CAAC,EAAE,OAAO,EACjB,eAAe,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI,GAC7C,MAAM,CAGR;AAED,OAAO,EACH,OAAO,IAAI,SAAS,EACpB,KAAK,SAAS,IAAI,kBAAkB,GACvC,MAAM,gBAAgB,CAAC;AAMxB,OAAO,KAAK,WAAW,MAAM,gBAAgB,CAAC;AAE9C,OAAO,EAAW,IAAI,EAAE,MAAM,UAAU,CAAC;AAEzC,OAAO,EAAE,OAAO,EAAE,MAAM,UAAU,CAAC;AAInC;;;;;GAKG;AACH,wBAAgB,SAAS,CACrB,IAAI,EAAE,MAAM,EACZ,OAAO,GAAE,OAAiC,GAC3C,IAAI,GAAG,IAAI,CAEb;AAED,OAAO,KAAK,QAAQ,MAAM,UAAU,CAAC"}

100
resources/app/node_modules/htmlparser2/lib/index.js generated vendored Normal file
View File

@@ -0,0 +1,100 @@
"use strict";
// Compiler-emitted helper: re-exports property `k` of module `m` on `o`,
// optionally under the different name `k2`. An already installed
// `this.__createBinding` takes precedence over the local implementation.
var __createBinding = (this && this.__createBinding) || (function () {
    // Used when `Object.create` exists: defines the property via a descriptor.
    function bindWithDescriptor(o, m, k, k2) {
        var exportName = k2 === undefined ? k : k2;
        var desc = Object.getOwnPropertyDescriptor(m, k);
        var needsGetter;
        if (!desc) {
            // Not an own property of `m`.
            needsGetter = true;
        }
        else if ("get" in desc) {
            // Existing accessors are reused only for `__esModule` modules.
            needsGetter = !m.__esModule;
        }
        else {
            // Data properties that can still change need a live getter.
            needsGetter = desc.writable || desc.configurable;
        }
        if (needsGetter) {
            desc = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, exportName, desc);
    }
    // Fallback without `Object.create`: copies the current value.
    function bindByAssignment(o, m, k, k2) {
        o[k2 === undefined ? k : k2] = m[k];
    }
    return Object.create ? bindWithDescriptor : bindByAssignment;
}());
// Compiler-emitted helper: stores `v` as the `default` export of `o`.
// An already installed `this.__setModuleDefault` takes precedence.
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    // Used when `Object.create` exists: enumerable, otherwise default attributes.
    function defineDefault(o, v) {
        var descriptor = { enumerable: true, value: v };
        Object.defineProperty(o, "default", descriptor);
    }
    // Fallback without `Object.create`: plain assignment.
    function assignDefault(o, v) {
        o["default"] = v;
    }
    return Object.create ? defineDefault : assignDefault;
}());
// Compiler-emitted helper for `import * as ns`: modules flagged with
// `__esModule` are returned unchanged; anything else is wrapped in a new
// object that re-exports every own enumerable key except "default" and
// carries the module itself as `default`.
// An already installed `this.__importStar` takes precedence.
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var namespace = {};
    if (mod != null) {
        for (var key in mod) {
            if (key === "default") {
                continue;
            }
            // Skip keys inherited through the prototype chain.
            if (Object.prototype.hasOwnProperty.call(mod, key)) {
                __createBinding(namespace, mod, key);
            }
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
// Compiler-emitted helper for default imports: modules flagged with
// `__esModule` are returned unchanged, anything else is wrapped so that it
// becomes the `default` property of a new object.
// An already installed `this.__importDefault` takes precedence.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.DomUtils = exports.parseFeed = exports.getFeed = exports.ElementType = exports.Tokenizer = exports.createDomStream = exports.parseDOM = exports.parseDocument = exports.DefaultHandler = exports.DomHandler = exports.Parser = void 0;
var Parser_js_1 = require("./Parser.js");
var Parser_js_2 = require("./Parser.js");
Object.defineProperty(exports, "Parser", { enumerable: true, get: function () { return Parser_js_2.Parser; } });
var domhandler_1 = require("domhandler");
var domhandler_2 = require("domhandler");
Object.defineProperty(exports, "DomHandler", { enumerable: true, get: function () { return domhandler_2.DomHandler; } });
// Old name for DomHandler
Object.defineProperty(exports, "DefaultHandler", { enumerable: true, get: function () { return domhandler_2.DomHandler; } });
// Helper methods
/**
 * Parses `data` in one go and returns the document that was built.
 *
 * A fresh `DomHandler` (without a completion callback) and a fresh `Parser`
 * are created for every call; both receive the same `options`.
 *
 * @param data The data that should be parsed.
 * @param options Optional options for the parser and DOM builder.
 * @returns The handler's `root` node after parsing has ended.
 */
function parseDocument(data, options) {
    var domBuilder = new domhandler_1.DomHandler(undefined, options);
    var parser = new Parser_js_1.Parser(domBuilder, options);
    parser.end(data);
    return domBuilder.root;
}
exports.parseDocument = parseDocument;
/**
 * Parses `data` and returns the top-level nodes of the resulting document.
 *
 * The returned nodes are the `children` of the document produced by
 * `parseDocument`, so they still have that `Document` node as their parent.
 * Call `parseDocument` directly to obtain the `Document` node itself.
 *
 * @param data The data that should be parsed.
 * @param options Optional options for the parser and DOM builder.
 * @deprecated Use `parseDocument` instead.
 */
function parseDOM(data, options) {
    var document = parseDocument(data, options);
    return document.children;
}
exports.parseDOM = parseDOM;
/**
 * Builds a parser that feeds a newly created DOM handler.
 *
 * @param callback A callback that will be called once parsing has been completed.
 * @param options Optional options, passed to both the parser and the DOM builder.
 * @param elementCallback An optional callback that will be called every time a tag has been completed inside of the DOM.
 * @returns The new `Parser` instance.
 */
function createDomStream(callback, options, elementCallback) {
    var domHandler = new domhandler_1.DomHandler(callback, options, elementCallback);
    var parser = new Parser_js_1.Parser(domHandler, options);
    return parser;
}
exports.createDomStream = createDomStream;
var Tokenizer_js_1 = require("./Tokenizer.js");
Object.defineProperty(exports, "Tokenizer", { enumerable: true, get: function () { return __importDefault(Tokenizer_js_1).default; } });
/*
* All of the following exports exist for backwards-compatibility.
* They should probably be removed eventually.
*/
exports.ElementType = __importStar(require("domelementtype"));
var domutils_1 = require("domutils");
var domutils_2 = require("domutils");
Object.defineProperty(exports, "getFeed", { enumerable: true, get: function () { return domutils_2.getFeed; } });
// Options used by `parseFeed` when the caller does not supply any.
var parseFeedDefaultOptions = { xmlMode: true };
/**
 * Parses a feed by building its DOM with `parseDOM` and handing the root
 * nodes to `getFeed`.
 *
 * @param feed The feed that should be parsed, as a string.
 * @param options Optionally, options for parsing. They replace the defaults entirely, so you should set `xmlMode` to `true` when passing your own.
 * @returns Whatever `getFeed` returns for the parsed nodes.
 */
function parseFeed(feed, options) {
    // Only `undefined` selects the defaults, mirroring a default parameter.
    var effectiveOptions = options === undefined ? parseFeedDefaultOptions : options;
    // Detached on purpose so that `getFeed` is called without a receiver.
    var getFeed = domutils_1.getFeed;
    return getFeed(parseDOM(feed, effectiveOptions));
}
exports.parseFeed = parseFeed;
exports.DomUtils = __importStar(require("domutils"));
//# sourceMappingURL=index.js.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"index.js","sourceRoot":"https://raw.githubusercontent.com/fb55/htmlparser2/c123610e003a1eaebc61febed01cabb6e41eb658/src/","sources":["index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,yCAAoD;AACpD,yCAAyD;AAAhD,mGAAA,MAAM,OAAA;AAEf,yCAMoB;AAEpB,yCAKoB;AAJhB,wGAAA,UAAU,OAAA;AACV,0BAA0B;AAC1B,4GAAA,UAAU,OAAkB;AAMhC,iBAAiB;AAEjB;;;;;GAKG;AACH,SAAgB,aAAa,CAAC,IAAY,EAAE,OAAiB;IACzD,IAAM,OAAO,GAAG,IAAI,uBAAU,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;IACnD,IAAI,kBAAM,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACvC,OAAO,OAAO,CAAC,IAAI,CAAC;AACxB,CAAC;AAJD,sCAIC;AACD;;;;;;;;;GASG;AACH,SAAgB,QAAQ,CAAC,IAAY,EAAE,OAAiB;IACpD,OAAO,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,QAAQ,CAAC;AACjD,CAAC;AAFD,4BAEC;AACD;;;;;;GAMG;AACH,SAAgB,eAAe,CAC3B,QAAyD,EACzD,OAAiB,EACjB,eAA4C;IAE5C,IAAM,OAAO,GAAG,IAAI,uBAAU,CAAC,QAAQ,EAAE,OAAO,EAAE,eAAe,CAAC,CAAC;IACnE,OAAO,IAAI,kBAAM,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;AACxC,CAAC;AAPD,0CAOC;AAED,+CAGwB;AAFpB,0HAAA,OAAO,OAAa;AAIxB;;;GAGG;AACH,8DAA8C;AAE9C,qCAAyC;AAEzC,qCAAmC;AAA1B,mGAAA,OAAO,OAAA;AAEhB,IAAM,uBAAuB,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;AAElD;;;;;GAKG;AACH,SAAgB,SAAS,CACrB,IAAY,EACZ,OAA0C;IAA1C,wBAAA,EAAA,iCAA0C;IAE1C,OAAO,IAAA,kBAAO,EAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC;AAC5C,CAAC;AALD,8BAKC;AAED,qDAAqC"}

72
resources/app/node_modules/htmlparser2/package.json generated vendored Normal file
View File

@@ -0,0 +1,72 @@
{
"name": "htmlparser2",
"description": "Fast & forgiving HTML/XML parser",
"version": "8.0.2",
"author": "Felix Boehm <me@feedic.com>",
"funding": [
"https://github.com/fb55/htmlparser2?sponsor=1",
{
"type": "github",
"url": "https://github.com/sponsors/fb55"
}
],
"license": "MIT",
"sideEffects": false,
"repository": {
"type": "git",
"url": "git://github.com/fb55/htmlparser2.git"
},
"directories": {
"lib": "lib/"
},
"main": "lib/index.js",
"types": "lib/index.d.ts",
"module": "lib/esm/index.js",
"exports": {
".": {
"require": "./lib/index.js",
"import": "./lib/esm/index.js"
},
"./lib/WritableStream": {
"require": "./lib/WritableStream.js",
"import": "./lib/esm/WritableStream.js"
}
},
"files": [
"lib/**/*"
],
"dependencies": {
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3",
"domutils": "^3.0.1",
"entities": "^4.4.0"
},
"devDependencies": {
"@types/jest": "^29.5.0",
"@types/node": "^18.15.5",
"@typescript-eslint/eslint-plugin": "^5.56.0",
"@typescript-eslint/parser": "^5.56.0",
"eslint": "^8.36.0",
"eslint-config-prettier": "^8.8.0",
"eslint-plugin-n": "^15.6.1",
"eslint-plugin-unicorn": "^46.0.0",
"jest": "^29.5.0",
"prettier": "^2.8.6",
"ts-jest": "^29.0.5",
"typescript": "^4.9.5"
},
"jest": {
"preset": "ts-jest",
"testEnvironment": "node",
"coverageProvider": "v8",
"moduleNameMapper": {
"^(.*)\\.js$": [
"$1",
"$1.js"
]
}
},
"prettier": {
"tabWidth": 4
}
}