This commit is contained in:
2025-01-04 00:34:03 +01:00
parent 41829408dc
commit 0ca14bbc19
18111 changed files with 1871397 additions and 0 deletions

View File

@@ -0,0 +1,52 @@
// @ts-check
"use strict";
/**
 * Compiler pass that appends one trampoline rule to `ast.rules` for every
 * rule imported into this namespace. Each stub is a `rule` whose expression
 * is a `library_ref` pointing at the imported library (identified by the
 * zero-based position of its `import` statement).
 *
 * Whole-module imports (`import_binding_all`) do not get a stub. A default
 * import produces a `library_ref` whose `name` is `undefined`.
 *
 * @example
 * import bar from "./lib.js" // Default rule imported into this namespace
 * import {baz} from "./lib.js" // One rule imported into this namespace by name
 *
 * @type {PEG.Pass}
 */
function addImportedRules(ast) {
  for (
    let libraryNumber = 0;
    libraryNumber < ast.imports.length;
    libraryNumber++
  ) {
    const imp = ast.imports[libraryNumber];
    for (const what of imp.what) {
      const kind = what.type;
      if (kind === "import_binding_all") {
        // Whole-module import: nothing to stub.
        continue;
      }

      // Name of the rule inside the library; stays `undefined` for the
      // default (usually first) rule.
      let original = undefined;
      if (kind === "import_binding") {
        original = what.binding;
      } else if (kind === "import_binding_rename") {
        original = what.rename;
      } else if (kind !== "import_binding_default") {
        throw new TypeError("Unknown binding type");
      }

      const reference = {
        type: "library_ref",
        name: original,
        library: imp.from.module,
        libraryNumber,
        location: what.location,
      };
      ast.rules.push({
        type: "rule",
        name: what.binding,
        nameLocation: what.location,
        expression: reference,
        location: imp.from.location,
      });
    }
  }
}
module.exports = addImportedRules;

View File

@@ -0,0 +1,43 @@
// @ts-check
"use strict";
const visitor = require("../visitor");
/**
 * Find the zero-based position of the first `import` statement that imports
 * a whole module (`import_binding_all`) under the given binding name.
 *
 * @param {PEG.ast.Grammar} ast
 * @param {string} name
 * @returns {number} Index of the matching import, or -1 when none matches.
 */
function findLibraryNumber(ast, name) {
  const imports = ast.imports;
  for (let index = 0; index < imports.length; index++) {
    const bindings = imports[index].what;
    for (let j = 0; j < bindings.length; j++) {
      const candidate = bindings[j];
      if (candidate.type !== "import_binding_all") {
        continue;
      }
      if (candidate.binding === name) {
        return index;
      }
    }
  }
  return -1;
}
/**
 * Compiler pass that resolves every `library_ref` whose `libraryNumber` is
 * still -1 by looking up the import that binds the referenced module name.
 * An error is reported through the session when no such import exists.
 *
 * @type {PEG.Pass}
 */
function fixLibraryNumbers(ast, _options, session) {
  const resolve = visitor.build({
    library_ref(/** @type {PEG.ast.LibraryReference} */ node) {
      if (node.libraryNumber !== -1) {
        // Already resolved; leave it alone.
        return;
      }
      const found = findLibraryNumber(ast, node.library);
      node.libraryNumber = found;
      if (found === -1) {
        session.error(`Unknown module "${node.library}"`, node.location);
      }
    },
  });
  resolve(ast);
}
module.exports = fixLibraryNumbers;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,190 @@
"use strict";
const visitor = require("../visitor");
const asts = require("../asts");
const GrammarError = require("../../grammar-error");
const ALWAYS_MATCH = 1;
const SOMETIMES_MATCH = 0;
const NEVER_MATCH = -1;
// Infers the match result of the grammar's nodes and stores it in the `match`
// property of the rule and expression nodes it handles (a `library_ref`, and
// a `rule_ref` to a rule that cannot be found, are left without the property).
// The result can be:
// -1: negative result, matching of that node always fails
//  0: neutral result, the node may fail or may match
//  1: positive result, the node always matches
function inferenceMatchResult(ast) {
// Marks a node as "may match, may fail" without looking at its children.
function sometimesMatch(node) { return (node.match = SOMETIMES_MATCH); }
// Marks a node as always matching. The child expression is still visited so
// that its own `match` property gets computed.
function alwaysMatch(node) {
// eslint-disable-next-line no-use-before-define -- Mutual recursion
inference(node.expression);
return (node.match = ALWAYS_MATCH);
}
// The node has the same match result as its child expression.
function inferenceExpression(node) {
// eslint-disable-next-line no-use-before-define -- Mutual recursion
return (node.match = inference(node.expression));
}
// Combines the results of a list of elements.
// - All elements always match: the result is ALWAYS_MATCH (this includes
//   an empty list, for both modes).
// - For a choice (`forChoice` is truthy): NEVER_MATCH only when every
//   alternative never matches, otherwise SOMETIMES_MATCH.
// - For a sequence: NEVER_MATCH as soon as one element never matches,
//   otherwise SOMETIMES_MATCH.
function inferenceElements(elements, forChoice) {
const length = elements.length;
let always = 0;
let never = 0;
for (let i = 0; i < length; ++i) {
// eslint-disable-next-line no-use-before-define -- Mutual recursion
const result = inference(elements[i]);
if (result === ALWAYS_MATCH) { ++always; }
if (result === NEVER_MATCH) { ++never; }
}
if (always === length) {
return ALWAYS_MATCH;
}
if (forChoice) {
return never === length ? NEVER_MATCH : SOMETIMES_MATCH;
}
return never > 0 ? NEVER_MATCH : SOMETIMES_MATCH;
}
const inference = visitor.build({
rule(node) {
let oldResult = undefined;
let count = 0;
// If the property has not been calculated yet, do that now
if (typeof node.match === "undefined") {
// Start from the neutral result, so that a recursive reference to this
// rule sees a defined value, then re-evaluate until the result is stable.
node.match = SOMETIMES_MATCH;
do {
oldResult = node.match;
node.match = inference(node.expression);
// 6 == 3! -- the number of permutations of all transitions from one
// match state to another.
// After 6 iterations a cycle is guaranteed to have begun.
// For example, an input of `start = [] start` will generate the
// sequence: 0 -> -1 -> -1 (then stop)
//
// A more complex grammar theoretically would generate the
// sequence: 0 -> 1 -> 0 -> -1 -> 0 -> 1 -> ... (then cycle)
// but there are no examples of such grammars yet (possibly they
// do not exist at all)
// istanbul ignore next This is canary test, shouldn't trigger in real life
if (++count > 6) {
throw new GrammarError(
"Infinity cycle detected when trying to evaluate node match result",
node.location
);
}
} while (oldResult !== node.match);
}
return node.match;
},
named: inferenceExpression,
choice(node) {
return (node.match = inferenceElements(node.alternatives, true));
},
action: inferenceExpression,
sequence(node) {
return (node.match = inferenceElements(node.elements, false));
},
labeled: inferenceExpression,
text: inferenceExpression,
simple_and: inferenceExpression,
simple_not(node) {
// Negating the numeric result swaps ALWAYS_MATCH (1) and NEVER_MATCH (-1)
return (node.match = -inference(node.expression));
},
optional: alwaysMatch,
zero_or_more: alwaysMatch,
one_or_more: inferenceExpression,
repeated(node) {
const match = inference(node.expression);
// Without a delimiter `dMatch` is NEVER_MATCH, but it is only consulted
// below in branches that also require `node.delimiter`
const dMatch = node.delimiter ? inference(node.delimiter) : NEVER_MATCH;
// If the minimum is `null`, it is equal to the maximum (parsed from the
// `|exact|` syntax)
const min = node.min ? node.min : node.max;
// If any boundary is variable, it can be negative, and in that case the
// node does not match, but it may match with some other values
if (min.type !== "constant" || node.max.type !== "constant") {
return (node.match = SOMETIMES_MATCH);
}
// Now both boundaries are constants
// If the upper boundary is zero or the minimum exceeds the maximum,
// matching is impossible
if (node.max.value === 0
|| (node.max.value !== null && min.value > node.max.value)
) {
return (node.match = NEVER_MATCH);
}
if (match === NEVER_MATCH) {
// If an expression always fails, a range will also always fail
// (with one exception - a never-matching expression repeated
// zero times always matches and returns an empty array).
return (node.match = min.value === 0 ? ALWAYS_MATCH : NEVER_MATCH);
}
if (match === ALWAYS_MATCH) {
if (node.delimiter && min.value >= 2) {
// If an expression always matches, the final result is determined only
// by the delimiter, but the delimiter is used only when the count of
// elements is two or more
return (node.match = dMatch);
}
return (node.match = ALWAYS_MATCH);
}
// Here `match === SOMETIMES_MATCH`
if (node.delimiter && min.value >= 2) {
// At least two elements are required, so at least one delimiter has to
// be matched between them
return (
// If a delimiter never matches then the range also never matches (because
// there is at least one delimiter)
node.match = dMatch === NEVER_MATCH ? NEVER_MATCH : SOMETIMES_MATCH
);
}
// With a minimum of zero the range can always succeed with no elements
return (node.match = min.value === 0 ? ALWAYS_MATCH : SOMETIMES_MATCH);
},
group: inferenceExpression,
semantic_and: sometimesMatch,
semantic_not: sometimesMatch,
rule_ref(node) {
const rule = asts.findRule(ast, node.name);
if (!rule) {
// The referenced rule cannot be found: report the neutral result and
// leave `node.match` unset
return SOMETIMES_MATCH;
}
return (node.match = inference(rule));
},
library_ref() {
// Can't look into pre-compiled rules.
// 0 is the value of SOMETIMES_MATCH; the node itself is not annotated.
return 0;
},
literal(node) {
// An empty literal always matches on any input
const match = node.value.length === 0 ? ALWAYS_MATCH : SOMETIMES_MATCH;
return (node.match = match);
},
class(node) {
// An empty character class never matches on any input
const match = node.parts.length === 0 ? NEVER_MATCH : SOMETIMES_MATCH;
return (node.match = match);
},
// |any| does not match on empty input
any: sometimesMatch,
});
inference(ast);
}
inferenceMatchResult.ALWAYS_MATCH = ALWAYS_MATCH;
inferenceMatchResult.SOMETIMES_MATCH = SOMETIMES_MATCH;
inferenceMatchResult.NEVER_MATCH = NEVER_MATCH;
module.exports = inferenceMatchResult;

View File

@@ -0,0 +1,191 @@
// @ts-check
"use strict";
/**
* @typedef {import("../../peg")} PEG
*/
/** @type {PEG.compiler.visitor} */
const visitor = require("../visitor");
/**
 * Turn `target` into a shallow copy of `source`, in place: every own
 * enumerable key of `target` is deleted first, then every own enumerable
 * key of `source` is assigned onto `target`. The identity of `target` is
 * kept, so existing references to it see the new contents.
 *
 * @param {unknown} target
 * @param {unknown} source
 */
function cloneOver(target, source) {
  const dest = /** @type {Record<string,unknown>} */ (target);
  const src = /** @type {Record<string,unknown>} */ (source);
  for (const staleKey of Object.keys(dest)) {
    delete dest[staleKey];
  }
  // The keys of `src` are read only after `dest` has been emptied.
  for (const key of Object.keys(src)) {
    dest[key] = src[key];
  }
}
/**
 * Normalize the parts array of a `class` node in place: the parts are
 * sorted, parts fully covered by an earlier part are dropped, and parts
 * that overlap or are directly adjacent (by the char code of their first
 * character) are fused into a single `[start, end]` range.
 *
 * @param {PEG.ast.CharacterClass["parts"]} parts
 */
function cleanParts(parts) {
  /** Both bounds of a part; a single character is its own start and end. */
  const bounds = part => (Array.isArray(part) ? part : [part, part]);

  // Order by increasing start; for equal starts the widest range comes first.
  parts.sort((left, right) => {
    const [leftStart, leftEnd] = bounds(left);
    const [rightStart, rightEnd] = bounds(right);
    if (leftStart !== rightStart) {
      return leftStart < rightStart ? -1 : 1;
    }
    if (leftEnd !== rightEnd) {
      return leftEnd > rightEnd ? -1 : 1;
    }
    return 0;
  });

  // Compact the array in a single pass: `kept` counts the parts retained so
  // far, which always occupy the front of the array.
  let kept = 0;
  let lastStart = "";
  let lastEnd = "";
  for (let read = 0; read < parts.length; read++) {
    const part = parts[read];
    const [start, end] = bounds(part);
    if (end <= lastEnd) {
      // Entirely inside the last retained range: discard.
      continue;
    }
    if (lastEnd.charCodeAt(0) + 1 >= start.charCodeAt(0)) {
      // Overlaps or touches the last retained range: widen that range
      // instead of keeping this part.
      lastEnd = end;
      parts[kept - 1] = [lastStart, lastEnd];
      continue;
    }
    parts[kept++] = part;
    lastStart = start;
    lastEnd = end;
  }
  parts.length = kept;
  return parts;
}
/**
 * Merges adjacent alternatives of a `choice` that can each be represented
 * as a non-inverted character class with the same `ignoreCase` flag into a
 * single character class. Also replaces a `text` node by its expression
 * when that expression is a class or a literal. The AST is modified in
 * place.
 *
 * @param {PEG.ast.Grammar} ast
 */
function mergeCharacterClasses(ast) {
// Build a map from rule names to rule expressions for quick lookup of
// rule_ref targets.
const rules = Object.create(null);
ast.rules.forEach(rule => (rules[rule.name] = rule.expression));
// Keep a map of which rules have been processed, so that when
// we find a rule_ref, we can make sure the referenced rule is processed
// before we try to use it.
const processedRules = Object.create(null);
// `asClass` and `merge` refer to each other, so they are declared together.
const [asClass, merge] = [
/**
 * Determine whether a node can be represented as a simple character class,
 * and return that class if so. A non-inverted `class` node, a literal of
 * length 1, and a `rule_ref` whose rule expression is itself representable
 * qualify; everything else yields `null`.
 *
 * NOTE(review): a chain of rule_refs that leads back to itself would recurse
 * here without bound; presumably such grammars are rejected by an earlier
 * pass - confirm.
 *
 * @param {PEG.ast.Expression} node - the node to inspect
 * @param {boolean} [clone] - if true, always return a new node that
 * can be modified by the caller
 * @returns {PEG.ast.CharacterClass | null}
 */
(node, clone) => {
if (node.type === "class" && !node.inverted) {
if (clone) {
// Shallow copy of the node plus a copy of its parts array
node = { ...node };
node.parts = [...node.parts];
}
return node;
}
if (node.type === "literal" && node.value.length === 1) {
// A one-character literal is equivalent to a class with a single part
return {
type: "class",
parts: [node.value],
inverted: false,
ignoreCase: node.ignoreCase,
location: node.location,
};
}
if (node.type === "rule_ref") {
const ref = rules[node.name];
if (ref) {
if (!processedRules[node.name]) {
// Merge inside the referenced rule first, so that its expression is
// in its final form before it is inspected
processedRules[node.name] = true;
merge(ref);
}
// Always clone: the class belongs to the referenced rule and must not
// be modified through this reference
const cls = asClass(ref, true);
if (cls) {
// The copy takes the place of the reference, so it gets its location
cls.location = node.location;
}
return cls;
}
}
return null;
},
visitor.build({
choice(node) {
// The class that the current run of class-like alternatives is being
// merged into, or null when the previous alternative was not class-like
/** @type {PEG.ast.CharacterClass | null} */
let prev = null;
let changed = false;
node.alternatives.forEach((alt, i) => {
merge(alt);
const cls = asClass(alt);
if (!cls) {
prev = null;
return;
}
if (prev && prev.ignoreCase === cls.ignoreCase) {
// Fold this alternative into the previous class. Both slots are set to
// the same object, so the duplicates can be filtered out below.
prev.parts.push(...cls.parts);
node.alternatives[i - 1] = prev;
node.alternatives[i] = prev;
// The merged class spans from the start of the first alternative to
// the end of the current one
prev.location = {
source: prev.location.source,
start: prev.location.start,
end: cls.location.end,
};
changed = true;
} else {
prev = cls;
}
});
if (changed) {
// Drop every alternative that is the same object as its predecessor
node.alternatives = node.alternatives.filter(
(alt, i, arr) => !i || alt !== arr[i - 1]
);
node.alternatives.forEach((alt, i) => {
if (alt.type === "class") {
alt.parts = cleanParts(alt.parts);
// A non-inverted class with a single non-range part is turned back
// into a literal
if (alt.parts.length === 1
&& !Array.isArray(alt.parts[0])
&& !alt.inverted) {
node.alternatives[i] = {
type: "literal",
value: alt.parts[0],
ignoreCase: alt.ignoreCase,
location: alt.location,
};
}
}
});
if (node.alternatives.length === 1) {
// Only one alternative is left: the choice node itself becomes that
// alternative
cloneOver(node, node.alternatives[0]);
}
}
},
text(node) {
merge(node.expression);
if (node.expression.type === "class"
|| node.expression.type === "literal") {
// Replace the text node by its expression, but keep the location of
// the text node
const location = node.location;
cloneOver(node, node.expression);
node.location = location;
}
},
}),
];
ast.rules.forEach(rule => {
// Mark the rule before merging, so that a rule_ref met during the merge
// does not trigger a second merge of the same rule
processedRules[rule.name] = true;
merge(rule.expression);
});
}
module.exports = mergeCharacterClasses;

View File

@@ -0,0 +1,49 @@
"use strict";
const asts = require("../asts");
const visitor = require("../visitor");
// Removes proxy rules -- that is, rules whose whole expression is a
// reference to another rule. Every reference to a proxy rule is redirected
// to the rule it delegates to (an informational message is emitted for each
// redirected reference), and the proxy rule itself is removed from the
// grammar unless it is listed in `options.allowedStartRules`.
function removeProxyRules(ast, options, session) {
  const isProxyRule = node => (
    node.type === "rule" && node.expression.type === "rule_ref"
  );

  // Point every reference to the rule `from` at the rule `to` instead.
  function redirectRuleRefs(from, to) {
    const redirect = visitor.build({
      rule_ref(node) {
        if (node.name !== from) {
          return;
        }
        node.name = to;
        session.info(
          `Proxy rule "${from}" replaced by the rule "${to}"`,
          node.location,
          [{
            message: "This rule will be used",
            location: asts.findRule(ast, to).nameLocation,
          }]
        );
      },
    });
    redirect(ast);
  }

  // Positions of the rules to delete, in increasing order.
  const doomed = [];
  ast.rules.forEach((rule, position) => {
    if (!isProxyRule(rule)) {
      return;
    }
    redirectRuleRefs(rule.name, rule.expression.name);
    if (options.allowedStartRules.indexOf(rule.name) === -1) {
      doomed.push(position);
    }
  });

  // Delete from the back so that the remaining positions stay valid.
  for (let k = doomed.length - 1; k >= 0; k--) {
    ast.rules.splice(doomed[k], 1);
  }
}
module.exports = removeProxyRules;

View File

@@ -0,0 +1,28 @@
// @ts-check
"use strict";
/**
 * Compiler pass that reports an error for every whole-module import
 * (`import_binding_all`) whose binding name has already been used by an
 * earlier whole-module import.
 *
 * The diagnostic of each error always points at the FIRST import of the
 * name, no matter how many duplicates follow (the same policy as for
 * duplicate rules).
 *
 * @type {PEG.Pass}
 */
function reportDuplicateImports(ast, _options, session) {
  // A Map rather than a plain object, so that a binding with a name such
  // as `__proto__` is recorded like any other name.
  /** @type {Map<string, PEG.LocationRange>} */
  const firstImports = new Map();
  for (const imp of ast.imports) {
    for (const what of imp.what) {
      if (what.type !== "import_binding_all") {
        continue;
      }
      if (firstImports.has(what.binding)) {
        session.error(
          `Module "${what.binding}" is already imported`,
          what.location,
          [{
            message: "Original module location",
            location: firstImports.get(what.binding),
          }]
        );
        // Do not rewrite the original module location
        continue;
      }
      firstImports.set(what.binding, what.location);
    }
  }
}
module.exports = reportDuplicateImports;

View File

@@ -0,0 +1,72 @@
"use strict";
const visitor = require("../visitor");
// Checks that each label is defined only once within each scope.
//
// The labels visible at a given point are tracked in an environment: a Map
// from the label name to the location where the label was first defined.
// A Map is used instead of a plain object so that a label with a name such
// as `__proto__` is tracked like any other label. Constructs that open a
// new scope work on a copy of the environment, so labels defined inside of
// them do not leak out.
function reportDuplicateLabels(ast, options, session) {
  // Returns an independent copy of the environment.
  function cloneEnv(env) {
    return new Map(env);
  }

  // Checks the expression of the node in a new scope.
  function checkExpressionWithClonedEnv(node, env) {
    // eslint-disable-next-line no-use-before-define -- Mutual recursion
    check(node.expression, cloneEnv(env));
  }

  const check = visitor.build({
    rule(node) {
      // Each rule starts with an empty environment
      check(node.expression, new Map());
    },

    choice(node, env) {
      // Alternatives do not see the labels of each other
      node.alternatives.forEach(alternative => {
        check(alternative, cloneEnv(env));
      });
    },

    action: checkExpressionWithClonedEnv,

    labeled(node, env) {
      const label = node.label;
      // `label` is falsy when the node has no label name, so there is
      // nothing to check or to record in that case
      const isDuplicate = Boolean(label) && env.has(label);
      if (isDuplicate) {
        session.error(
          `Label "${node.label}" is already defined`,
          node.labelLocation,
          [{
            message: "Original label location",
            location: env.get(label),
          }]
        );
      }

      check(node.expression, env);

      // Do not rewrite the original label location when reporting a
      // duplicate, so that further duplicates point at the first definition
      if (label && !isDuplicate) {
        env.set(label, node.labelLocation);
      }
    },

    text: checkExpressionWithClonedEnv,
    simple_and: checkExpressionWithClonedEnv,
    simple_not: checkExpressionWithClonedEnv,
    optional: checkExpressionWithClonedEnv,
    zero_or_more: checkExpressionWithClonedEnv,
    one_or_more: checkExpressionWithClonedEnv,

    repeated(node, env) {
      if (node.delimiter) {
        check(node.delimiter, cloneEnv(env));
      }
      check(node.expression, cloneEnv(env));
    },

    group: checkExpressionWithClonedEnv,
  });

  check(ast);
}
module.exports = reportDuplicateLabels;

View File

@@ -0,0 +1,32 @@
"use strict";
const visitor = require("../visitor");
// Checks that each rule is defined only once.
//
// An error is reported for the second and every further definition of a
// rule name; its diagnostic always points at the first definition.
function reportDuplicateRules(ast, options, session) {
  // Map from the rule name to the location of the name in the first
  // definition of the rule. A Map is used instead of a plain object so
  // that a rule with a name such as `__proto__` is recorded like any other.
  const firstDefinitions = new Map();

  const check = visitor.build({
    rule(node) {
      if (firstDefinitions.has(node.name)) {
        session.error(
          `Rule "${node.name}" is already defined`,
          node.nameLocation,
          [{
            message: "Original rule location",
            location: firstDefinitions.get(node.name),
          }]
        );

        // Do not rewrite original rule location
        return;
      }

      firstDefinitions.set(node.name, node.nameLocation);
    },
  });

  check(ast);
}
module.exports = reportDuplicateRules;

View File

@@ -0,0 +1,37 @@
"use strict";
const visitor = require("../visitor");
//
// Compiler pass that enforces the following restriction:
//
// - a plucked element (`@`) can not be combined with an action block
//
// The enclosing action node is handed down to the expression of the action
// as an extra visitor argument; a labeled node that is plucked and receives
// such an argument is reported.
//
function reportIncorrectPlucking(ast, options, session) {
  const check = visitor.build({
    action(node) {
      // Everything below is visited with this action as the context
      check(node.expression, node);
    },

    labeled(node, action) {
      const pluckedInsideAction = node.pick && action;
      if (pluckedInsideAction) {
        session.error(
          "\"@\" cannot be used with an action block",
          node.labelLocation,
          [{
            message: "Action block location",
            location: action.codeLocation,
          }]
        );
      }
      // The action context is not passed on below a labeled node
      check(node.expression);
    },
  });

  check(ast);
}
module.exports = reportIncorrectPlucking;

View File

@@ -0,0 +1,101 @@
"use strict";
const asts = require("../asts");
const visitor = require("../visitor");
// Reports left recursion in the grammar, which prevents infinite recursion in
// the generated parser.
//
// Both direct and indirect recursion are detected. The pass also correctly
// reports cases like this:
//
//   start = "a"? start
//
// In general, if a rule reference can be reached without consuming any input,
// it can lead to left recursion.
//
// The pass does nothing further as soon as the session contains an error.
function reportInfiniteRecursion(ast, options, session) {
  // Names of the rules that are currently being visited (for the message)
  const ruleTrail = [];
  // The rule_ref nodes followed so far (for the diagnostics)
  const refTrail = [];

  const hasErrors = () => session.errors > 0;

  const check = visitor.build({
    rule(node) {
      if (hasErrors()) {
        return;
      }
      ruleTrail.push(node.name);
      check(node.expression);
      ruleTrail.pop();
    },

    sequence(node) {
      if (hasErrors()) {
        return;
      }
      // Only the elements that can be reached without consuming input are
      // relevant: stop after the first element that always consumes.
      for (const element of node.elements) {
        check(element);
        if (hasErrors() || asts.alwaysConsumesOnSuccess(ast, element)) {
          break;
        }
      }
    },

    repeated(node) {
      if (hasErrors()) {
        return;
      }
      check(node.expression);
      // The delimiter can be reached without input consumption only when
      // the expression may succeed without consuming anything
      const delimiter = node.delimiter;
      if (delimiter && !asts.alwaysConsumesOnSuccess(ast, node.expression)) {
        check(delimiter);
      }
    },

    rule_ref(node) {
      if (hasErrors()) {
        return;
      }
      refTrail.push(node);
      const rule = asts.findRule(ast, node.name);

      const isRecursive = ruleTrail.indexOf(node.name) !== -1;
      if (isRecursive) {
        ruleTrail.push(node.name);
        const diagnostics = refTrail.map((ref, i, all) => {
          const step = i + 1;
          const message = step !== all.length
            ? `Step ${step}: call of the rule "${ref.name}" without input consumption`
            : `Step ${step}: call itself without input consumption - left recursion`;
          return { message, location: ref.location };
        });
        session.error(
          `Possible infinite loop when parsing (left recursion: ${ruleTrail.join(" -> ")})`,
          rule.nameLocation,
          diagnostics
        );
        // The recursion has been entered, so it has to be broken here
        return;
      }

      // All checks run in one stage, so the referenced rule may be missing:
      // that is reported by another check
      if (rule) {
        check(rule);
      }
      refTrail.pop();
    },
  });

  check(ast);
}
module.exports = reportInfiniteRecursion;

View File

@@ -0,0 +1,64 @@
"use strict";
const asts = require("../asts");
const visitor = require("../visitor");
// Reports expressions that don't consume any input inside |*|, |+| or a
// repeated operator in the grammar, which prevents infinite loops in the
// generated parser.
//
// - |*| and |+| over an expression that may not consume input: error.
// - A repeated operator without an upper boundary over such an expression
//   (and without a delimiter that always consumes input): error.
// - A repeated operator with an upper boundary in the same situation: only
//   a warning, because the loop is finite.
//
// The handlers below replace the default traversal of these nodes, so each
// of them descends into the repeated expression itself. Otherwise a
// repetition nested in another repetition, like the inner one in
// `("a" ""*)*`, would never be checked.
function reportInfiniteRepetition(ast, options, session) {
  // Shared handler of |*| and |+|. The node itself is reported before the
  // nested expressions are checked.
  function checkUnboundedRepetition(node) {
    if (!asts.alwaysConsumesOnSuccess(ast, node.expression)) {
      session.error(
        "Possible infinite loop when parsing (repetition used with an expression that may not consume any input)",
        node.location
      );
    }
    // eslint-disable-next-line no-use-before-define -- Mutual recursion
    check(node.expression);
  }

  const check = visitor.build({
    zero_or_more: checkUnboundedRepetition,
    one_or_more: checkUnboundedRepetition,

    repeated(node) {
      // No need to check min or max. They can only be numbers, variable
      // names, or code blocks.
      if (node.delimiter) {
        check(node.delimiter);
      }

      // Each iteration makes progress when either the expression or the
      // delimiter always consumes input
      const makesProgress = asts.alwaysConsumesOnSuccess(ast, node.expression)
        || (node.delimiter
          && asts.alwaysConsumesOnSuccess(ast, node.delimiter));

      if (!makesProgress) {
        if (node.max.value === null) {
          session.error(
            "Possible infinite loop when parsing (unbounded range repetition used with an expression that may not consume any input)",
            node.location
          );
        } else {
          // If the minimum is `null`, it is equal to the maximum (parsed
          // from the `|exact|` syntax)
          const min = node.min ? node.min : node.max;

          // Because the upper boundary is defined, infinite repetition is
          // not possible, but the parser will waste CPU time
          session.warning(
            min.type === "constant" && node.max.type === "constant"
              ? `An expression may not consume any input and may always match ${node.max.value} times`
              : "An expression may not consume any input and may always match with a maximum repetition count",
            node.location
          );
        }
      }

      // Repetitions nested in the repeated expression are checked as well
      check(node.expression);
    },
  });

  check(ast);
}
module.exports = reportInfiniteRepetition;

View File

@@ -0,0 +1,22 @@
"use strict";
const asts = require("../asts");
const visitor = require("../visitor");
// Checks that all referenced rules exist: an error is reported at the
// location of every rule reference whose name can not be found in the
// grammar.
function reportUndefinedRules(ast, options, session) {
  const check = visitor.build({
    rule_ref(node) {
      const target = asts.findRule(ast, node.name);
      if (target) {
        return;
      }
      session.error(`Rule "${node.name}" is not defined`, node.location);
    },
  });

  check(ast);
}
module.exports = reportUndefinedRules;