fix(pr-validation): strip HTML comments via linear scan to satisfy CodeQL

Replace the regex-based comment stripper (flagged by CodeQL
js/incomplete-multi-character-sanitization, alert #330) with a single linear
indexOf scan. Behaviour is identical on complete, unterminated and nested
comments, but there is no regex backtracking and no loop-until-stable, so the
CodeQL alert clears without reintroducing the CPU-DoS risk.
This commit is contained in:
Gauvino
2026-06-05 13:15:39 +02:00
parent 935cacff81
commit 116aff2f8e

View File

@@ -29,15 +29,25 @@ try {
// Read the author association from the AUTHOR_ASSOCIATION environment variable.
// An unset or empty variable falls back to "" so the upper-casing never throws.
const association = (process.env.AUTHOR_ASSOCIATION || "").toUpperCase();
// True only when the association is exactly OWNER, MEMBER or COLLABORATOR.
const isMaintainer = ["OWNER", "MEMBER", "COLLABORATOR"].includes(association);
// Strip HTML comments from `s` with two regex replacements, then trim:
//   1. delete every complete `<!-- … -->` block (shortest match, left to right);
//   2. delete everything from the first `<!--` still present to end-of-string,
//      which covers both an unterminated comment and a `<!--` that only came
//      into existence when step 1 joined the text on either side of a block.
// After step 2 the result cannot contain `<!--`.
// NOTE(review): step 1 uses a lazy `[\s\S]*?`; with many unclosed `<!--` openers
// it presumably rescans towards end-of-string from each one — confirm cost.
const stripComments = (s) => {
  const closedRemoved = s.replace(/<!--[\s\S]*?-->/g, "");
  const danglingRemoved = closedRemoved.replace(/<!--[\s\S]*$/, "");
  return danglingRemoved.trim();
};
// Strip HTML comments from `s` and trim the result, using only indexOf scans
// (no regex backtracking, no loop-until-stable), so the work stays linear in
// the length of `s`.
//
//   - Every complete `<!-- … -->` block is removed, left to right. The closing
//     `-->` is searched for only after the 4-character opener, so `<!-->` is
//     not treated as a complete comment.
//   - An unterminated `<!-- …` drops everything from the opener to end-of-string.
//   - Removing a block can splice its neighbours into a brand-new opener
//     (`<!<!-- x -->-- y` becomes `<!-- y`). The scan never revisits text it has
//     already emitted, so the joined output is cut at the first `<!--` it
//     contains. This guarantees the result holds no `<!--` and yields the same
//     string as the earlier two-step regex implementation.
//
// `s` must be a string; the return value is the comment-free, trimmed text.
const stripComments = (s) => {
  const opener = "<!--";
  const closer = "-->";
  const kept = [];
  let i = 0;
  for (;;) {
    const start = s.indexOf(opener, i);
    if (start === -1) {
      // No further comments: keep the remaining tail as-is.
      kept.push(s.slice(i));
      break;
    }
    kept.push(s.slice(i, start));
    const end = s.indexOf(closer, start + opener.length);
    if (end === -1) break; // unterminated comment: drop the rest
    i = end + closer.length;
  }
  const out = kept.join("");
  // Guard against an opener reassembled across a removed block.
  const spliced = out.indexOf(opener);
  return (spliced === -1 ? out : out.slice(0, spliced)).trim();
};
// Grab the text under a heading whose title contains `keyword`, up to the next heading
// or the end of the body.