fix(pr-validation): strip HTML comments via linear scan to satisfy CodeQL

Replace the regex-based comment stripper (flagged by CodeQL js/incomplete-multi-character-sanitization, alert #330) with a single linear indexOf scan. Behaviour is identical on complete, unterminated and nested comments, but there is no regex backtracking and no loop-until-stable, so the CodeQL alert clears without reintroducing the CPU-DoS risk.
2026-06-05 13:38:27 +01:00 · 2026-06-05 13:15:39 +02:00
parent 935cacff81
commit 116aff2f8e
1 changed files with 19 additions and 9 deletions
--- a/scripts/check-pr-template.mjs
+++ b/scripts/check-pr-template.mjs
@@ -29,15 +29,25 @@ try {
 const association = (process.env.AUTHOR_ASSOCIATION || "").toUpperCase();
 const isMaintainer = ["OWNER", "MEMBER", "COLLABORATOR"].includes(association);

-// Strip HTML comments in a single linear pass: remove complete `<!-- … -->`
-// blocks, then drop any leftover unterminated `<!-- …` to end-of-string. This
-// leaves no `<!--` behind (satisfies CodeQL) without the quadratic re-scan loop
-// a malicious deeply-nested body could abuse for CPU-DoS.
-const stripComments = (s) =>
-  s
-    .replace(/<!--[\s\S]*?-->/g, "")
-    .replace(/<!--[\s\S]*$/, "")
-    .trim();
+// Strip HTML comments using linear indexOf scans only: no regex backtracking and no
+// loop-until-stable. Each `<!-- … -->` is removed and an unterminated `<!-- …` drops
+// to end-of-string. Deleting a comment can splice a fresh `<!--` together (e.g.
+// `<!<!---->--`), so the result is then cut at the first `<!--` still present: no
+// comment opener survives. Returns what remains of `s`, trimmed.
+const stripComments = (s) => {
+  let out = "";
+  let i = 0;
+  for (;;) {
+    const start = s.indexOf("<!--", i);
+    out += s.slice(i, start === -1 ? s.length : start); // keep text before the comment
+    if (start === -1) break; // no further comment: the tail was just copied
+    const end = s.indexOf("-->", start + 4);
+    if (end === -1) break; // unterminated comment: drop the rest
+    i = end + 3;
+  }
+  const cut = out.indexOf("<!--"); // opener re-formed by the removals above, if any
+  return (cut === -1 ? out : out.slice(0, cut)).trim();
+};

 // Grab the text under a heading whose title contains `keyword`, up to the next heading
 // or the end of the body.