Compare commits

..

2 Commits

Author SHA1 Message Date
Gauvino
d2a0fd875a fix(detect-duplicate): sanitize reposted issue titles
Security audit: the bot echoes other issues' titles back into a comment, so a
maliciously-named issue could ping (@everyone) or inject markdown/HTML. Break
@-mentions with a zero-width space and strip markdown/HTML control chars before
posting.
2026-06-01 20:32:39 +02:00
Gauvino
d2c48de60d ci(issues): flag likely-duplicate issues on open
Adds .github/workflows/detect-duplicate.yml + scripts/detect-duplicate-issue.mjs
(Bun, dep-free, no API key): on a new issue, compares its title/body to open
issues via Jaccard similarity (with light stemming and stop-words), and if the
top matches pass a threshold, posts one comment listing them and adds a
'possible duplicate' label. Inspired by seerr's detect-duplicate, minus the
embedding/Groq dependency.
2026-06-01 17:38:19 +02:00
5 changed files with 329 additions and 70 deletions

View File

@@ -1,29 +1,24 @@
name: 🏷🔀Merge Conflict Labeler
on:
push:
branches: [develop]
# SECURITY: pull_request_target runs with the base repo's write token and secrets.
# This job only labels via the API and is safe ONLY because it never checks out or
# runs the PR head's code. NEVER add `actions/checkout` of the PR head (or any `run:`
# that interpolates PR-controlled data) to this workflow — that would turn it into a
# full repo-compromise vector.
pull_request_target:
branches: [develop]
types: [synchronize]
jobs:
label:
name: 🏷️ Labeling Merge Conflicts
runs-on: ubuntu-24.04
if: ${{ github.repository == 'streamyfin/streamyfin' }}
permissions:
contents: read
pull-requests: write
steps:
- name: 🚩 Apply merge conflict label
uses: eps1lon/actions-label-merge-conflict@1df065ebe6e3310545d4f4c4e862e43bdca146f0 # v3.0.3
with:
dirtyLabel: '⚔️ merge-conflict'
commentOnDirty: 'This pull request has merge conflicts. Please resolve the conflicts so the PR can be successfully reviewed and merged.'
repoToken: '${{ secrets.GITHUB_TOKEN }}'
name: 🏷🔀Merge Conflict Labeler
on:
push:
branches: [develop]
pull_request_target:
branches: [develop]
types: [synchronize]
jobs:
label:
name: 🏷️ Labeling Merge Conflicts
runs-on: ubuntu-24.04
if: ${{ github.repository == 'streamyfin/streamyfin' }}
permissions:
contents: read
pull-requests: write
steps:
- name: 🚩 Apply merge conflict label
uses: eps1lon/actions-label-merge-conflict@1df065ebe6e3310545d4f4c4e862e43bdca146f0 # v3.0.3
with:
dirtyLabel: '⚔️ merge-conflict'
commentOnDirty: 'This pull request has merge conflicts. Please resolve the conflicts so the PR can be successfully reviewed and merged.'
repoToken: '${{ secrets.GITHUB_TOKEN }}'

38
.github/workflows/detect-duplicate.yml vendored Normal file
View File

@@ -0,0 +1,38 @@
name: 🔁 Detect Duplicate Issues
on:
issues:
types: [opened]
permissions:
contents: read
concurrency:
group: detect-duplicate-${{ github.event.issue.number }}
cancel-in-progress: true
jobs:
detect:
name: 🔍 Find similar issues
if: github.actor != 'github-actions[bot]'
runs-on: ubuntu-24.04
permissions:
issues: write
contents: read
steps:
- name: 📥 Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: 🍞 Setup Bun
uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2.2.0
with:
bun-version: latest
- name: 🔍 Detect duplicate issues
run: bun scripts/detect-duplicate-issue.mjs
env:
GH_TOKEN: ${{ github.token }}
GITHUB_REPOSITORY: ${{ github.repository }}
ISSUE_NUMBER: ${{ github.event.issue.number }}
ISSUE_TITLE: ${{ github.event.issue.title }}
ISSUE_BODY: ${{ github.event.issue.body }}

View File

@@ -1,22 +1,12 @@
#!/bin/bash
# Local helper: fast-forward master into develop and back. Aborts on any failure and
# restores the branch you started on. Not used in CI.
set -euo pipefail
if [[ -n $(git status --porcelain) ]]; then
echo "Error: working tree is not clean — commit or stash first." >&2
exit 1
fi
start_branch=$(git rev-parse --abbrev-ref HEAD)
trap 'git checkout "$start_branch" >/dev/null 2>&1 || true' EXIT
git checkout master
git pull --ff-only
git checkout develop
git merge master
git push --follow-tags
git checkout master
git merge develop --ff-only
git push
git checkout develop
[[ -z $(git status --porcelain) ]] &&
git checkout master &&
git pull --ff-only &&
git checkout develop &&
git merge master &&
git push --follow-tags &&
git checkout master &&
git merge develop --ff-only &&
git push &&
git checkout develop ||
(echo "Error: Failed to merge" && exit 1)

View File

@@ -0,0 +1,202 @@
#!/usr/bin/env bun
/**
* Flags likely-duplicate issues when a new issue is opened, using lexical similarity
* (Jaccard over word sets of the title and body) — no API key, no embeddings.
*
* On a match it posts ONE comment listing the closest open issues and adds the
* "possible duplicate" label. If nothing is similar enough, it does nothing.
*
* Env:
* GITHUB_REPOSITORY owner/repo
* ISSUE_NUMBER the new issue number
* ISSUE_TITLE the new issue title
* ISSUE_BODY the new issue body
* GH_TOKEN/GITHUB_TOKEN for gh (provided in CI)
* DUP_THRESHOLD similarity threshold 0..1 (default 0.3)
* DUP_MAX max matches to report (default 5)
* DUP_FIXTURE optional path to a JSON array of {number,title,body} (local testing)
* DRY_RUN if set, print results instead of commenting/labelling
*/
import { execFileSync } from "node:child_process";
import { readFileSync } from "node:fs";
const REPO = process.env.GITHUB_REPOSITORY || "streamyfin/streamyfin";
const NUMBER = Number(process.env.ISSUE_NUMBER);
const TITLE = process.env.ISSUE_TITLE || "";
const BODY = process.env.ISSUE_BODY || "";
const THRESHOLD = Number(process.env.DUP_THRESHOLD) || 0.3;
const MAX = Number(process.env.DUP_MAX) || 5;
const DRY = !!process.env.DRY_RUN;
const LABEL = "possible duplicate";
// Generic stop words only — keep domain/feature/platform words (android, downloads,
// subtitles…) since those are exactly what makes two reports the same or different.
const STOP = new Set(
(
"a an the and or but if then of to in on at by for with from as is are was were be been being do does did " +
"it its this that these those i you we they me my your our their he she him her " +
"when while where what which who how why so just then than too very can could would should will " +
"not no nor only own same s t don dont im ive please thanks hi hello also still get got use used using " +
"app application streamyfin issue bug"
).split(/\s+/),
);
const stem = (w) => w.replace(/(ing|ed|es|s)$/, "");
const tokens = (s) =>
(s || "")
.toLowerCase()
.replace(/```[\s\S]*?```/g, " ") // drop code blocks
.replace(/<!--[\s\S]*?-->/g, " ") // drop html comments
.replace(/https?:\/\/\S+/g, " ") // drop urls
.replace(/[^a-z0-9\s]/g, " ")
.split(/\s+/)
.filter((w) => w.length > 2 && !STOP.has(w))
.map(stem)
.filter((w) => w.length > 2);
const jaccard = (a, b) => {
const A = new Set(a);
const B = new Set(b);
if (!A.size || !B.size) return 0;
let inter = 0;
for (const x of A) if (B.has(x)) inter++;
return inter / (A.size + B.size - inter);
};
const newTitle = tokens(TITLE);
const newBody = tokens(BODY);
const score = (o) =>
0.6 * jaccard(newTitle, tokens(o.title)) +
0.4 * jaccard(newBody, tokens(o.body));
// fetch open issues (excluding PRs and the new issue itself)
let issues;
if (process.env.DUP_FIXTURE) {
issues = JSON.parse(readFileSync(process.env.DUP_FIXTURE, "utf8"));
} else {
const raw = execFileSync(
"gh",
[
"api",
`repos/${REPO}/issues`,
"--paginate",
"-X",
"GET",
"-f",
"state=open",
"-f",
"per_page=100",
"--jq",
".[] | select(.pull_request | not) | {number, title, body}",
],
{ encoding: "utf8", maxBuffer: 1e8 },
);
issues = raw
.split("\n")
.filter(Boolean)
.map((l) => JSON.parse(l));
}
const matches = issues
.filter((o) => o.number !== NUMBER)
.map((o) => ({ ...o, s: score(o) }))
.filter((o) => o.s >= THRESHOLD)
.sort((a, b) => b.s - a.s)
.slice(0, MAX);
if (!matches.length) {
console.log("No likely duplicates found.");
process.exit(0);
}
// Neutralise other issues' titles before echoing them back: break @mentions and
// strip markdown/HTML control chars so a maliciously-named issue can't ping people
// or inject formatting into our comment. GitHub linkifies "#123" on its own.
const safeTitle = (t) =>
(t || "")
.replace(/@/g, "@")
.replace(/[`<>|*_~[\]]/g, " ")
.replace(/\s+/g, " ")
.trim()
.slice(0, 140);
const list = matches
.map(
(m) =>
`- #${m.number}${safeTitle(m.title)} (≈ ${Math.round(m.s * 100)}% similar)`,
)
.join("\n");
const comment = [
"<!-- duplicate-detector -->",
"🔍 **This looks like it might be a duplicate.** Possibly related open issues:",
"",
list,
"",
"If yours is different, ignore this — a maintainer will confirm. Otherwise, please 👍 the existing issue and add any extra details there.",
].join("\n");
console.log(`Found ${matches.length} possible duplicate(s):\n${list}`);
if (DRY) {
console.log("\nDRY_RUN: not commenting/labelling.");
process.exit(0);
}
execFileSync(
"gh",
[
"api",
"-X",
"POST",
`repos/${REPO}/issues/${NUMBER}/comments`,
"-f",
`body=${comment}`,
],
{ stdio: "ignore" },
);
try {
execFileSync(
"gh",
[
"api",
"-X",
"POST",
`repos/${REPO}/issues/${NUMBER}/labels`,
"-f",
`labels[]=${LABEL}`,
],
{ stdio: "ignore" },
);
} catch {
// label may not exist yet — create then add
execFileSync(
"gh",
[
"api",
"-X",
"POST",
`repos/${REPO}/labels`,
"-f",
`name=${LABEL}`,
"-f",
"color=fbca04",
"-f",
"description=Automatically flagged as a possible duplicate",
],
{ stdio: "ignore" },
);
execFileSync(
"gh",
[
"api",
"-X",
"POST",
`repos/${REPO}/issues/${NUMBER}/labels`,
"-f",
`labels[]=${LABEL}`,
],
{ stdio: "ignore" },
);
}
console.log("Commented and labelled.");

View File

@@ -1,28 +1,62 @@
#!/usr/bin/env node
// Symlinks the platform-specific native dirs to `ios` / `android` depending on EXPO_TV.
// Uses fs APIs (no shell) so there is no command-injection surface.
const fs = require("node:fs");
const _fs = require("node:fs");
const path = require("node:path");
const process = require("node:process");
const { execSync } = require("node:child_process");
const root = process.cwd();
const isTV = process.env.EXPO_TV && process.env.EXPO_TV !== "0";
// const tvosPath = path.join(root, 'iostv');
// const iosPath = path.join(root, 'iosmobile');
// const androidPath = path.join(root, 'androidmobile');
// const androidTVPath = path.join(root, 'androidtv');
// const device = process.argv[2];
// const platform = process.argv[2];
const isTV = process.env.EXPO_TV || false;
const links = isTV
? { ios: path.join(root, "iostv"), android: path.join(root, "androidtv") }
: {
ios: path.join(root, "iosmobile"),
android: path.join(root, "androidmobile"),
};
const paths = new Map([
["tvos", path.join(root, "iostv")],
["ios", path.join(root, "iosmobile")],
["android", path.join(root, "androidmobile")],
["androidtv", path.join(root, "androidtv")],
]);
for (const [link, target] of Object.entries(links)) {
fs.mkdirSync(target, { recursive: true });
try {
fs.unlinkSync(link); // replace an existing symlink/file (ln -nsf)
} catch {
// nothing to remove
}
fs.symlinkSync(target, link);
console.log(`${link} -> ${target}`);
// const platformPath = paths.get(platform);
if (isTV) {
stdout = execSync(
`mkdir -p ${paths.get("tvos")}; ln -nsf ${paths.get("tvos")} ios`,
);
console.log(stdout.toString());
stdout = execSync(
`mkdir -p ${paths.get("androidtv")}; ln -nsf ${paths.get(
"androidtv",
)} android`,
);
console.log(stdout.toString());
} else {
stdout = execSync(
`mkdir -p ${paths.get("ios")}; ln -nsf ${paths.get("ios")} ios`,
);
console.log(stdout.toString());
stdout = execSync(
`mkdir -p ${paths.get("android")}; ln -nsf ${paths.get("android")} android`,
);
console.log(stdout.toString());
}
// target = "";
// switch (platform) {
// case "tvos":
// target = "ios";
// break;
// case "ios":
// target = "ios";
// break;
// case "android":
// target = "android";
// break;
// case "androidtv":
// target = "android";
// break;
// }