From 2f3e81fec203bc3491962439a80cb6fed36873f4 Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Tue, 28 Apr 2026 08:26:50 +0100
Subject: [PATCH] ci: guard docs against poisoned tool text

---
 scripts/check-docs-mdx.mjs | 59 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/scripts/check-docs-mdx.mjs b/scripts/check-docs-mdx.mjs
index 3d63f121ebb..485683b2383 100644
--- a/scripts/check-docs-mdx.mjs
+++ b/scripts/check-docs-mdx.mjs
@@ -47,6 +47,33 @@ const MINTLIFY_LANGUAGE_CODES = new Set([
   "hu",
 ]);
 
+const POISON_TEXT_PATTERNS = [
+  {
+    pattern: /\banalysis\s+to=functions\./iu,
+    message: "Leaked tool-call channel marker.",
+  },
+  {
+    pattern: /\b(?:commentary|final)\s+to=functions\./iu,
+    message: "Leaked tool-call channel marker.",
+  },
+  {
+    pattern: /\bfunctions\.(?:read|write|exec|search|run)\b/iu,
+    message: "Leaked internal tool name.",
+  },
+  {
+    pattern: /\b[A-Za-z_\u3400-\u9fff][\w\u3400-\u9fff-]*_input=\{/u,
+    message: "Leaked tool-call input payload.",
+  },
+  {
+    pattern: /\/home\/runner\/work\//u,
+    message: "Leaked GitHub Actions workspace path.",
+  },
+  {
+    pattern: /彩神马争霸/u,
+    message: "Known spam/gambling text from a poisoned translation.",
+  },
+];
+
 function parseArgs(argv) {
   const roots = [];
   let jsonOut = "";
@@ -133,8 +160,40 @@ function checkMintlifyMdxStructure(filePath, raw) {
   }));
 }
 
+function lineColumnForIndex(raw, offset) {
+  const prefix = raw.slice(0, offset);
+  const lines = prefix.split(/\r?\n/u);
+  return {
+    line: lines.length,
+    column: lines.at(-1).length + 1,
+  };
+}
+
+function checkPoisonText(filePath, raw) {
+  const errors = [];
+  for (const { pattern, message } of POISON_TEXT_PATTERNS) {
+    const match = pattern.exec(raw);
+    if (!match) {
+      continue;
+    }
+    const location = lineColumnForIndex(raw, match.index);
+    errors.push({
+      type: "poison-text",
+      file: filePath,
+      line: location.line,
+      column: location.column,
+      message,
+    });
+  }
+  return errors;
+}
+
 async function checkMdxFile(filePath) {
   const raw = fs.readFileSync(filePath, "utf8");
+  const poisonErrors = checkPoisonText(filePath, raw);
+  if (poisonErrors.length > 0) {
+    return poisonErrors;
+  }
   const structureErrors = checkMintlifyMdxStructure(filePath, raw);
   if (structureErrors.length > 0) {
     return structureErrors;