ci: guard docs against poisoned tool text

This commit is contained in:
Peter Steinberger
2026-04-28 08:26:50 +01:00
parent bcf4628092
commit 2f3e81fec2

View File

@@ -47,6 +47,33 @@ const MINTLIFY_LANGUAGE_CODES = new Set([
"hu",
]);
/**
 * Text signatures that must never appear in docs content.
 *
 * Each entry pairs a `pattern` (a non-global RegExp, so `exec`/`test` keep no
 * `lastIndex` state between files) with the `message` reported when the
 * pattern is found. `checkPoisonText` reports the first hit of every entry.
 */
const POISON_TEXT_PATTERNS = [
  {
    // "analysis to=functions.<tool>" channel header.
    pattern: /\banalysis\s+to=functions\./iu,
    message: "Leaked tool-call channel marker.",
  },
  {
    // Same header shape for the "commentary" and "final" channels.
    pattern: /\b(?:commentary|final)\s+to=functions\./iu,
    message: "Leaked tool-call channel marker.",
  },
  {
    pattern: /\bfunctions\.(?:read|write|exec|search|run)\b/iu,
    message: "Leaked internal tool name.",
  },
  {
    // "<name>_input={", where <name> may mix ASCII word characters, hyphens and
    // CJK characters (U+3400-U+9FFF). `\b` only recognises ASCII word
    // characters, even with the `u` flag, so on its own it never anchors in
    // front of a name that starts with a CJK character after whitespace or at
    // the start of a line. The second alternative anchors exactly there: the
    // next character is CJK and the previous one is not.
    pattern:
      /(?:\b|(?<![\u3400-\u9fff])(?=[\u3400-\u9fff]))[A-Za-z_\u3400-\u9fff][\w\u3400-\u9fff-]*_input=\{/u,
    message: "Leaked tool-call input payload.",
  },
  {
    pattern: /\/home\/runner\/work\//u,
    message: "Leaked GitHub Actions workspace path.",
  },
  {
    pattern: /彩神争霸/u,
    message: "Known spam/gambling text from a poisoned translation.",
  },
];
function parseArgs(argv) {
const roots = [];
let jsonOut = "";
@@ -133,8 +160,40 @@ function checkMintlifyMdxStructure(filePath, raw) {
}));
}
/**
 * Convert a string offset into a 1-based line/column position.
 *
 * Lines are separated by "\n"; a "\r" directly before a "\n" is part of that
 * line break. The column is measured in UTF-16 code units from the start of
 * the line that contains `offset`.
 *
 * @param {string} raw - Full text being inspected.
 * @param {number} offset - Index into `raw` (e.g. a regex `match.index`).
 * @returns {{ line: number, column: number }} 1-based position of `offset`.
 */
function lineColumnForIndex(raw, offset) {
  const head = raw.slice(0, offset);
  const lastBreak = head.lastIndexOf("\n");

  // Every line feed in front of the offset starts one more line.
  let line = 1;
  for (let at = head.indexOf("\n"); at !== -1; at = head.indexOf("\n", at + 1)) {
    line += 1;
  }

  // Characters after the last line feed, plus one for the 1-based column.
  // With no line feed, lastBreak is -1 and this is head.length + 1.
  return { line, column: head.length - lastBreak };
}
/**
 * Scan a document for every entry in POISON_TEXT_PATTERNS.
 *
 * At most one error is produced per pattern, located at that pattern's first
 * occurrence in `raw`. Errors come back in the order the patterns are listed.
 *
 * @param {string} filePath - Path stored in each error's `file` field.
 * @param {string} raw - Full file contents to scan.
 * @returns {Array<{type: string, file: string, line: number, column: number, message: string}>}
 *   One "poison-text" error per matching pattern; empty when nothing matches.
 */
function checkPoisonText(filePath, raw) {
  return POISON_TEXT_PATTERNS.flatMap(({ pattern, message }) => {
    const hit = pattern.exec(raw);
    if (hit === null) {
      return [];
    }
    const { line, column } = lineColumnForIndex(raw, hit.index);
    return [{ type: "poison-text", file: filePath, line, column, message }];
  });
}
async function checkMdxFile(filePath) {
const raw = fs.readFileSync(filePath, "utf8");
const poisonErrors = checkPoisonText(filePath, raw);
if (poisonErrors.length > 0) {
return poisonErrors;
}
const structureErrors = checkMintlifyMdxStructure(filePath, raw);
if (structureErrors.length > 0) {
return structureErrors;