Files
openclaw/scripts/check-web-fetch-provider-boundaries.mjs
Vincent Koc 6eca1949d5 refactor(plugins): tighten web fetch provider boundary (#59646)
* refactor(plugins): tighten web fetch provider boundary

* fix(config): sync fetch secret parity and baseline

* fix(ci): enforce web fetch boundary guard
2026-04-02 20:53:57 +09:00

129 lines
3.6 KiB
JavaScript

#!/usr/bin/env node
import { promises as fs } from "node:fs";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { runAsScript } from "./lib/ts-guard-utils.mjs";
// Repository root: the parent of the directory that contains this script.
const scriptDir = path.dirname(fileURLToPath(import.meta.url));
const repoRoot = path.resolve(scriptDir, "..");

// Only files with one of these extensions are collected by walkFiles().
const scanExtensions = new Set([".ts", ".js", ".mjs", ".cjs"]);

// Directory names that walkFiles() never descends into.
const ignoredDirNames = new Set([
  ".artifacts",
  ".git",
  ".turbo",
  "build",
  "coverage",
  "dist",
  "extensions",
  "node_modules",
]);

// Repo-relative (forward-slash) paths that are exempt from the boundary check.
const allowedFiles = new Set([
  "src/agents/tools/web-fetch.test-harness.ts",
  "src/config/legacy-web-fetch.ts",
  "src/config/zod-schema.agent-runtime.ts",
  "src/plugins/bundled-provider-auth-env-vars.generated.ts",
  "src/secrets/target-registry-data.ts",
]);

// A source line matching any of these expressions is reported as a violation.
const suspiciousPatterns = [
  /fetchFirecrawlContent/,
  /firecrawl-fetch-provider\.js/,
  /createFirecrawlWebFetchProvider/,
  /providerId:\s*"firecrawl"/,
  /provider:\s*"firecrawl"/,
  /id:\s*"firecrawl"/,
];
/**
 * Recursively collects scannable source files beneath `rootDir`.
 *
 * Directories whose name is listed in `ignoredDirNames` are not descended into, and only
 * regular files whose extension is listed in `scanExtensions` are returned. A `rootDir`
 * that does not exist (ENOENT) yields an empty list; any other `readdir` error is rethrown.
 *
 * @param {string} rootDir - Directory to walk.
 * @returns {Promise<string[]>} Paths formed by joining `rootDir` with each entry name,
 *   in the order `readdir` reports them (subdirectories expanded in place).
 */
async function walkFiles(rootDir) {
  let entries;
  try {
    entries = await fs.readdir(rootDir, { withFileTypes: true });
  } catch (error) {
    // A missing directory simply contributes no files; everything else is a real failure.
    if (error?.code === "ENOENT") {
      return [];
    }
    throw error;
  }

  const out = [];
  for (const entry of entries) {
    const entryPath = path.join(rootDir, entry.name);
    if (entry.isDirectory()) {
      if (!ignoredDirNames.has(entry.name)) {
        // Append element by element: spreading a large array into push() passes every
        // element as a call argument and can exceed the engine's argument limit.
        for (const nestedPath of await walkFiles(entryPath)) {
          out.push(nestedPath);
        }
      }
      continue;
    }
    if (entry.isFile() && scanExtensions.has(path.extname(entry.name))) {
      out.push(entryPath);
    }
  }
  return out;
}
/**
 * Converts a file path into a path relative to `repoRoot` that uses forward slashes
 * regardless of the platform separator.
 *
 * @param {string} filePath - Path to convert.
 * @returns {string} The `repoRoot`-relative path with `/` as separator.
 */
function normalizeRepoPath(filePath) {
  const relativePath = path.relative(repoRoot, filePath);
  const segments = relativePath.split(path.sep);
  return segments.join("/");
}
export async function collectWebFetchProviderBoundaryViolations() {
const files = await walkFiles(path.join(repoRoot, "src"));
const violations = [];
for (const filePath of files) {
const relativeFile = normalizeRepoPath(filePath);
if (allowedFiles.has(relativeFile) || relativeFile.includes(".test.")) {
continue;
}
const content = await fs.readFile(filePath, "utf8");
const lines = content.split(/\r?\n/);
for (const [index, line] of lines.entries()) {
if (!line.includes("firecrawl") && !line.includes("Firecrawl")) {
continue;
}
if (!suspiciousPatterns.some((pattern) => pattern.test(line))) {
continue;
}
violations.push({
file: relativeFile,
line: index + 1,
reason: "core web-fetch runtime/tooling contains Firecrawl-specific fetch logic",
});
}
}
return violations.toSorted(
(left, right) => left.file.localeCompare(right.file) || left.line - right.line,
);
}
/**
 * Runs the boundary check and reports the result.
 *
 * With `--json` in `argv`, the violation list is written to stdout as pretty-printed JSON
 * (even when empty). Otherwise each violation is written to stderr as
 * `<file>:<line> <reason>`, and nothing is written when there are none.
 *
 * @param {string[]} [argv] - Command-line arguments; defaults to `process.argv.slice(2)`.
 * @param {{stdout?: {write: Function}, stderr?: {write: Function}}} [io] - Optional output
 *   streams; a stream without a `write` member falls back to the matching `process` stream.
 * @returns {Promise<number>} 0 when no violations were found, otherwise 1.
 */
export async function main(argv = process.argv.slice(2), io) {
  const wantsJson = argv.includes("--json");
  const violations = await collectWebFetchProviderBoundaryViolations();

  // Resolve the sink at write time and call write() on it so `this` stays bound.
  const makeWriter = (channel) => (chunk) => {
    const sink = io?.[channel]?.write ? io[channel] : process[channel];
    sink.write(chunk);
  };
  const writeStdout = makeWriter("stdout");
  const writeStderr = makeWriter("stderr");

  const exitCode = violations.length === 0 ? 0 : 1;
  if (wantsJson) {
    writeStdout(`${JSON.stringify(violations, null, 2)}\n`);
    return exitCode;
  }

  for (const { file, line, reason } of violations) {
    writeStderr(`${file}:${line} ${reason}\n`);
  }
  return exitCode;
}
// Script entry point: hand main() to runAsScript together with this module's URL.
// When the callback receives no `io` object and the check failed, terminate the process
// with the failing status; in every other case just return the status to the caller.
runAsScript(import.meta.url, async (argv, io) => {
  const status = await main(argv, io);
  const shouldExitProcess = !io && status !== 0;
  if (shouldExitProcess) {
    process.exit(status);
  }
  return status;
});