mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-03 21:31:26 +00:00
refactor(plugins): tighten web fetch provider boundary (#59646)
* refactor(plugins): tighten web fetch provider boundary
* fix(config): sync fetch secret parity and baseline
* fix(ci): enforce web fetch boundary guard
This commit is contained in:
128
scripts/check-web-fetch-provider-boundaries.mjs
Normal file
128
scripts/check-web-fetch-provider-boundaries.mjs
Normal file
@@ -0,0 +1,128 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import { promises as fs } from "node:fs";
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { runAsScript } from "./lib/ts-guard-utils.mjs";
|
||||
|
||||
// Absolute path to the repository root (this script lives in scripts/).
const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
// File extensions considered when scanning for boundary violations.
const scanExtensions = new Set([".ts", ".js", ".mjs", ".cjs"]);
// Directory names skipped entirely during the walk (VCS metadata, build
// output, dependencies, and the extensions tree where providers may live).
const ignoredDirNames = new Set([
  ".artifacts",
  ".git",
  ".turbo",
  "build",
  "coverage",
  "dist",
  "extensions",
  "node_modules",
]);
// Repo-relative files exempt from the check — presumably config/schema and
// test-harness code that must name the provider; confirm before extending.
const allowedFiles = new Set([
  "src/agents/tools/web-fetch.test-harness.ts",
  "src/config/legacy-web-fetch.ts",
  "src/config/zod-schema.agent-runtime.ts",
  "src/plugins/bundled-provider-auth-env-vars.generated.ts",
  "src/secrets/target-registry-data.ts",
]);
// Line-level patterns that indicate Firecrawl-specific fetch logic leaking
// into core code. Only evaluated on lines that mention "firecrawl" at all
// (see the cheap pre-filter in the scan loop).
const suspiciousPatterns = [
  /fetchFirecrawlContent/,
  /firecrawl-fetch-provider\.js/,
  /createFirecrawlWebFetchProvider/,
  /providerId:\s*"firecrawl"/,
  /provider:\s*"firecrawl"/,
  /id:\s*"firecrawl"/,
];
|
||||
|
||||
/**
 * Recursively collect scannable source files under `rootDir`.
 *
 * Directories named in `ignoredDirNames` are skipped entirely; only files
 * whose extension is in `scanExtensions` are returned. A missing root
 * directory yields an empty list rather than throwing; any other readdir
 * failure propagates to the caller.
 *
 * @param {string} rootDir - Absolute directory to walk.
 * @returns {Promise<string[]>} Absolute paths of matching files.
 */
async function walkFiles(rootDir) {
  let dirEntries;
  try {
    dirEntries = await fs.readdir(rootDir, { withFileTypes: true });
  } catch (error) {
    const isMissingDir =
      error && typeof error === "object" && "code" in error && error.code === "ENOENT";
    if (isMissingDir) {
      return [];
    }
    throw error;
  }
  const found = [];
  for (const dirEntry of dirEntries) {
    const fullPath = path.join(rootDir, dirEntry.name);
    if (dirEntry.isDirectory()) {
      if (ignoredDirNames.has(dirEntry.name)) {
        continue;
      }
      found.push(...(await walkFiles(fullPath)));
    } else if (dirEntry.isFile() && scanExtensions.has(path.extname(dirEntry.name))) {
      found.push(fullPath);
    }
  }
  return found;
}
|
||||
|
||||
/**
 * Convert an absolute file path into a repo-relative path with forward
 * slashes, so results compare stably across platforms.
 *
 * @param {string} filePath - Absolute path inside the repository.
 * @returns {string} Repo-relative POSIX-style path.
 */
function normalizeRepoPath(filePath) {
  const relative = path.relative(repoRoot, filePath);
  return relative.replaceAll(path.sep, "/");
}
|
||||
|
||||
/**
 * Scan src/ for lines that pull Firecrawl-specific web-fetch logic into
 * core code outside the explicitly allow-listed files.
 *
 * Allow-listed files and any path containing ".test." are exempt. A line
 * is only flagged when it both mentions "firecrawl"/"Firecrawl" (cheap
 * pre-filter) and matches one of `suspiciousPatterns`.
 *
 * @returns {Promise<Array<{file: string, line: number, reason: string}>>}
 *   Violations sorted by file path, then line number.
 */
export async function collectWebFetchProviderBoundaryViolations() {
  const candidates = await walkFiles(path.join(repoRoot, "src"));
  const violations = [];
  for (const absolutePath of candidates) {
    const repoPath = normalizeRepoPath(absolutePath);
    const exempt = allowedFiles.has(repoPath) || repoPath.includes(".test.");
    if (exempt) {
      continue;
    }
    const source = await fs.readFile(absolutePath, "utf8");
    source.split(/\r?\n/).forEach((lineText, lineIndex) => {
      const mentionsProvider =
        lineText.includes("firecrawl") || lineText.includes("Firecrawl");
      if (!mentionsProvider) {
        return;
      }
      if (suspiciousPatterns.some((pattern) => pattern.test(lineText))) {
        violations.push({
          file: repoPath,
          line: lineIndex + 1,
          reason: "core web-fetch runtime/tooling contains Firecrawl-specific fetch logic",
        });
      }
    });
  }
  return violations.toSorted(
    (a, b) => a.file.localeCompare(b.file) || a.line - b.line,
  );
}
|
||||
|
||||
/**
 * CLI entry point: report web-fetch provider boundary violations.
 *
 * With "--json", violations are printed to stdout as pretty-printed JSON;
 * otherwise each violation goes to stderr as "file:line reason". Output is
 * routed through `io.stdout`/`io.stderr` when those expose `write` (e.g.
 * in tests), falling back to the real process streams.
 *
 * @param {string[]} [argv] - CLI arguments (defaults to process.argv.slice(2)).
 * @param {{stdout?: {write: Function}, stderr?: {write: Function}}} [io]
 * @returns {Promise<number>} 0 when clean, 1 when violations exist.
 */
export async function main(argv = process.argv.slice(2), io) {
  const asJson = argv.includes("--json");
  const violations = await collectWebFetchProviderBoundaryViolations();
  // Build a writer that prefers the injected stream when it has write(),
  // and otherwise falls back to the real process stream.
  const pickWriter = (injected, fallback) => (chunk) => {
    if (injected?.write) {
      injected.write(chunk);
    } else {
      fallback.write(chunk);
    }
  };
  const writeStdout = pickWriter(io?.stdout, process.stdout);
  const writeStderr = pickWriter(io?.stderr, process.stderr);
  if (asJson) {
    writeStdout(`${JSON.stringify(violations, null, 2)}\n`);
  } else if (violations.length > 0) {
    for (const { file, line, reason } of violations) {
      writeStderr(`${file}:${line} ${reason}\n`);
    }
  }
  return violations.length === 0 ? 0 : 1;
}
|
||||
|
||||
// Run as a CLI when this module is the entry point. `runAsScript` is a
// project helper (./lib/ts-guard-utils.mjs); presumably it skips execution
// when the module is merely imported — TODO confirm against that helper.
runAsScript(import.meta.url, async (argv, io) => {
  const exitCode = await main(argv, io);
  // Only terminate the process for real CLI runs (no injected io); harnesses
  // that pass io get the exit code back as a return value instead.
  if (!io && exitCode !== 0) {
    process.exit(exitCode);
  }
  return exitCode;
});
|
||||
@@ -1,4 +1,5 @@
|
||||
import { extractReadableContent, fetchFirecrawlContent } from "../src/agents/tools/web-tools.js";
|
||||
import { fetchFirecrawlContent } from "../extensions/firecrawl/api.ts";
|
||||
import { extractReadableContent } from "../src/agents/tools/web-tools.js";
|
||||
|
||||
const DEFAULT_URLS = [
|
||||
"https://en.wikipedia.org/wiki/Web_scraping",
|
||||
|
||||
Reference in New Issue
Block a user