#!/usr/bin/env node import { promises as fs } from "node:fs"; import path from "node:path"; import { fileURLToPath } from "node:url"; import { runAsScript } from "./lib/ts-guard-utils.mjs"; const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), ".."); const scanExtensions = new Set([".ts", ".js", ".mjs", ".cjs"]); const ignoredDirNames = new Set([ ".artifacts", ".git", ".turbo", "build", "coverage", "dist", "extensions", "node_modules", ]); const allowedFiles = new Set([ "src/agents/tools/web-fetch.test-harness.ts", "src/config/legacy-web-fetch.ts", "src/config/zod-schema.agent-runtime.ts", "src/plugins/bundled-provider-auth-env-vars.generated.ts", "src/secrets/target-registry-data.ts", ]); const suspiciousPatterns = [ /fetchFirecrawlContent/, /firecrawl-fetch-provider\.js/, /createFirecrawlWebFetchProvider/, /providerId:\s*"firecrawl"/, /provider:\s*"firecrawl"/, /id:\s*"firecrawl"/, ]; async function walkFiles(rootDir) { const out = []; let entries = []; try { entries = await fs.readdir(rootDir, { withFileTypes: true }); } catch (error) { if (error && typeof error === "object" && "code" in error && error.code === "ENOENT") { return out; } throw error; } for (const entry of entries) { const entryPath = path.join(rootDir, entry.name); if (entry.isDirectory()) { if (!ignoredDirNames.has(entry.name)) { out.push(...(await walkFiles(entryPath))); } continue; } if (entry.isFile() && scanExtensions.has(path.extname(entry.name))) { out.push(entryPath); } } return out; } function normalizeRepoPath(filePath) { return path.relative(repoRoot, filePath).split(path.sep).join("/"); } export async function collectWebFetchProviderBoundaryViolations() { const files = await walkFiles(path.join(repoRoot, "src")); const violations = []; for (const filePath of files) { const relativeFile = normalizeRepoPath(filePath); if (allowedFiles.has(relativeFile) || relativeFile.includes(".test.")) { continue; } const content = await fs.readFile(filePath, "utf8"); const lines = content.split(/\r?\n/); for (const [index, line] of lines.entries()) { if (!line.includes("firecrawl") && !line.includes("Firecrawl")) { continue; } if (!suspiciousPatterns.some((pattern) => pattern.test(line))) { continue; } violations.push({ file: relativeFile, line: index + 1, reason: "core web-fetch runtime/tooling contains Firecrawl-specific fetch logic", }); } } return violations.toSorted( (left, right) => left.file.localeCompare(right.file) || left.line - right.line, ); } export async function main(argv = process.argv.slice(2), io) { const json = argv.includes("--json"); const violations = await collectWebFetchProviderBoundaryViolations(); const writeStdout = (chunk) => { if (io?.stdout?.write) { io.stdout.write(chunk); return; } process.stdout.write(chunk); }; const writeStderr = (chunk) => { if (io?.stderr?.write) { io.stderr.write(chunk); return; } process.stderr.write(chunk); }; if (json) { writeStdout(`${JSON.stringify(violations, null, 2)}\n`); } else if (violations.length > 0) { for (const violation of violations) { writeStderr(`${violation.file}:${violation.line} ${violation.reason}\n`); } } return violations.length === 0 ? 0 : 1; } runAsScript(import.meta.url, async (argv, io) => { const exitCode = await main(argv, io); if (!io && exitCode !== 0) { process.exit(exitCode); } return exitCode; });