mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-02 21:01:51 +00:00
refactor(plugins): tighten web fetch provider boundary (#59646)
* refactor(plugins): tighten web fetch provider boundary * fix(config): sync fetch secret parity and baseline * fix(ci): enforce web fetch boundary guard
This commit is contained in:
7
.github/workflows/ci.yml
vendored
7
.github/workflows/ci.yml
vendored
@@ -546,6 +546,11 @@ jobs:
|
||||
continue-on-error: true
|
||||
run: pnpm run lint:web-search-provider-boundaries
|
||||
|
||||
- name: Run web fetch provider boundary guard
|
||||
id: web_fetch_provider_boundary
|
||||
continue-on-error: true
|
||||
run: pnpm run lint:web-fetch-provider-boundaries
|
||||
|
||||
- name: Run extension src boundary guard
|
||||
id: extension_src_outside_plugin_sdk_boundary
|
||||
continue-on-error: true
|
||||
@@ -593,6 +598,7 @@ jobs:
|
||||
NO_EXTENSION_TEST_CORE_IMPORTS_OUTCOME: ${{ steps.no_extension_test_core_imports.outcome }}
|
||||
PLUGIN_SDK_SUBPATHS_EXPORTED_OUTCOME: ${{ steps.plugin_sdk_subpaths_exported.outcome }}
|
||||
WEB_SEARCH_PROVIDER_BOUNDARY_OUTCOME: ${{ steps.web_search_provider_boundary.outcome }}
|
||||
WEB_FETCH_PROVIDER_BOUNDARY_OUTCOME: ${{ steps.web_fetch_provider_boundary.outcome }}
|
||||
EXTENSION_SRC_OUTSIDE_PLUGIN_SDK_BOUNDARY_OUTCOME: ${{ steps.extension_src_outside_plugin_sdk_boundary.outcome }}
|
||||
EXTENSION_PLUGIN_SDK_INTERNAL_BOUNDARY_OUTCOME: ${{ steps.extension_plugin_sdk_internal_boundary.outcome }}
|
||||
EXTENSION_RELATIVE_OUTSIDE_PACKAGE_BOUNDARY_OUTCOME: ${{ steps.extension_relative_outside_package_boundary.outcome }}
|
||||
@@ -612,6 +618,7 @@ jobs:
|
||||
"lint:plugins:no-extension-test-core-imports|$NO_EXTENSION_TEST_CORE_IMPORTS_OUTCOME" \
|
||||
"lint:plugins:plugin-sdk-subpaths-exported|$PLUGIN_SDK_SUBPATHS_EXPORTED_OUTCOME" \
|
||||
"web-search-provider-boundary|$WEB_SEARCH_PROVIDER_BOUNDARY_OUTCOME" \
|
||||
"web-fetch-provider-boundary|$WEB_FETCH_PROVIDER_BOUNDARY_OUTCOME" \
|
||||
"extension-src-outside-plugin-sdk-boundary|$EXTENSION_SRC_OUTSIDE_PLUGIN_SDK_BOUNDARY_OUTCOME" \
|
||||
"extension-plugin-sdk-internal-boundary|$EXTENSION_PLUGIN_SDK_INTERNAL_BOUNDARY_OUTCOME" \
|
||||
"extension-relative-outside-package-boundary|$EXTENSION_RELATIVE_OUTSIDE_PACKAGE_BOUNDARY_OUTCOME" \
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
{"generatedBy":"scripts/generate-config-doc-baseline.ts","recordType":"meta","totalPaths":5767}
|
||||
{"generatedBy":"scripts/generate-config-doc-baseline.ts","recordType":"meta","totalPaths":5764}
|
||||
{"recordType":"path","path":"acp","kind":"core","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["advanced"],"label":"ACP","help":"ACP runtime controls for enabling dispatch, selecting backends, constraining allowed agent targets, and tuning streamed turn projection behavior.","hasChildren":true}
|
||||
{"recordType":"path","path":"acp.allowedAgents","kind":"core","type":"array","required":false,"deprecated":false,"sensitive":false,"tags":["access"],"label":"ACP Allowed Agents","help":"Allowlist of ACP target agent ids permitted for ACP runtime sessions. Empty means no additional allowlist restriction.","hasChildren":true}
|
||||
{"recordType":"path","path":"acp.allowedAgents.*","kind":"core","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false}
|
||||
@@ -4402,6 +4402,12 @@
|
||||
{"recordType":"path","path":"plugins.entries.feishu.subagent.allowModelOverride","kind":"plugin","type":"boolean","required":false,"deprecated":false,"sensitive":false,"tags":["access"],"label":"Allow Plugin Subagent Model Override","help":"Explicitly allows this plugin to request provider/model overrides in background subagent runs. Keep false unless the plugin is trusted to steer model selection.","hasChildren":false}
|
||||
{"recordType":"path","path":"plugins.entries.firecrawl","kind":"plugin","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["advanced"],"label":"@openclaw/firecrawl-plugin","help":"OpenClaw Firecrawl plugin (plugin: firecrawl)","hasChildren":true}
|
||||
{"recordType":"path","path":"plugins.entries.firecrawl.config","kind":"plugin","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["advanced"],"label":"@openclaw/firecrawl-plugin Config","help":"Plugin-defined config payload for firecrawl.","hasChildren":true}
|
||||
{"recordType":"path","path":"plugins.entries.firecrawl.config.webFetch","kind":"plugin","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":true}
|
||||
{"recordType":"path","path":"plugins.entries.firecrawl.config.webFetch.apiKey","kind":"plugin","type":["object","string"],"required":false,"deprecated":false,"sensitive":true,"tags":["auth","security"],"label":"Firecrawl Fetch API Key","help":"Firecrawl API key for web fetch fallback (fallback: FIRECRAWL_API_KEY env var).","hasChildren":false}
|
||||
{"recordType":"path","path":"plugins.entries.firecrawl.config.webFetch.baseUrl","kind":"plugin","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":["advanced","url-secret"],"label":"Firecrawl Fetch Base URL","help":"Firecrawl Fetch base URL override.","hasChildren":false}
|
||||
{"recordType":"path","path":"plugins.entries.firecrawl.config.webFetch.maxAgeMs","kind":"plugin","type":"number","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false}
|
||||
{"recordType":"path","path":"plugins.entries.firecrawl.config.webFetch.onlyMainContent","kind":"plugin","type":"boolean","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false}
|
||||
{"recordType":"path","path":"plugins.entries.firecrawl.config.webFetch.timeoutSeconds","kind":"plugin","type":"number","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false}
|
||||
{"recordType":"path","path":"plugins.entries.firecrawl.config.webSearch","kind":"plugin","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":true}
|
||||
{"recordType":"path","path":"plugins.entries.firecrawl.config.webSearch.apiKey","kind":"plugin","type":["object","string"],"required":false,"deprecated":false,"sensitive":true,"tags":["auth","security"],"label":"Firecrawl Search API Key","help":"Firecrawl API key for web search (fallback: FIRECRAWL_API_KEY env var).","hasChildren":false}
|
||||
{"recordType":"path","path":"plugins.entries.firecrawl.config.webSearch.baseUrl","kind":"plugin","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":["advanced","url-secret"],"label":"Firecrawl Search Base URL","help":"Firecrawl Search base URL override.","hasChildren":false}
|
||||
@@ -5694,20 +5700,11 @@
|
||||
{"recordType":"path","path":"tools.web.fetch","kind":"core","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":true}
|
||||
{"recordType":"path","path":"tools.web.fetch.cacheTtlMinutes","kind":"core","type":"number","required":false,"deprecated":false,"sensitive":false,"tags":["performance","storage","tools"],"label":"Web Fetch Cache TTL (min)","help":"Cache TTL in minutes for web_fetch results.","hasChildren":false}
|
||||
{"recordType":"path","path":"tools.web.fetch.enabled","kind":"core","type":"boolean","required":false,"deprecated":false,"sensitive":false,"tags":["tools"],"label":"Enable Web Fetch Tool","help":"Enable the web_fetch tool (lightweight HTTP fetch).","hasChildren":false}
|
||||
{"recordType":"path","path":"tools.web.fetch.firecrawl","kind":"core","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":true}
|
||||
{"recordType":"path","path":"tools.web.fetch.firecrawl.apiKey","kind":"core","type":["object","string"],"required":false,"deprecated":false,"sensitive":true,"tags":["auth","security","tools"],"label":"Firecrawl API Key","help":"Firecrawl API key (fallback: FIRECRAWL_API_KEY env var).","hasChildren":true}
|
||||
{"recordType":"path","path":"tools.web.fetch.firecrawl.apiKey.id","kind":"core","type":"string","required":true,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false}
|
||||
{"recordType":"path","path":"tools.web.fetch.firecrawl.apiKey.provider","kind":"core","type":"string","required":true,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false}
|
||||
{"recordType":"path","path":"tools.web.fetch.firecrawl.apiKey.source","kind":"core","type":"string","required":true,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false}
|
||||
{"recordType":"path","path":"tools.web.fetch.firecrawl.baseUrl","kind":"core","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":["tools","url-secret"],"label":"Firecrawl Base URL","help":"Firecrawl base URL (e.g. https://api.firecrawl.dev or custom endpoint).","hasChildren":false}
|
||||
{"recordType":"path","path":"tools.web.fetch.firecrawl.enabled","kind":"core","type":"boolean","required":false,"deprecated":false,"sensitive":false,"tags":["tools"],"label":"Enable Firecrawl Fallback","help":"Enable Firecrawl fallback for web_fetch (if configured).","hasChildren":false}
|
||||
{"recordType":"path","path":"tools.web.fetch.firecrawl.maxAgeMs","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["performance","tools"],"label":"Firecrawl Cache Max Age (ms)","help":"Firecrawl maxAge (ms) for cached results when supported by the API.","hasChildren":false}
|
||||
{"recordType":"path","path":"tools.web.fetch.firecrawl.onlyMainContent","kind":"core","type":"boolean","required":false,"deprecated":false,"sensitive":false,"tags":["tools"],"label":"Firecrawl Main Content Only","help":"When true, Firecrawl returns only the main content (default: true).","hasChildren":false}
|
||||
{"recordType":"path","path":"tools.web.fetch.firecrawl.timeoutSeconds","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["performance","tools"],"label":"Firecrawl Timeout (sec)","help":"Timeout in seconds for Firecrawl requests.","hasChildren":false}
|
||||
{"recordType":"path","path":"tools.web.fetch.maxChars","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["performance","tools"],"label":"Web Fetch Max Chars","help":"Max characters returned by web_fetch (truncated).","hasChildren":false}
|
||||
{"recordType":"path","path":"tools.web.fetch.maxCharsCap","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["performance","tools"],"label":"Web Fetch Hard Max Chars","help":"Hard cap for web_fetch maxChars (applies to config and tool calls).","hasChildren":false}
|
||||
{"recordType":"path","path":"tools.web.fetch.maxRedirects","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["performance","storage","tools"],"label":"Web Fetch Max Redirects","help":"Maximum redirects allowed for web_fetch (default: 3).","hasChildren":false}
|
||||
{"recordType":"path","path":"tools.web.fetch.maxResponseBytes","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["performance","tools"],"label":"Web Fetch Max Download Size (bytes)","help":"Max download size before truncation.","hasChildren":false}
|
||||
{"recordType":"path","path":"tools.web.fetch.provider","kind":"core","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":["tools"],"label":"Web Fetch Provider","help":"Web fetch fallback provider id.","hasChildren":false}
|
||||
{"recordType":"path","path":"tools.web.fetch.readability","kind":"core","type":"boolean","required":false,"deprecated":false,"sensitive":false,"tags":["tools"],"label":"Web Fetch Readability Extraction","help":"Use Readability to extract main content from HTML (fallbacks to basic HTML cleanup).","hasChildren":false}
|
||||
{"recordType":"path","path":"tools.web.fetch.timeoutSeconds","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["performance","tools"],"label":"Web Fetch Timeout (sec)","help":"Timeout in seconds for web_fetch requests.","hasChildren":false}
|
||||
{"recordType":"path","path":"tools.web.fetch.userAgent","kind":"core","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":["tools"],"label":"Web Fetch User-Agent","help":"Override User-Agent header for web_fetch requests.","hasChildren":false}
|
||||
|
||||
66
extensions/firecrawl/api.ts
Normal file
66
extensions/firecrawl/api.ts
Normal file
@@ -0,0 +1,66 @@
|
||||
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
|
||||
import { runFirecrawlScrape } from "./src/firecrawl-client.js";
|
||||
|
||||
/**
 * Input accepted by `fetchFirecrawlContent`.
 *
 * - `url`, `extractMode`, `proxy`, `storeInCache` and the optional `maxChars`
 *   are forwarded unchanged to `runFirecrawlScrape`.
 * - `apiKey` and `baseUrl` are only written into the synthesized
 *   `plugins.entries.firecrawl.config.webFetch` config object.
 * - `onlyMainContent`, `maxAgeMs` and `timeoutSeconds` are written into that
 *   config object AND forwarded as explicit scrape arguments.
 * - `maxChars` may be omitted; it is then forwarded as `undefined`.
 */
export type FetchFirecrawlContentParams = {
|
||||
url: string;
|
||||
extractMode: "markdown" | "text";
|
||||
apiKey: string;
|
||||
baseUrl: string;
|
||||
onlyMainContent: boolean;
|
||||
maxAgeMs: number;
|
||||
proxy: "auto" | "basic" | "stealth";
|
||||
storeInCache: boolean;
|
||||
timeoutSeconds: number;
|
||||
maxChars?: number;
|
||||
};
|
||||
|
||||
/**
 * Normalized result returned by `fetchFirecrawlContent`.
 *
 * Each field is copied from the scrape result only when it has the expected
 * runtime type: `text` falls back to the empty string, every other field
 * falls back to `undefined`.
 */
export type FetchFirecrawlContentResult = {
|
||||
text: string;
|
||||
title?: string;
|
||||
finalUrl?: string;
|
||||
status?: number;
|
||||
warning?: string;
|
||||
};
|
||||
|
||||
/**
 * Scrape one URL through the Firecrawl client and return a normalized payload.
 *
 * A minimal config is synthesized that enables the `firecrawl` plugin entry
 * and carries the caller's `webFetch` settings; it is handed to
 * `runFirecrawlScrape` together with the per-request options.
 *
 * @param params - Request options; see `FetchFirecrawlContentParams`.
 * @returns The scrape result with each field kept only when it has the
 *   expected runtime type (`text` defaults to `""`, the rest to `undefined`).
 */
export async function fetchFirecrawlContent(
  params: FetchFirecrawlContentParams,
): Promise<FetchFirecrawlContentResult> {
  const {
    url,
    extractMode,
    maxChars,
    proxy,
    storeInCache,
    onlyMainContent,
    maxAgeMs,
    timeoutSeconds,
  } = params;

  // Plugin-owned fetch settings, mirrored into the synthesized config.
  const webFetch = {
    apiKey: params.apiKey,
    baseUrl: params.baseUrl,
    onlyMainContent,
    maxAgeMs,
    timeoutSeconds,
  };
  const cfg: OpenClawConfig = {
    plugins: {
      entries: {
        firecrawl: { enabled: true, config: { webFetch } },
      },
    },
  };

  const scraped = await runFirecrawlScrape({
    cfg,
    url,
    extractMode,
    maxChars,
    proxy,
    storeInCache,
    onlyMainContent,
    maxAgeMs,
    timeoutSeconds,
  });

  // Runtime-type guards: anything that is not of the expected type is dropped.
  const stringOrUndefined = (value: unknown): string | undefined =>
    typeof value === "string" ? value : undefined;
  const numberOrUndefined = (value: unknown): number | undefined =>
    typeof value === "number" ? value : undefined;

  return {
    text: stringOrUndefined(scraped.text) ?? "",
    title: stringOrUndefined(scraped.title),
    finalUrl: stringOrUndefined(scraped.finalUrl),
    status: numberOrUndefined(scraped.status),
    warning: stringOrUndefined(scraped.warning),
  };
}
|
||||
@@ -29,13 +29,17 @@ vi.mock("./firecrawl-client.js", () => ({
|
||||
|
||||
describe("firecrawl tools", () => {
|
||||
const priorFetch = global.fetch;
|
||||
let fetchFirecrawlContent: typeof import("../api.js").fetchFirecrawlContent;
|
||||
let createFirecrawlWebSearchProvider: typeof import("./firecrawl-search-provider.js").createFirecrawlWebSearchProvider;
|
||||
let createFirecrawlWebFetchProvider: typeof import("./firecrawl-fetch-provider.js").createFirecrawlWebFetchProvider;
|
||||
let createFirecrawlSearchTool: typeof import("./firecrawl-search-tool.js").createFirecrawlSearchTool;
|
||||
let createFirecrawlScrapeTool: typeof import("./firecrawl-scrape-tool.js").createFirecrawlScrapeTool;
|
||||
let firecrawlClientTesting: typeof import("./firecrawl-client.js").__testing;
|
||||
|
||||
beforeAll(async () => {
|
||||
vi.resetModules();
|
||||
({ fetchFirecrawlContent } = await import("../api.js"));
|
||||
({ createFirecrawlWebFetchProvider } = await import("./firecrawl-fetch-provider.js"));
|
||||
({ createFirecrawlWebSearchProvider } = await import("./firecrawl-search-provider.js"));
|
||||
({ createFirecrawlSearchTool } = await import("./firecrawl-search-tool.js"));
|
||||
({ createFirecrawlScrapeTool } = await import("./firecrawl-scrape-tool.js"));
|
||||
@@ -199,6 +203,62 @@ describe("firecrawl tools", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("keeps the compare-helper fetch facade owned by the Firecrawl extension", async () => {
|
||||
await fetchFirecrawlContent({
|
||||
url: "https://docs.openclaw.ai",
|
||||
extractMode: "markdown",
|
||||
apiKey: "firecrawl-key",
|
||||
baseUrl: "https://api.firecrawl.dev",
|
||||
onlyMainContent: false,
|
||||
maxAgeMs: 5000,
|
||||
proxy: "stealth",
|
||||
storeInCache: false,
|
||||
timeoutSeconds: 22,
|
||||
maxChars: 1500,
|
||||
});
|
||||
|
||||
expect(runFirecrawlScrape).toHaveBeenCalledWith({
|
||||
cfg: {
|
||||
plugins: {
|
||||
entries: {
|
||||
firecrawl: {
|
||||
enabled: true,
|
||||
config: {
|
||||
webFetch: {
|
||||
apiKey: "firecrawl-key",
|
||||
baseUrl: "https://api.firecrawl.dev",
|
||||
onlyMainContent: false,
|
||||
maxAgeMs: 5000,
|
||||
timeoutSeconds: 22,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
url: "https://docs.openclaw.ai",
|
||||
extractMode: "markdown",
|
||||
maxChars: 1500,
|
||||
proxy: "stealth",
|
||||
storeInCache: false,
|
||||
onlyMainContent: false,
|
||||
maxAgeMs: 5000,
|
||||
timeoutSeconds: 22,
|
||||
});
|
||||
});
|
||||
|
||||
it("applies minimal provider-selection config for fetch providers", () => {
|
||||
const provider = createFirecrawlWebFetchProvider();
|
||||
if (!provider.applySelectionConfig) {
|
||||
throw new Error("Expected applySelectionConfig to be defined");
|
||||
}
|
||||
const applied = provider.applySelectionConfig({});
|
||||
|
||||
expect(provider.id).toBe("firecrawl");
|
||||
expect(provider.credentialPath).toBe("plugins.entries.firecrawl.config.webFetch.apiKey");
|
||||
expect(applied.plugins?.entries?.firecrawl?.enabled).toBe(true);
|
||||
});
|
||||
|
||||
it("passes proxy and storeInCache through the fetch provider tool", async () => {
|
||||
const { createFirecrawlWebFetchProvider } = await import("./firecrawl-fetch-provider.js");
|
||||
const provider = createFirecrawlWebFetchProvider();
|
||||
|
||||
@@ -1112,6 +1112,7 @@
|
||||
"lint:tmp:no-random-messaging": "node scripts/check-no-random-messaging-tmp.mjs",
|
||||
"lint:tmp:no-raw-channel-fetch": "node scripts/check-no-raw-channel-fetch.mjs",
|
||||
"lint:ui:no-raw-window-open": "node scripts/check-no-raw-window-open.mjs",
|
||||
"lint:web-fetch-provider-boundaries": "node scripts/check-web-fetch-provider-boundaries.mjs",
|
||||
"lint:web-search-provider-boundaries": "node scripts/check-web-search-provider-boundaries.mjs",
|
||||
"lint:webhook:no-low-level-body-read": "node scripts/check-webhook-auth-body-order.mjs",
|
||||
"mac:open": "open dist/OpenClaw.app",
|
||||
|
||||
128
scripts/check-web-fetch-provider-boundaries.mjs
Normal file
128
scripts/check-web-fetch-provider-boundaries.mjs
Normal file
@@ -0,0 +1,128 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import { promises as fs } from "node:fs";
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { runAsScript } from "./lib/ts-guard-utils.mjs";
|
||||
|
||||
// Repository root: the parent of the directory that contains this script.
const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
|
||||
// File extensions that walkFiles collects; files with any other extension are skipped.
const scanExtensions = new Set([".ts", ".js", ".mjs", ".cjs"]);
|
||||
// Directory names walkFiles never descends into (compared against the bare
// entry name, so a match at any depth is skipped).
const ignoredDirNames = new Set([
|
||||
".artifacts",
|
||||
".git",
|
||||
".turbo",
|
||||
"build",
|
||||
"coverage",
|
||||
"dist",
|
||||
"extensions",
|
||||
"node_modules",
|
||||
]);
|
||||
// Repo-relative, forward-slash paths that are exempt from the boundary check.
const allowedFiles = new Set([
|
||||
"src/agents/tools/web-fetch.test-harness.ts",
|
||||
"src/config/legacy-web-fetch.ts",
|
||||
"src/config/zod-schema.agent-runtime.ts",
|
||||
"src/plugins/bundled-provider-auth-env-vars.generated.ts",
|
||||
"src/secrets/target-registry-data.ts",
|
||||
]);
|
||||
// Per-line patterns that mark a violation. They are only tested against lines
// that already contain "firecrawl" or "Firecrawl".
// NOTE(review): the id/provider patterns match double-quoted literals only.
const suspiciousPatterns = [
|
||||
/fetchFirecrawlContent/,
|
||||
/firecrawl-fetch-provider\.js/,
|
||||
/createFirecrawlWebFetchProvider/,
|
||||
/providerId:\s*"firecrawl"/,
|
||||
/provider:\s*"firecrawl"/,
|
||||
/id:\s*"firecrawl"/,
|
||||
];
|
||||
|
||||
/**
 * Recursively collect the files under `rootDir` whose extension is listed in
 * `scanExtensions`, skipping any directory named in `ignoredDirNames`.
 *
 * @param rootDir Directory to scan.
 * @returns Paths (each built by joining `rootDir` with entry names) in
 *   depth-first directory-listing order. A `rootDir` that does not exist
 *   (ENOENT) yields an empty array; any other read error is rethrown.
 */
async function walkFiles(rootDir) {
  let dirents;
  try {
    dirents = await fs.readdir(rootDir, { withFileTypes: true });
  } catch (error) {
    const isMissingDir =
      error && typeof error === "object" && "code" in error && error.code === "ENOENT";
    if (!isMissingDir) {
      throw error;
    }
    return [];
  }

  const collected = [];
  for (const dirent of dirents) {
    const fullPath = path.join(rootDir, dirent.name);
    if (dirent.isDirectory()) {
      if (ignoredDirNames.has(dirent.name)) {
        continue;
      }
      const nested = await walkFiles(fullPath);
      collected.push(...nested);
    } else if (dirent.isFile() && scanExtensions.has(path.extname(dirent.name))) {
      collected.push(fullPath);
    }
  }
  return collected;
}
|
||||
|
||||
/**
 * Express `filePath` relative to `repoRoot`, using "/" as the separator
 * regardless of the platform's native path separator.
 */
function normalizeRepoPath(filePath) {
  const relativeToRepo = path.relative(repoRoot, filePath);
  const segments = relativeToRepo.split(path.sep);
  return segments.join("/");
}
|
||||
|
||||
/**
 * Scan every collected file under `<repoRoot>/src` and report each line that
 * mentions Firecrawl and matches one of `suspiciousPatterns`.
 *
 * Files listed in `allowedFiles`, and files whose repo-relative path contains
 * ".test.", are skipped entirely.
 *
 * @returns Violations as `{ file, line, reason }` (1-based line numbers),
 *   sorted by file and then by line.
 */
export async function collectWebFetchProviderBoundaryViolations() {
  const reason = "core web-fetch runtime/tooling contains Firecrawl-specific fetch logic";
  // Cheap substring prefilter before running the regular expressions.
  const mentionsFirecrawl = (text) => text.includes("firecrawl") || text.includes("Firecrawl");
  const matchesSuspiciousPattern = (text) =>
    suspiciousPatterns.some((pattern) => pattern.test(text));

  const found = [];
  const candidates = await walkFiles(path.join(repoRoot, "src"));
  for (const absolutePath of candidates) {
    const file = normalizeRepoPath(absolutePath);
    const isExempt = allowedFiles.has(file) || file.includes(".test.");
    if (isExempt) {
      continue;
    }
    const source = await fs.readFile(absolutePath, "utf8");
    source.split(/\r?\n/).forEach((text, index) => {
      if (mentionsFirecrawl(text) && matchesSuspiciousPattern(text)) {
        found.push({ file, line: index + 1, reason });
      }
    });
  }

  return found.toSorted((a, b) => a.file.localeCompare(b.file) || a.line - b.line);
}
|
||||
|
||||
/**
 * Run the boundary check and report the result.
 *
 * With `--json` in `argv` the violations are written to stdout as
 * pretty-printed JSON followed by a newline. Otherwise each violation is
 * written to stderr as `file:line reason`.
 *
 * @param argv Command-line arguments (defaults to `process.argv.slice(2)`).
 * @param io Optional sink object; `io.stdout.write` / `io.stderr.write` are
 *   used when present, the corresponding `process` stream otherwise.
 * @returns 0 when there are no violations, 1 otherwise.
 */
export async function main(argv = process.argv.slice(2), io) {
  const wantsJson = argv.includes("--json");
  const violations = await collectWebFetchProviderBoundaryViolations();

  // The target stream is resolved lazily on each write, so a process stream
  // is only touched when no injected writer is available for it.
  const writeTo = (streamName, chunk) => {
    const target = io?.[streamName]?.write ? io[streamName] : process[streamName];
    target.write(chunk);
  };

  if (wantsJson) {
    writeTo("stdout", `${JSON.stringify(violations, null, 2)}\n`);
  } else {
    for (const violation of violations) {
      writeTo("stderr", `${violation.file}:${violation.line} ${violation.reason}\n`);
    }
  }

  return violations.length === 0 ? 0 : 1;
}
|
||||
|
||||
// Script entry point, registered through the shared runAsScript helper.
// When no io object is supplied and the check failed, the process is
// terminated with the failing exit code; otherwise the code is returned.
// NOTE(review): exactly when runAsScript invokes the callback is defined in
// ./lib/ts-guard-utils.mjs — confirm there.
runAsScript(import.meta.url, async (argv, io) => {
  const exitCode = await main(argv, io);
  const failedWithoutIo = !io && exitCode !== 0;
  if (failedWithoutIo) {
    process.exit(exitCode);
  }
  return exitCode;
});
|
||||
@@ -1,4 +1,5 @@
|
||||
import { extractReadableContent, fetchFirecrawlContent } from "../src/agents/tools/web-tools.js";
|
||||
import { fetchFirecrawlContent } from "../extensions/firecrawl/api.ts";
|
||||
import { extractReadableContent } from "../src/agents/tools/web-tools.js";
|
||||
|
||||
const DEFAULT_URLS = [
|
||||
"https://en.wikipedia.org/wiki/Web_scraping",
|
||||
|
||||
@@ -67,19 +67,6 @@ type WebFetchConfig = NonNullable<OpenClawConfig["tools"]>["web"] extends infer
|
||||
: undefined
|
||||
: undefined;
|
||||
|
||||
export type FetchFirecrawlContentParams = {
|
||||
url: string;
|
||||
extractMode: ExtractMode;
|
||||
apiKey: string;
|
||||
baseUrl: string;
|
||||
onlyMainContent: boolean;
|
||||
maxAgeMs: number;
|
||||
proxy: "auto" | "basic" | "stealth";
|
||||
storeInCache: boolean;
|
||||
timeoutSeconds: number;
|
||||
maxChars?: number;
|
||||
};
|
||||
|
||||
function resolveFetchConfig(cfg?: OpenClawConfig): WebFetchConfig {
|
||||
const fetch = cfg?.tools?.web?.fetch;
|
||||
if (!fetch || typeof fetch !== "object") {
|
||||
@@ -247,65 +234,6 @@ function normalizeContentType(value: string | null | undefined): string | undefi
|
||||
return trimmed || undefined;
|
||||
}
|
||||
|
||||
export async function fetchFirecrawlContent(params: FetchFirecrawlContentParams): Promise<{
|
||||
text: string;
|
||||
title?: string;
|
||||
finalUrl?: string;
|
||||
status?: number;
|
||||
warning?: string;
|
||||
}> {
|
||||
const config: OpenClawConfig = {
|
||||
tools: {
|
||||
web: {
|
||||
fetch: {
|
||||
provider: "firecrawl",
|
||||
},
|
||||
},
|
||||
},
|
||||
plugins: {
|
||||
entries: {
|
||||
firecrawl: {
|
||||
enabled: true,
|
||||
config: {
|
||||
webFetch: {
|
||||
apiKey: params.apiKey,
|
||||
baseUrl: params.baseUrl,
|
||||
onlyMainContent: params.onlyMainContent,
|
||||
maxAgeMs: params.maxAgeMs,
|
||||
timeoutSeconds: params.timeoutSeconds,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const resolved = resolveWebFetchDefinition({
|
||||
config,
|
||||
preferRuntimeProviders: false,
|
||||
providerId: "firecrawl",
|
||||
});
|
||||
if (!resolved) {
|
||||
throw new Error("Firecrawl web fetch provider is unavailable.");
|
||||
}
|
||||
|
||||
const payload = await resolved.definition.execute({
|
||||
url: params.url,
|
||||
extractMode: params.extractMode,
|
||||
maxChars: params.maxChars ?? DEFAULT_FETCH_MAX_CHARS,
|
||||
proxy: params.proxy,
|
||||
storeInCache: params.storeInCache,
|
||||
});
|
||||
|
||||
return {
|
||||
text: typeof payload.text === "string" ? payload.text : "",
|
||||
title: typeof payload.title === "string" ? payload.title : undefined,
|
||||
finalUrl: typeof payload.finalUrl === "string" ? payload.finalUrl : undefined,
|
||||
status: typeof payload.status === "number" ? payload.status : undefined,
|
||||
warning: typeof payload.warning === "string" ? payload.warning : undefined,
|
||||
};
|
||||
}
|
||||
|
||||
type WebFetchRuntimeParams = {
|
||||
url: string;
|
||||
extractMode: ExtractMode;
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
export { createWebFetchTool, extractReadableContent, fetchFirecrawlContent } from "./web-fetch.js";
|
||||
export { createWebFetchTool, extractReadableContent } from "./web-fetch.js";
|
||||
export { createWebSearchTool } from "./web-search.js";
|
||||
|
||||
@@ -544,6 +544,51 @@ describe("normalizeCompatibilityConfigValues", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
it("keeps explicit plugin-owned web fetch config while filling missing legacy fields", () => {
|
||||
const res = normalizeCompatibilityConfigValues({
|
||||
tools: {
|
||||
web: {
|
||||
fetch: {
|
||||
provider: "firecrawl",
|
||||
firecrawl: {
|
||||
apiKey: "legacy-firecrawl-key",
|
||||
baseUrl: "https://api.firecrawl.dev",
|
||||
onlyMainContent: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
plugins: {
|
||||
entries: {
|
||||
firecrawl: {
|
||||
enabled: true,
|
||||
config: {
|
||||
webFetch: {
|
||||
apiKey: "explicit-firecrawl-key",
|
||||
timeoutSeconds: 30,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig);
|
||||
|
||||
expect(res.config.plugins?.entries?.firecrawl).toEqual({
|
||||
enabled: true,
|
||||
config: {
|
||||
webFetch: {
|
||||
apiKey: "explicit-firecrawl-key",
|
||||
timeoutSeconds: 30,
|
||||
baseUrl: "https://api.firecrawl.dev",
|
||||
onlyMainContent: false,
|
||||
},
|
||||
},
|
||||
});
|
||||
expect(res.changes).toEqual([
|
||||
"Merged tools.web.fetch.firecrawl → plugins.entries.firecrawl.config.webFetch (filled missing fields from legacy; kept explicit plugin config values).",
|
||||
]);
|
||||
});
|
||||
|
||||
it("migrates legacy talk flat fields to provider/providers", () => {
|
||||
const res = normalizeCompatibilityConfigValues({
|
||||
talk: {
|
||||
|
||||
@@ -136,6 +136,7 @@ export function installWebSearchProviderContractSuite(params: {
|
||||
export function installWebFetchProviderContractSuite(params: {
|
||||
provider: Lazy<WebFetchProviderPlugin>;
|
||||
credentialValue: Lazy<unknown>;
|
||||
pluginId?: string;
|
||||
}) {
|
||||
it("satisfies the base web fetch provider contract", () => {
|
||||
const provider = resolveLazy(params.provider);
|
||||
@@ -152,11 +153,28 @@ export function installWebFetchProviderContractSuite(params: {
|
||||
|
||||
expect(provider.envVars).toEqual([...new Set(provider.envVars)]);
|
||||
expect(provider.envVars.every((entry) => entry.trim().length > 0)).toBe(true);
|
||||
expect(provider.credentialPath.trim()).not.toBe("");
|
||||
if (provider.inactiveSecretPaths) {
|
||||
expect(provider.inactiveSecretPaths).toEqual([...new Set(provider.inactiveSecretPaths)]);
|
||||
// Runtime inactive-path classification uses inactiveSecretPaths as the complete list.
|
||||
expect(provider.inactiveSecretPaths).toContain(provider.credentialPath);
|
||||
}
|
||||
|
||||
const fetchConfigTarget: Record<string, unknown> = {};
|
||||
provider.setCredentialValue(fetchConfigTarget, credentialValue);
|
||||
expect(provider.getCredentialValue(fetchConfigTarget)).toEqual(credentialValue);
|
||||
|
||||
if (provider.setConfiguredCredentialValue && provider.getConfiguredCredentialValue) {
|
||||
const configTarget = {} as OpenClawConfig;
|
||||
provider.setConfiguredCredentialValue(configTarget, credentialValue);
|
||||
expect(provider.getConfiguredCredentialValue(configTarget)).toEqual(credentialValue);
|
||||
}
|
||||
|
||||
if (provider.applySelectionConfig && params.pluginId) {
|
||||
const applied = provider.applySelectionConfig({} as OpenClawConfig);
|
||||
expect(applied.plugins?.entries?.[params.pluginId]?.enabled).toBe(true);
|
||||
}
|
||||
|
||||
const config = {
|
||||
tools: {
|
||||
web: {
|
||||
|
||||
@@ -1421,12 +1421,19 @@ export type WebFetchProviderPlugin = {
|
||||
signupUrl: string;
|
||||
docsUrl?: string;
|
||||
autoDetectOrder?: number;
|
||||
/** Canonical plugin-owned config path for this provider's primary fetch credential. */
|
||||
credentialPath: string;
|
||||
/**
|
||||
* Legacy or inactive credential paths that should warn but not activate this provider.
|
||||
* Include credentialPath here when overriding the list, because runtime classification
|
||||
* treats inactiveSecretPaths as the full inactive surface for this provider.
|
||||
*/
|
||||
inactiveSecretPaths?: string[];
|
||||
getCredentialValue: (fetchConfig?: Record<string, unknown>) => unknown;
|
||||
setCredentialValue: (fetchConfigTarget: Record<string, unknown>, value: unknown) => void;
|
||||
getConfiguredCredentialValue?: (config?: OpenClawConfig) => unknown;
|
||||
setConfiguredCredentialValue?: (configTarget: OpenClawConfig, value: unknown) => void;
|
||||
/** Apply the minimal config needed to select this provider without scattering plugin config writes in core. */
|
||||
applySelectionConfig?: (config: OpenClawConfig) => OpenClawConfig;
|
||||
resolveRuntimeMetadata?: (
|
||||
ctx: WebFetchRuntimeMetadataContext,
|
||||
|
||||
@@ -108,6 +108,9 @@ describe("exec SecretRef id parity", () => {
|
||||
if (id.startsWith("plugins.entries.") && id.includes(".config.webSearch.apiKey")) {
|
||||
return "tools.web.search";
|
||||
}
|
||||
if (id.startsWith("plugins.entries.") && id.includes(".config.webFetch.apiKey")) {
|
||||
return "tools.web.fetch";
|
||||
}
|
||||
if (id.startsWith("tools.web.search.")) {
|
||||
return "tools.web.search";
|
||||
}
|
||||
|
||||
@@ -39,6 +39,7 @@ export function describeWebFetchProviderContracts(pluginId: string) {
|
||||
}
|
||||
return entry.credentialValue;
|
||||
},
|
||||
pluginId,
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
31
test/web-fetch-provider-boundary.test.ts
Normal file
31
test/web-fetch-provider-boundary.test.ts
Normal file
@@ -0,0 +1,31 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
collectWebFetchProviderBoundaryViolations,
|
||||
main,
|
||||
} from "../scripts/check-web-fetch-provider-boundaries.mjs";
|
||||
import { createCapturedIo } from "./helpers/captured-io.js";
|
||||
|
||||
// Both scans are started once at module load; the test case awaits these
// shared promises instead of re-running the scan.
const violationsPromise = collectWebFetchProviderBoundaryViolations();
|
||||
const jsonOutputPromise = getJsonOutput();
|
||||
|
||||
/**
 * Run the guard's `main` in `--json` mode against a captured io object.
 *
 * @returns The exit code, the captured stderr text, and the captured stdout
 *   parsed as JSON.
 */
async function getJsonOutput() {
  const capture = createCapturedIo();
  const exitCode = await main(["--json"], capture.io);
  const stderr = capture.readStderr();
  const json = JSON.parse(capture.readStdout());
  return { exitCode, stderr, json };
}
|
||||
|
||||
// Asserts that the repository currently has no boundary violations, both via
// the direct collector call and via the JSON output of the CLI entry point.
describe("web fetch provider boundary inventory", () => {
  it("keeps Firecrawl-specific fetch logic out of core runtime/tooling", async () => {
    const violations = await violationsPromise;
    const { exitCode, stderr, json } = await jsonOutputPromise;

    expect(violations).toEqual([]);
    expect(exitCode).toBe(0);
    expect(stderr).toBe("");
    expect(json).toEqual([]);
  });
});
|
||||
Reference in New Issue
Block a user