From 6eca1949d5d1684df67fa3f6be14f99de6e6da84 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Thu, 2 Apr 2026 20:53:57 +0900 Subject: [PATCH] refactor(plugins): tighten web fetch provider boundary (#59646) * refactor(plugins): tighten web fetch provider boundary * fix(config): sync fetch secret parity and baseline * fix(ci): enforce web fetch boundary guard --- .github/workflows/ci.yml | 7 + docs/.generated/config-baseline.jsonl | 19 ++- extensions/firecrawl/api.ts | 66 +++++++++ .../firecrawl/src/firecrawl-tools.test.ts | 60 ++++++++ package.json | 1 + .../check-web-fetch-provider-boundaries.mjs | 128 ++++++++++++++++++ scripts/firecrawl-compare.ts | 3 +- src/agents/tools/web-fetch.ts | 72 ---------- src/agents/tools/web-tools.ts | 2 +- .../doctor-legacy-config.migrations.test.ts | 45 ++++++ src/plugins/contracts/suites.ts | 18 +++ src/plugins/types.ts | 7 + src/secrets/exec-secret-ref-id-parity.test.ts | 3 + .../plugins/web-fetch-provider-contract.ts | 1 + test/web-fetch-provider-boundary.test.ts | 31 +++++ 15 files changed, 378 insertions(+), 85 deletions(-) create mode 100644 extensions/firecrawl/api.ts create mode 100644 scripts/check-web-fetch-provider-boundaries.mjs create mode 100644 test/web-fetch-provider-boundary.test.ts diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6c975f84085..758a9445734 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -546,6 +546,11 @@ jobs: continue-on-error: true run: pnpm run lint:web-search-provider-boundaries + - name: Run web fetch provider boundary guard + id: web_fetch_provider_boundary + continue-on-error: true + run: pnpm run lint:web-fetch-provider-boundaries + - name: Run extension src boundary guard id: extension_src_outside_plugin_sdk_boundary continue-on-error: true @@ -593,6 +598,7 @@ jobs: NO_EXTENSION_TEST_CORE_IMPORTS_OUTCOME: ${{ steps.no_extension_test_core_imports.outcome }} PLUGIN_SDK_SUBPATHS_EXPORTED_OUTCOME: ${{ steps.plugin_sdk_subpaths_exported.outcome }} WEB_SEARCH_PROVIDER_BOUNDARY_OUTCOME: ${{ steps.web_search_provider_boundary.outcome }} + WEB_FETCH_PROVIDER_BOUNDARY_OUTCOME: ${{ steps.web_fetch_provider_boundary.outcome }} EXTENSION_SRC_OUTSIDE_PLUGIN_SDK_BOUNDARY_OUTCOME: ${{ steps.extension_src_outside_plugin_sdk_boundary.outcome }} EXTENSION_PLUGIN_SDK_INTERNAL_BOUNDARY_OUTCOME: ${{ steps.extension_plugin_sdk_internal_boundary.outcome }} EXTENSION_RELATIVE_OUTSIDE_PACKAGE_BOUNDARY_OUTCOME: ${{ steps.extension_relative_outside_package_boundary.outcome }} @@ -612,6 +618,7 @@ jobs: "lint:plugins:no-extension-test-core-imports|$NO_EXTENSION_TEST_CORE_IMPORTS_OUTCOME" \ "lint:plugins:plugin-sdk-subpaths-exported|$PLUGIN_SDK_SUBPATHS_EXPORTED_OUTCOME" \ "web-search-provider-boundary|$WEB_SEARCH_PROVIDER_BOUNDARY_OUTCOME" \ + "web-fetch-provider-boundary|$WEB_FETCH_PROVIDER_BOUNDARY_OUTCOME" \ "extension-src-outside-plugin-sdk-boundary|$EXTENSION_SRC_OUTSIDE_PLUGIN_SDK_BOUNDARY_OUTCOME" \ "extension-plugin-sdk-internal-boundary|$EXTENSION_PLUGIN_SDK_INTERNAL_BOUNDARY_OUTCOME" \ "extension-relative-outside-package-boundary|$EXTENSION_RELATIVE_OUTSIDE_PACKAGE_BOUNDARY_OUTCOME" \ diff --git a/docs/.generated/config-baseline.jsonl b/docs/.generated/config-baseline.jsonl index c82a49455b6..458786c165f 100644 --- a/docs/.generated/config-baseline.jsonl +++ b/docs/.generated/config-baseline.jsonl @@ -1,4 +1,4 @@ -{"generatedBy":"scripts/generate-config-doc-baseline.ts","recordType":"meta","totalPaths":5767} +{"generatedBy":"scripts/generate-config-doc-baseline.ts","recordType":"meta","totalPaths":5764} {"recordType":"path","path":"acp","kind":"core","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["advanced"],"label":"ACP","help":"ACP runtime controls for enabling dispatch, selecting backends, constraining allowed agent targets, and tuning streamed turn projection behavior.","hasChildren":true} {"recordType":"path","path":"acp.allowedAgents","kind":"core","type":"array","required":false,"deprecated":false,"sensitive":false,"tags":["access"],"label":"ACP Allowed Agents","help":"Allowlist of ACP target agent ids permitted for ACP runtime sessions. Empty means no additional allowlist restriction.","hasChildren":true} {"recordType":"path","path":"acp.allowedAgents.*","kind":"core","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false} @@ -4402,6 +4402,12 @@ {"recordType":"path","path":"plugins.entries.feishu.subagent.allowModelOverride","kind":"plugin","type":"boolean","required":false,"deprecated":false,"sensitive":false,"tags":["access"],"label":"Allow Plugin Subagent Model Override","help":"Explicitly allows this plugin to request provider/model overrides in background subagent runs. Keep false unless the plugin is trusted to steer model selection.","hasChildren":false} {"recordType":"path","path":"plugins.entries.firecrawl","kind":"plugin","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["advanced"],"label":"@openclaw/firecrawl-plugin","help":"OpenClaw Firecrawl plugin (plugin: firecrawl)","hasChildren":true} {"recordType":"path","path":"plugins.entries.firecrawl.config","kind":"plugin","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["advanced"],"label":"@openclaw/firecrawl-plugin Config","help":"Plugin-defined config payload for firecrawl.","hasChildren":true} +{"recordType":"path","path":"plugins.entries.firecrawl.config.webFetch","kind":"plugin","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":true} +{"recordType":"path","path":"plugins.entries.firecrawl.config.webFetch.apiKey","kind":"plugin","type":["object","string"],"required":false,"deprecated":false,"sensitive":true,"tags":["auth","security"],"label":"Firecrawl Fetch API Key","help":"Firecrawl API key for web fetch fallback (fallback: FIRECRAWL_API_KEY env var).","hasChildren":false} +{"recordType":"path","path":"plugins.entries.firecrawl.config.webFetch.baseUrl","kind":"plugin","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":["advanced","url-secret"],"label":"Firecrawl Fetch Base URL","help":"Firecrawl Fetch base URL override.","hasChildren":false} +{"recordType":"path","path":"plugins.entries.firecrawl.config.webFetch.maxAgeMs","kind":"plugin","type":"number","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false} +{"recordType":"path","path":"plugins.entries.firecrawl.config.webFetch.onlyMainContent","kind":"plugin","type":"boolean","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false} +{"recordType":"path","path":"plugins.entries.firecrawl.config.webFetch.timeoutSeconds","kind":"plugin","type":"number","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false} {"recordType":"path","path":"plugins.entries.firecrawl.config.webSearch","kind":"plugin","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":true} {"recordType":"path","path":"plugins.entries.firecrawl.config.webSearch.apiKey","kind":"plugin","type":["object","string"],"required":false,"deprecated":false,"sensitive":true,"tags":["auth","security"],"label":"Firecrawl Search API Key","help":"Firecrawl API key for web search (fallback: FIRECRAWL_API_KEY env var).","hasChildren":false} {"recordType":"path","path":"plugins.entries.firecrawl.config.webSearch.baseUrl","kind":"plugin","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":["advanced","url-secret"],"label":"Firecrawl Search Base URL","help":"Firecrawl Search base URL override.","hasChildren":false} @@ -5694,20 +5700,11 @@ {"recordType":"path","path":"tools.web.fetch","kind":"core","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":true} {"recordType":"path","path":"tools.web.fetch.cacheTtlMinutes","kind":"core","type":"number","required":false,"deprecated":false,"sensitive":false,"tags":["performance","storage","tools"],"label":"Web Fetch Cache TTL (min)","help":"Cache TTL in minutes for web_fetch results.","hasChildren":false} {"recordType":"path","path":"tools.web.fetch.enabled","kind":"core","type":"boolean","required":false,"deprecated":false,"sensitive":false,"tags":["tools"],"label":"Enable Web Fetch Tool","help":"Enable the web_fetch tool (lightweight HTTP fetch).","hasChildren":false} -{"recordType":"path","path":"tools.web.fetch.firecrawl","kind":"core","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":true} -{"recordType":"path","path":"tools.web.fetch.firecrawl.apiKey","kind":"core","type":["object","string"],"required":false,"deprecated":false,"sensitive":true,"tags":["auth","security","tools"],"label":"Firecrawl API Key","help":"Firecrawl API key (fallback: FIRECRAWL_API_KEY env var).","hasChildren":true} -{"recordType":"path","path":"tools.web.fetch.firecrawl.apiKey.id","kind":"core","type":"string","required":true,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false} -{"recordType":"path","path":"tools.web.fetch.firecrawl.apiKey.provider","kind":"core","type":"string","required":true,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false} -{"recordType":"path","path":"tools.web.fetch.firecrawl.apiKey.source","kind":"core","type":"string","required":true,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false} -{"recordType":"path","path":"tools.web.fetch.firecrawl.baseUrl","kind":"core","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":["tools","url-secret"],"label":"Firecrawl Base URL","help":"Firecrawl base URL (e.g. https://api.firecrawl.dev or custom endpoint).","hasChildren":false} -{"recordType":"path","path":"tools.web.fetch.firecrawl.enabled","kind":"core","type":"boolean","required":false,"deprecated":false,"sensitive":false,"tags":["tools"],"label":"Enable Firecrawl Fallback","help":"Enable Firecrawl fallback for web_fetch (if configured).","hasChildren":false} -{"recordType":"path","path":"tools.web.fetch.firecrawl.maxAgeMs","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["performance","tools"],"label":"Firecrawl Cache Max Age (ms)","help":"Firecrawl maxAge (ms) for cached results when supported by the API.","hasChildren":false} -{"recordType":"path","path":"tools.web.fetch.firecrawl.onlyMainContent","kind":"core","type":"boolean","required":false,"deprecated":false,"sensitive":false,"tags":["tools"],"label":"Firecrawl Main Content Only","help":"When true, Firecrawl returns only the main content (default: true).","hasChildren":false} -{"recordType":"path","path":"tools.web.fetch.firecrawl.timeoutSeconds","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["performance","tools"],"label":"Firecrawl Timeout (sec)","help":"Timeout in seconds for Firecrawl requests.","hasChildren":false} {"recordType":"path","path":"tools.web.fetch.maxChars","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["performance","tools"],"label":"Web Fetch Max Chars","help":"Max characters returned by web_fetch (truncated).","hasChildren":false} {"recordType":"path","path":"tools.web.fetch.maxCharsCap","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["performance","tools"],"label":"Web Fetch Hard Max Chars","help":"Hard cap for web_fetch maxChars (applies to config and tool calls).","hasChildren":false} {"recordType":"path","path":"tools.web.fetch.maxRedirects","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["performance","storage","tools"],"label":"Web Fetch Max Redirects","help":"Maximum redirects allowed for web_fetch (default: 3).","hasChildren":false} {"recordType":"path","path":"tools.web.fetch.maxResponseBytes","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["performance","tools"],"label":"Web Fetch Max Download Size (bytes)","help":"Max download size before truncation.","hasChildren":false} +{"recordType":"path","path":"tools.web.fetch.provider","kind":"core","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":["tools"],"label":"Web Fetch Provider","help":"Web fetch fallback provider id.","hasChildren":false} {"recordType":"path","path":"tools.web.fetch.readability","kind":"core","type":"boolean","required":false,"deprecated":false,"sensitive":false,"tags":["tools"],"label":"Web Fetch Readability Extraction","help":"Use Readability to extract main content from HTML (fallbacks to basic HTML cleanup).","hasChildren":false} {"recordType":"path","path":"tools.web.fetch.timeoutSeconds","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["performance","tools"],"label":"Web Fetch Timeout (sec)","help":"Timeout in seconds for web_fetch requests.","hasChildren":false} {"recordType":"path","path":"tools.web.fetch.userAgent","kind":"core","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":["tools"],"label":"Web Fetch User-Agent","help":"Override User-Agent header for web_fetch requests.","hasChildren":false} diff --git a/extensions/firecrawl/api.ts b/extensions/firecrawl/api.ts new file mode 100644 index 00000000000..e77b2ea0c9d --- /dev/null +++ b/extensions/firecrawl/api.ts @@ -0,0 +1,66 @@ +import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; +import { runFirecrawlScrape } from "./src/firecrawl-client.js"; + +export type FetchFirecrawlContentParams = { + url: string; + extractMode: "markdown" | "text"; + apiKey: string; + baseUrl: string; + onlyMainContent: boolean; + maxAgeMs: number; + proxy: "auto" | "basic" | "stealth"; + storeInCache: boolean; + timeoutSeconds: number; + maxChars?: number; +}; + +export type FetchFirecrawlContentResult = { + text: string; + title?: string; + finalUrl?: string; + status?: number; + warning?: string; +}; + +export async function fetchFirecrawlContent( + params: FetchFirecrawlContentParams, +): Promise { + const cfg: OpenClawConfig = { + plugins: { + entries: { + firecrawl: { + enabled: true, + config: { + webFetch: { + apiKey: params.apiKey, + baseUrl: params.baseUrl, + onlyMainContent: params.onlyMainContent, + maxAgeMs: params.maxAgeMs, + timeoutSeconds: params.timeoutSeconds, + }, + }, + }, + }, + }, + }; + + const result = await runFirecrawlScrape({ + cfg, + url: params.url, + extractMode: params.extractMode, + maxChars: params.maxChars, + proxy: params.proxy, + storeInCache: params.storeInCache, + onlyMainContent: params.onlyMainContent, + maxAgeMs: params.maxAgeMs, + timeoutSeconds: params.timeoutSeconds, + }); + + return { + text: typeof result.text === "string" ? result.text : "", + title: typeof result.title === "string" ? result.title : undefined, + finalUrl: typeof result.finalUrl === "string" ? result.finalUrl : undefined, + status: typeof result.status === "number" ? result.status : undefined, + warning: typeof result.warning === "string" ? result.warning : undefined, + }; +} diff --git a/extensions/firecrawl/src/firecrawl-tools.test.ts b/extensions/firecrawl/src/firecrawl-tools.test.ts index 4a8c4126047..865509f4bf4 100644 --- a/extensions/firecrawl/src/firecrawl-tools.test.ts +++ b/extensions/firecrawl/src/firecrawl-tools.test.ts @@ -29,13 +29,17 @@ vi.mock("./firecrawl-client.js", () => ({ describe("firecrawl tools", () => { const priorFetch = global.fetch; + let fetchFirecrawlContent: typeof import("../api.js").fetchFirecrawlContent; let createFirecrawlWebSearchProvider: typeof import("./firecrawl-search-provider.js").createFirecrawlWebSearchProvider; + let createFirecrawlWebFetchProvider: typeof import("./firecrawl-fetch-provider.js").createFirecrawlWebFetchProvider; let createFirecrawlSearchTool: typeof import("./firecrawl-search-tool.js").createFirecrawlSearchTool; let createFirecrawlScrapeTool: typeof import("./firecrawl-scrape-tool.js").createFirecrawlScrapeTool; let firecrawlClientTesting: typeof import("./firecrawl-client.js").__testing; beforeAll(async () => { vi.resetModules(); + ({ fetchFirecrawlContent } = await import("../api.js")); + ({ createFirecrawlWebFetchProvider } = await import("./firecrawl-fetch-provider.js")); ({ createFirecrawlWebSearchProvider } = await import("./firecrawl-search-provider.js")); ({ createFirecrawlSearchTool } = await import("./firecrawl-search-tool.js")); ({ createFirecrawlScrapeTool } = await import("./firecrawl-scrape-tool.js")); @@ -199,6 +203,62 @@ describe("firecrawl tools", () => { }); }); + it("keeps the compare-helper fetch facade owned by the Firecrawl extension", async () => { + await fetchFirecrawlContent({ + url: "https://docs.openclaw.ai", + extractMode: "markdown", + apiKey: "firecrawl-key", + baseUrl: "https://api.firecrawl.dev", + onlyMainContent: false, + maxAgeMs: 5000, + proxy: "stealth", + storeInCache: false, + timeoutSeconds: 22, + maxChars: 1500, + }); + + expect(runFirecrawlScrape).toHaveBeenCalledWith({ + cfg: { + plugins: { + entries: { + firecrawl: { + enabled: true, + config: { + webFetch: { + apiKey: "firecrawl-key", + baseUrl: "https://api.firecrawl.dev", + onlyMainContent: false, + maxAgeMs: 5000, + timeoutSeconds: 22, + }, + }, + }, + }, + }, + }, + url: "https://docs.openclaw.ai", + extractMode: "markdown", + maxChars: 1500, + proxy: "stealth", + storeInCache: false, + onlyMainContent: false, + maxAgeMs: 5000, + timeoutSeconds: 22, + }); + }); + + it("applies minimal provider-selection config for fetch providers", () => { + const provider = createFirecrawlWebFetchProvider(); + if (!provider.applySelectionConfig) { + throw new Error("Expected applySelectionConfig to be defined"); + } + const applied = provider.applySelectionConfig({}); + + expect(provider.id).toBe("firecrawl"); + expect(provider.credentialPath).toBe("plugins.entries.firecrawl.config.webFetch.apiKey"); + expect(applied.plugins?.entries?.firecrawl?.enabled).toBe(true); + }); + it("passes proxy and storeInCache through the fetch provider tool", async () => { const { createFirecrawlWebFetchProvider } = await import("./firecrawl-fetch-provider.js"); const provider = createFirecrawlWebFetchProvider(); diff --git a/package.json b/package.json index c1a7492b6b9..120cb3df185 100644 --- a/package.json +++ b/package.json @@ -1112,6 +1112,7 @@ "lint:tmp:no-random-messaging": "node scripts/check-no-random-messaging-tmp.mjs", "lint:tmp:no-raw-channel-fetch": "node scripts/check-no-raw-channel-fetch.mjs", "lint:ui:no-raw-window-open": "node scripts/check-no-raw-window-open.mjs", + "lint:web-fetch-provider-boundaries": "node scripts/check-web-fetch-provider-boundaries.mjs", "lint:web-search-provider-boundaries": "node scripts/check-web-search-provider-boundaries.mjs", "lint:webhook:no-low-level-body-read": "node scripts/check-webhook-auth-body-order.mjs", "mac:open": "open dist/OpenClaw.app", diff --git a/scripts/check-web-fetch-provider-boundaries.mjs b/scripts/check-web-fetch-provider-boundaries.mjs new file mode 100644 index 00000000000..2d39159f4c8 --- /dev/null +++ b/scripts/check-web-fetch-provider-boundaries.mjs @@ -0,0 +1,128 @@ +#!/usr/bin/env node + +import { promises as fs } from "node:fs"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import { runAsScript } from "./lib/ts-guard-utils.mjs"; + +const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), ".."); +const scanExtensions = new Set([".ts", ".js", ".mjs", ".cjs"]); +const ignoredDirNames = new Set([ + ".artifacts", + ".git", + ".turbo", + "build", + "coverage", + "dist", + "extensions", + "node_modules", +]); +const allowedFiles = new Set([ + "src/agents/tools/web-fetch.test-harness.ts", + "src/config/legacy-web-fetch.ts", + "src/config/zod-schema.agent-runtime.ts", + "src/plugins/bundled-provider-auth-env-vars.generated.ts", + "src/secrets/target-registry-data.ts", +]); +const suspiciousPatterns = [ + /fetchFirecrawlContent/, + /firecrawl-fetch-provider\.js/, + /createFirecrawlWebFetchProvider/, + /providerId:\s*"firecrawl"/, + /provider:\s*"firecrawl"/, + /id:\s*"firecrawl"/, +]; + +async function walkFiles(rootDir) { + const out = []; + let entries = []; + try { + entries = await fs.readdir(rootDir, { withFileTypes: true }); + } catch (error) { + if (error && typeof error === "object" && "code" in error && error.code === "ENOENT") { + return out; + } + throw error; + } + for (const entry of entries) { + const entryPath = path.join(rootDir, entry.name); + if (entry.isDirectory()) { + if (!ignoredDirNames.has(entry.name)) { + out.push(...(await walkFiles(entryPath))); + } + continue; + } + if (entry.isFile() && scanExtensions.has(path.extname(entry.name))) { + out.push(entryPath); + } + } + return out; +} + +function normalizeRepoPath(filePath) { + return path.relative(repoRoot, filePath).split(path.sep).join("/"); +} + +export async function collectWebFetchProviderBoundaryViolations() { + const files = await walkFiles(path.join(repoRoot, "src")); + const violations = []; + for (const filePath of files) { + const relativeFile = normalizeRepoPath(filePath); + if (allowedFiles.has(relativeFile) || relativeFile.includes(".test.")) { + continue; + } + const content = await fs.readFile(filePath, "utf8"); + const lines = content.split(/\r?\n/); + for (const [index, line] of lines.entries()) { + if (!line.includes("firecrawl") && !line.includes("Firecrawl")) { + continue; + } + if (!suspiciousPatterns.some((pattern) => pattern.test(line))) { + continue; + } + violations.push({ + file: relativeFile, + line: index + 1, + reason: "core web-fetch runtime/tooling contains Firecrawl-specific fetch logic", + }); + } + } + return violations.toSorted( + (left, right) => left.file.localeCompare(right.file) || left.line - right.line, + ); +} + +export async function main(argv = process.argv.slice(2), io) { + const json = argv.includes("--json"); + const violations = await collectWebFetchProviderBoundaryViolations(); + const writeStdout = (chunk) => { + if (io?.stdout?.write) { + io.stdout.write(chunk); + return; + } + process.stdout.write(chunk); + }; + const writeStderr = (chunk) => { + if (io?.stderr?.write) { + io.stderr.write(chunk); + return; + } + process.stderr.write(chunk); + }; + if (json) { + writeStdout(`${JSON.stringify(violations, null, 2)}\n`); + } else if (violations.length > 0) { + for (const violation of violations) { + writeStderr(`${violation.file}:${violation.line} ${violation.reason}\n`); + } + } + return violations.length === 0 ? 0 : 1; +} + +runAsScript(import.meta.url, async (argv, io) => { + const exitCode = await main(argv, io); + if (!io && exitCode !== 0) { + process.exit(exitCode); + } + return exitCode; +}); diff --git a/scripts/firecrawl-compare.ts b/scripts/firecrawl-compare.ts index 58a8d96995a..345a66e1670 100644 --- a/scripts/firecrawl-compare.ts +++ b/scripts/firecrawl-compare.ts @@ -1,4 +1,5 @@ -import { extractReadableContent, fetchFirecrawlContent } from "../src/agents/tools/web-tools.js"; +import { fetchFirecrawlContent } from "../extensions/firecrawl/api.ts"; +import { extractReadableContent } from "../src/agents/tools/web-tools.js"; const DEFAULT_URLS = [ "https://en.wikipedia.org/wiki/Web_scraping", diff --git a/src/agents/tools/web-fetch.ts b/src/agents/tools/web-fetch.ts index cf04469a3c3..595ec750abe 100644 --- a/src/agents/tools/web-fetch.ts +++ b/src/agents/tools/web-fetch.ts @@ -67,19 +67,6 @@ type WebFetchConfig = NonNullable["web"] extends infer : undefined : undefined; -export type FetchFirecrawlContentParams = { - url: string; - extractMode: ExtractMode; - apiKey: string; - baseUrl: string; - onlyMainContent: boolean; - maxAgeMs: number; - proxy: "auto" | "basic" | "stealth"; - storeInCache: boolean; - timeoutSeconds: number; - maxChars?: number; -}; - function resolveFetchConfig(cfg?: OpenClawConfig): WebFetchConfig { const fetch = cfg?.tools?.web?.fetch; if (!fetch || typeof fetch !== "object") { @@ -247,65 +234,6 @@ function normalizeContentType(value: string | null | undefined): string | undefi return trimmed || undefined; } -export async function fetchFirecrawlContent(params: FetchFirecrawlContentParams): Promise<{ - text: string; - title?: string; - finalUrl?: string; - status?: number; - warning?: string; -}> { - const config: OpenClawConfig = { - tools: { - web: { - fetch: { - provider: "firecrawl", - }, - }, - }, - plugins: { - entries: { - firecrawl: { - enabled: true, - config: { - webFetch: { - apiKey: params.apiKey, - baseUrl: params.baseUrl, - onlyMainContent: params.onlyMainContent, - maxAgeMs: params.maxAgeMs, - timeoutSeconds: params.timeoutSeconds, - }, - }, - }, - }, - }, - }; - - const resolved = resolveWebFetchDefinition({ - config, - preferRuntimeProviders: false, - providerId: "firecrawl", - }); - if (!resolved) { - throw new Error("Firecrawl web fetch provider is unavailable."); - } - - const payload = await resolved.definition.execute({ - url: params.url, - extractMode: params.extractMode, - maxChars: params.maxChars ?? DEFAULT_FETCH_MAX_CHARS, - proxy: params.proxy, - storeInCache: params.storeInCache, - }); - - return { - text: typeof payload.text === "string" ? payload.text : "", - title: typeof payload.title === "string" ? payload.title : undefined, - finalUrl: typeof payload.finalUrl === "string" ? payload.finalUrl : undefined, - status: typeof payload.status === "number" ? payload.status : undefined, - warning: typeof payload.warning === "string" ? payload.warning : undefined, - }; -} - type WebFetchRuntimeParams = { url: string; extractMode: ExtractMode; diff --git a/src/agents/tools/web-tools.ts b/src/agents/tools/web-tools.ts index 3acaa4c763f..f509afb8dd1 100644 --- a/src/agents/tools/web-tools.ts +++ b/src/agents/tools/web-tools.ts @@ -1,2 +1,2 @@ -export { createWebFetchTool, extractReadableContent, fetchFirecrawlContent } from "./web-fetch.js"; +export { createWebFetchTool, extractReadableContent } from "./web-fetch.js"; export { createWebSearchTool } from "./web-search.js"; diff --git a/src/commands/doctor-legacy-config.migrations.test.ts b/src/commands/doctor-legacy-config.migrations.test.ts index 73bf913ccd1..b1b4cc7e25c 100644 --- a/src/commands/doctor-legacy-config.migrations.test.ts +++ b/src/commands/doctor-legacy-config.migrations.test.ts @@ -544,6 +544,51 @@ describe("normalizeCompatibilityConfigValues", () => { ]); }); + it("keeps explicit plugin-owned web fetch config while filling missing legacy fields", () => { + const res = normalizeCompatibilityConfigValues({ + tools: { + web: { + fetch: { + provider: "firecrawl", + firecrawl: { + apiKey: "legacy-firecrawl-key", + baseUrl: "https://api.firecrawl.dev", + onlyMainContent: false, + }, + }, + }, + }, + plugins: { + entries: { + firecrawl: { + enabled: true, + config: { + webFetch: { + apiKey: "explicit-firecrawl-key", + timeoutSeconds: 30, + }, + }, + }, + }, + }, + } as OpenClawConfig); + + expect(res.config.plugins?.entries?.firecrawl).toEqual({ + enabled: true, + config: { + webFetch: { + apiKey: "explicit-firecrawl-key", + timeoutSeconds: 30, + baseUrl: "https://api.firecrawl.dev", + onlyMainContent: false, + }, + }, + }); + expect(res.changes).toEqual([ + "Merged tools.web.fetch.firecrawl → plugins.entries.firecrawl.config.webFetch (filled missing fields from legacy; kept explicit plugin config values).", + ]); + }); + it("migrates legacy talk flat fields to provider/providers", () => { const res = normalizeCompatibilityConfigValues({ talk: { diff --git a/src/plugins/contracts/suites.ts b/src/plugins/contracts/suites.ts index e2daa2be9f9..5d46d9caab5 100644 --- a/src/plugins/contracts/suites.ts +++ b/src/plugins/contracts/suites.ts @@ -136,6 +136,7 @@ export function installWebSearchProviderContractSuite(params: { export function installWebFetchProviderContractSuite(params: { provider: Lazy; credentialValue: Lazy; + pluginId?: string; }) { it("satisfies the base web fetch provider contract", () => { const provider = resolveLazy(params.provider); @@ -152,11 +153,28 @@ export function installWebFetchProviderContractSuite(params: { expect(provider.envVars).toEqual([...new Set(provider.envVars)]); expect(provider.envVars.every((entry) => entry.trim().length > 0)).toBe(true); + expect(provider.credentialPath.trim()).not.toBe(""); + if (provider.inactiveSecretPaths) { + expect(provider.inactiveSecretPaths).toEqual([...new Set(provider.inactiveSecretPaths)]); + // Runtime inactive-path classification uses inactiveSecretPaths as the complete list. + expect(provider.inactiveSecretPaths).toContain(provider.credentialPath); + } const fetchConfigTarget: Record = {}; provider.setCredentialValue(fetchConfigTarget, credentialValue); expect(provider.getCredentialValue(fetchConfigTarget)).toEqual(credentialValue); + if (provider.setConfiguredCredentialValue && provider.getConfiguredCredentialValue) { + const configTarget = {} as OpenClawConfig; + provider.setConfiguredCredentialValue(configTarget, credentialValue); + expect(provider.getConfiguredCredentialValue(configTarget)).toEqual(credentialValue); + } + + if (provider.applySelectionConfig && params.pluginId) { + const applied = provider.applySelectionConfig({} as OpenClawConfig); + expect(applied.plugins?.entries?.[params.pluginId]?.enabled).toBe(true); + } + const config = { tools: { web: { diff --git a/src/plugins/types.ts b/src/plugins/types.ts index dacdf7f68bc..ab2562206ed 100644 --- a/src/plugins/types.ts +++ b/src/plugins/types.ts @@ -1421,12 +1421,19 @@ export type WebFetchProviderPlugin = { signupUrl: string; docsUrl?: string; autoDetectOrder?: number; + /** Canonical plugin-owned config path for this provider's primary fetch credential. */ credentialPath: string; + /** + * Legacy or inactive credential paths that should warn but not activate this provider. + * Include credentialPath here when overriding the list, because runtime classification + * treats inactiveSecretPaths as the full inactive surface for this provider. + */ inactiveSecretPaths?: string[]; getCredentialValue: (fetchConfig?: Record) => unknown; setCredentialValue: (fetchConfigTarget: Record, value: unknown) => void; getConfiguredCredentialValue?: (config?: OpenClawConfig) => unknown; setConfiguredCredentialValue?: (configTarget: OpenClawConfig, value: unknown) => void; + /** Apply the minimal config needed to select this provider without scattering plugin config writes in core. */ applySelectionConfig?: (config: OpenClawConfig) => OpenClawConfig; resolveRuntimeMetadata?: ( ctx: WebFetchRuntimeMetadataContext, diff --git a/src/secrets/exec-secret-ref-id-parity.test.ts b/src/secrets/exec-secret-ref-id-parity.test.ts index 13d96423f05..24f88078634 100644 --- a/src/secrets/exec-secret-ref-id-parity.test.ts +++ b/src/secrets/exec-secret-ref-id-parity.test.ts @@ -108,6 +108,9 @@ describe("exec SecretRef id parity", () => { if (id.startsWith("plugins.entries.") && id.includes(".config.webSearch.apiKey")) { return "tools.web.search"; } + if (id.startsWith("plugins.entries.") && id.includes(".config.webFetch.apiKey")) { + return "tools.web.fetch"; + } if (id.startsWith("tools.web.search.")) { return "tools.web.search"; } diff --git a/test/helpers/plugins/web-fetch-provider-contract.ts b/test/helpers/plugins/web-fetch-provider-contract.ts index 4c6b0526e14..311e999711b 100644 --- a/test/helpers/plugins/web-fetch-provider-contract.ts +++ b/test/helpers/plugins/web-fetch-provider-contract.ts @@ -39,6 +39,7 @@ export function describeWebFetchProviderContracts(pluginId: string) { } return entry.credentialValue; }, + pluginId, }); }); } diff --git a/test/web-fetch-provider-boundary.test.ts b/test/web-fetch-provider-boundary.test.ts new file mode 100644 index 00000000000..295b797d6b8 --- /dev/null +++ b/test/web-fetch-provider-boundary.test.ts @@ -0,0 +1,31 @@ +import { describe, expect, it } from "vitest"; +import { + collectWebFetchProviderBoundaryViolations, + main, +} from "../scripts/check-web-fetch-provider-boundaries.mjs"; +import { createCapturedIo } from "./helpers/captured-io.js"; + +const violationsPromise = collectWebFetchProviderBoundaryViolations(); +const jsonOutputPromise = getJsonOutput(); + +async function getJsonOutput() { + const captured = createCapturedIo(); + const exitCode = await main(["--json"], captured.io); + return { + exitCode, + stderr: captured.readStderr(), + json: JSON.parse(captured.readStdout()), + }; +} + +describe("web fetch provider boundary inventory", () => { + it("keeps Firecrawl-specific fetch logic out of core runtime/tooling", async () => { + const violations = await violationsPromise; + const jsonOutput = await jsonOutputPromise; + + expect(violations).toEqual([]); + expect(jsonOutput.exitCode).toBe(0); + expect(jsonOutput.stderr).toBe(""); + expect(jsonOutput.json).toEqual([]); + }); +});