!feat(plugins): add web fetch provider boundary (#59465)

* feat(plugins): add web fetch provider boundary

* feat(plugins): add web fetch provider modules

* refactor(web-fetch): remove remaining core firecrawl fetch config

* fix(web-fetch): address review follow-ups

* fix(web-fetch): harden provider runtime boundaries

* fix(web-fetch): restore firecrawl compare helper

* fix(web-fetch): restore env-based provider autodetect

* fix(web-fetch): tighten provider hardening

* fix(web-fetch): restore fetch autodetect and compat args

* chore(changelog): note firecrawl fetch config break
This commit is contained in:
Vincent Koc
2026-04-02 20:25:19 +09:00
committed by GitHub
parent 82d5e6a2f7
commit 38d2faee20
72 changed files with 3425 additions and 1119 deletions

View File

@@ -38,6 +38,7 @@ const pluginRegistrationContractTests: PluginRegistrationContractParams[] = [
},
{
pluginId: "firecrawl",
webFetchProviderIds: ["firecrawl"],
webSearchProviderIds: ["firecrawl"],
toolNames: ["firecrawl_search", "firecrawl_scrape"],
},

View File

@@ -1,4 +1,5 @@
import { describe, expect, it } from "vitest";
import { resolveBundledWebFetchPluginIds } from "../bundled-web-fetch.js";
import { resolveBundledWebSearchPluginIds } from "../bundled-web-search.js";
import { loadPluginManifestRegistry } from "../manifest-registry.js";
import {
@@ -7,8 +8,10 @@ import {
pluginRegistrationContractRegistry,
providerContractLoadError,
providerContractPluginIds,
resolveWebFetchProviderContractEntriesForPluginId,
resolveWebSearchProviderContractEntriesForPluginId,
speechProviderContractRegistry,
webFetchProviderContractRegistry,
} from "./registry.js";
import { uniqueSortedStrings } from "./testkit.js";
@@ -55,6 +58,10 @@ describe("plugin contract registry", () => {
name: "does not duplicate bundled provider ids",
ids: () => pluginRegistrationContractRegistry.flatMap((entry) => entry.providerIds),
},
{
name: "does not duplicate bundled web fetch provider ids",
ids: () => pluginRegistrationContractRegistry.flatMap((entry) => entry.webFetchProviderIds),
},
{
name: "does not duplicate bundled web search provider ids",
ids: () => pluginRegistrationContractRegistry.flatMap((entry) => entry.webSearchProviderIds),
@@ -94,6 +101,31 @@ describe("plugin contract registry", () => {
});
});
// The registration contract registry and the shared bundled resolver must agree
// on exactly which plugins declare web fetch providers.
it("covers every bundled web fetch plugin from the shared resolver", () => {
  const bundledWebFetchPluginIds = resolveBundledWebFetchPluginIds({});
  const pluginsDeclaringWebFetch = pluginRegistrationContractRegistry.filter(
    (entry) => entry.webFetchProviderIds.length > 0,
  );
  const declaredPluginIds = uniqueSortedStrings(
    pluginsDeclaringWebFetch.map((entry) => entry.pluginId),
  );
  expect(declaredPluginIds).toEqual(bundledWebFetchPluginIds);
});
// Every plugin reported by the shared resolver must yield at least one web fetch
// contract entry, and the aggregate registry must not be empty.
it(
  "loads bundled web fetch providers for each shared-resolver plugin",
  { timeout: REGISTRY_CONTRACT_TIMEOUT_MS },
  () => {
    const bundledPluginIds = resolveBundledWebFetchPluginIds({});
    for (const pluginId of bundledPluginIds) {
      const scopedEntries = resolveWebFetchProviderContractEntriesForPluginId(pluginId);
      expect(scopedEntries.length).toBeGreaterThan(0);
    }
    expect(webFetchProviderContractRegistry.length).toBeGreaterThan(0);
  },
);
it("covers every bundled web search plugin from the shared resolver", () => {
const bundledWebSearchPluginIds = resolveBundledWebSearchPluginIds({});

View File

@@ -1,11 +1,12 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import type { ProviderPlugin, WebSearchProviderPlugin } from "../types.js";
import type { ProviderPlugin, WebFetchProviderPlugin, WebSearchProviderPlugin } from "../types.js";
/**
 * Minimal plugin record used by the mocked runtime registries in this test file.
 * Carries the plugin's load status plus the provider ids it declares per capability.
 */
type MockPluginRecord = {
id: string;
// "error" records are paired with an `error` message in the fixtures below.
status: "loaded" | "error";
error?: string;
providerIds: string[];
webFetchProviderIds: string[];
webSearchProviderIds: string[];
};
@@ -13,12 +14,14 @@ type MockRuntimeRegistry = {
plugins: MockPluginRecord[];
diagnostics: Array<{ pluginId?: string; message: string }>;
providers: Array<{ pluginId: string; provider: ProviderPlugin }>;
webFetchProviders: Array<{ pluginId: string; provider: WebFetchProviderPlugin }>;
webSearchProviders: Array<{ pluginId: string; provider: WebSearchProviderPlugin }>;
};
function createMockRuntimeRegistry(params: {
plugin: MockPluginRecord;
providers?: Array<{ pluginId: string; provider: ProviderPlugin }>;
webFetchProviders?: Array<{ pluginId: string; provider: WebFetchProviderPlugin }>;
webSearchProviders?: Array<{ pluginId: string; provider: WebSearchProviderPlugin }>;
diagnostics?: Array<{ pluginId?: string; message: string }>;
}): MockRuntimeRegistry {
@@ -26,6 +29,7 @@ function createMockRuntimeRegistry(params: {
plugins: [params.plugin],
diagnostics: params.diagnostics ?? [],
providers: params.providers ?? [],
webFetchProviders: params.webFetchProviders ?? [],
webSearchProviders: params.webSearchProviders ?? [],
};
}
@@ -46,6 +50,7 @@ describe("plugin contract registry scoped retries", () => {
status: "error",
error: "transient xai load failure",
providerIds: [],
webFetchProviderIds: [],
webSearchProviderIds: [],
},
diagnostics: [{ pluginId: "xai", message: "transient xai load failure" }],
@@ -57,6 +62,7 @@ describe("plugin contract registry scoped retries", () => {
id: "xai",
status: "loaded",
providerIds: ["xai"],
webFetchProviderIds: [],
webSearchProviderIds: ["grok"],
},
providers: [
@@ -95,6 +101,7 @@ describe("plugin contract registry scoped retries", () => {
status: "error",
error: "transient grok load failure",
providerIds: [],
webFetchProviderIds: [],
webSearchProviderIds: [],
},
diagnostics: [{ pluginId: "xai", message: "transient grok load failure" }],
@@ -106,6 +113,7 @@ describe("plugin contract registry scoped retries", () => {
id: "xai",
status: "loaded",
providerIds: ["xai"],
webFetchProviderIds: [],
webSearchProviderIds: ["grok"],
},
webSearchProviders: [
@@ -152,6 +160,7 @@ describe("plugin contract registry scoped retries", () => {
id: "byteplus",
status: "loaded",
providerIds: ["byteplus"],
webFetchProviderIds: [],
webSearchProviderIds: [],
},
providers: [
@@ -177,4 +186,70 @@ describe("plugin contract registry scoped retries", () => {
expect(requireProviderContractProvider("byteplus-plan").id).toBe("byteplus");
expect(loadBundledCapabilityRuntimeRegistry).toHaveBeenCalledTimes(1);
});
// A plugin-scoped web fetch lookup must call the runtime loader a second time
// when the first load reports the plugin in an error state.
it("retries web fetch provider loads after a transient plugin-scoped runtime error", async () => {
  const transientFailureMessage = "transient firecrawl fetch load failure";

  // First load: the plugin record is in an error state and exposes no providers.
  const failedRegistry = createMockRuntimeRegistry({
    plugin: {
      id: "firecrawl",
      status: "error",
      error: transientFailureMessage,
      providerIds: [],
      webFetchProviderIds: [],
      webSearchProviderIds: [],
    },
    diagnostics: [{ pluginId: "firecrawl", message: transientFailureMessage }],
  });

  // Second load: the plugin is loaded and registers a single web fetch provider.
  const firecrawlFetchProvider = {
    id: "firecrawl",
    label: "Firecrawl",
    hint: "Fetch with Firecrawl",
    envVars: ["FIRECRAWL_API_KEY"],
    placeholder: "fc-...",
    signupUrl: "https://firecrawl.dev",
    credentialPath: "plugins.entries.firecrawl.config.webFetch.apiKey",
    requiresCredential: true,
    getCredentialValue: () => undefined,
    setCredentialValue() {},
    createTool: () => ({
      description: "fetch",
      parameters: {},
      execute: async () => ({}),
    }),
  } as WebFetchProviderPlugin;
  const recoveredRegistry = createMockRuntimeRegistry({
    plugin: {
      id: "firecrawl",
      status: "loaded",
      providerIds: [],
      webFetchProviderIds: ["firecrawl"],
      webSearchProviderIds: ["firecrawl"],
    },
    webFetchProviders: [{ pluginId: "firecrawl", provider: firecrawlFetchProvider }],
  });

  const loadBundledCapabilityRuntimeRegistry = vi
    .fn()
    .mockReturnValueOnce(failedRegistry)
    .mockReturnValueOnce(recoveredRegistry);
  vi.doMock("../bundled-capability-runtime.js", () => ({
    loadBundledCapabilityRuntimeRegistry,
  }));

  // Import after the mock is registered so the registry module picks up the stub loader.
  const { resolveWebFetchProviderContractEntriesForPluginId } = await import("./registry.js");
  const resolvedProviderIds = resolveWebFetchProviderContractEntriesForPluginId("firecrawl").map(
    (entry) => entry.provider.id,
  );

  expect(resolvedProviderIds).toEqual(["firecrawl"]);
  expect(loadBundledCapabilityRuntimeRegistry).toHaveBeenCalledTimes(2);
});
});

View File

@@ -4,6 +4,7 @@ import {
BUNDLED_PLUGIN_CONTRACT_SNAPSHOTS,
BUNDLED_PROVIDER_PLUGIN_IDS,
BUNDLED_SPEECH_PLUGIN_IDS,
BUNDLED_WEB_FETCH_PLUGIN_IDS,
BUNDLED_WEB_SEARCH_PLUGIN_IDS,
} from "../bundled-capability-metadata.js";
import { loadBundledCapabilityRuntimeRegistry } from "../bundled-capability-runtime.js";
@@ -12,6 +13,7 @@ import type {
MediaUnderstandingProviderPlugin,
ProviderPlugin,
SpeechProviderPlugin,
WebFetchProviderPlugin,
WebSearchProviderPlugin,
} from "../types.js";
import {
@@ -31,6 +33,9 @@ type ProviderContractEntry = CapabilityContractEntry<ProviderPlugin>;
type WebSearchProviderContractEntry = CapabilityContractEntry<WebSearchProviderPlugin> & {
credentialValue: unknown;
};
/**
 * Contract entry for a web fetch provider: the generic capability entry plus a
 * `credentialValue`, which this module fills in via resolveWebFetchCredentialValue().
 * NOTE(review): presumably consumed by the provider contract test suites — confirm at call sites.
 */
type WebFetchProviderContractEntry = CapabilityContractEntry<WebFetchProviderPlugin> & {
credentialValue: unknown;
};
type SpeechProviderContractEntry = CapabilityContractEntry<SpeechProviderPlugin>;
type MediaUnderstandingProviderContractEntry =
@@ -44,6 +49,7 @@ type PluginRegistrationContractEntry = {
speechProviderIds: string[];
mediaUnderstandingProviderIds: string[];
imageGenerationProviderIds: string[];
webFetchProviderIds: string[];
webSearchProviderIds: string[];
toolNames: string[];
};
@@ -77,6 +83,11 @@ function uniqueStrings(values: readonly string[]): string[] {
let providerContractRegistryCache: ProviderContractEntry[] | null = null;
let providerContractRegistryByPluginIdCache: Map<string, ProviderContractEntry[]> | null = null;
// Memoized result of loadWebFetchProviderContractRegistry(); stays null until that loader runs.
let webFetchProviderContractRegistryCache: WebFetchProviderContractEntry[] | null = null;
// Per-plugin entries memoized by resolveWebFetchProviderContractEntriesForPluginId().
// The map is created the first time that function runs while the full-registry cache is still empty.
let webFetchProviderContractRegistryByPluginIdCache: Map<
string,
WebFetchProviderContractEntry[]
> | null = null;
let webSearchProviderContractRegistryCache: WebSearchProviderContractEntry[] | null = null;
let webSearchProviderContractRegistryByPluginIdCache: Map<
string,
@@ -106,6 +117,7 @@ function formatBundledCapabilityPluginLoadError(params: {
`status=${plugin.status}`,
...(plugin.error ? [`error=${plugin.error}`] : []),
`providerIds=[${plugin.providerIds.join(", ")}]`,
`webFetchProviderIds=[${plugin.webFetchProviderIds.join(", ")}]`,
`webSearchProviderIds=[${plugin.webSearchProviderIds.join(", ")}]`,
]
: ["plugin record missing"];
@@ -253,6 +265,65 @@ function resolveWebSearchCredentialValue(provider: WebSearchProviderPlugin): unk
return envVar.toLowerCase().includes("api_key") ? `${provider.id}-test` : "sk-test";
}
/**
 * Picks a placeholder credential for a web fetch provider.
 *
 * - Providers with `requiresCredential === false` get `<id>-no-key-needed`.
 * - If the first non-blank env var name does not contain "api_key"
 *   (case-insensitive), the value is "sk-test".
 * - Otherwise (no non-blank env var, or one containing "api_key") it is `<id>-test`.
 *
 * @param provider Web fetch provider whose id, envVars and requiresCredential are read.
 * @returns The placeholder credential string.
 */
function resolveWebFetchCredentialValue(provider: WebFetchProviderPlugin): unknown {
  if (provider.requiresCredential === false) {
    return `${provider.id}-no-key-needed`;
  }
  const firstNamedEnvVar = provider.envVars.find((name) => name.trim().length > 0);
  const looksLikeApiKey =
    firstNamedEnvVar === undefined || firstNamedEnvVar.toLowerCase().includes("api_key");
  if (looksLikeApiKey) {
    return `${provider.id}-test`;
  }
  return "sk-test";
}
/**
 * Returns the contract entries for all bundled web fetch plugins.
 *
 * The first call loads the bundled capability runtime registry for
 * BUNDLED_WEB_FETCH_PLUGIN_IDS, attaches a placeholder credential to each
 * provider, and stores the result in the module-level cache. Later calls
 * return the cached array.
 */
function loadWebFetchProviderContractRegistry(): WebFetchProviderContractEntry[] {
  if (webFetchProviderContractRegistryCache) {
    return webFetchProviderContractRegistryCache;
  }
  const runtimeRegistry = loadBundledCapabilityRuntimeRegistry({
    pluginIds: BUNDLED_WEB_FETCH_PLUGIN_IDS,
    pluginSdkResolution: "dist",
  });
  const contractEntries = runtimeRegistry.webFetchProviders.map(({ pluginId, provider }) => ({
    pluginId,
    provider,
    credentialValue: resolveWebFetchCredentialValue(provider),
  }));
  webFetchProviderContractRegistryCache = contractEntries;
  return contractEntries;
}
/**
 * Resolves the web fetch provider contract entries registered by one plugin.
 *
 * If the full registry is already cached, the result is filtered from it.
 * Otherwise a plugin-scoped load runs through
 * loadScopedCapabilityRuntimeRegistryEntries() and is memoized per plugin id.
 *
 * @param pluginId Id of the plugin whose web fetch providers are wanted.
 * @returns Contract entries whose `pluginId` matches.
 */
export function resolveWebFetchProviderContractEntriesForPluginId(
  pluginId: string,
): WebFetchProviderContractEntry[] {
  const fullRegistry = webFetchProviderContractRegistryCache;
  if (fullRegistry) {
    return fullRegistry.filter((entry) => entry.pluginId === pluginId);
  }

  // Create the per-plugin memo on first use and publish it to module state.
  let scopedCache = webFetchProviderContractRegistryByPluginIdCache;
  if (!scopedCache) {
    scopedCache = new Map<string, WebFetchProviderContractEntry[]>();
  }
  webFetchProviderContractRegistryByPluginIdCache = scopedCache;

  const memoized = scopedCache.get(pluginId);
  if (memoized) {
    return memoized;
  }

  const scopedEntries = loadScopedCapabilityRuntimeRegistryEntries({
    pluginId,
    capabilityLabel: "web fetch provider",
    loadEntries: (registry) => {
      const ownProviders = registry.webFetchProviders.filter(
        (candidate) => candidate.pluginId === pluginId,
      );
      return ownProviders.map((candidate) => ({
        pluginId: candidate.pluginId,
        provider: candidate.provider,
        credentialValue: resolveWebFetchCredentialValue(candidate.provider),
      }));
    },
    loadDeclaredIds: (plugin) => plugin.webFetchProviderIds,
  });
  scopedCache.set(pluginId, scopedEntries);
  return scopedEntries;
}
function loadWebSearchProviderContractRegistry(): WebSearchProviderContractEntry[] {
if (!webSearchProviderContractRegistryCache) {
const registry = loadBundledCapabilityRuntimeRegistry({
@@ -441,6 +512,9 @@ export function resolveProviderContractProvidersForPluginIds(
export const webSearchProviderContractRegistry: WebSearchProviderContractEntry[] =
createLazyArrayView(loadWebSearchProviderContractRegistry);
// Exported array view of the web fetch provider contract entries, backed by
// loadWebFetchProviderContractRegistry.
// NOTE(review): presumably the load is deferred until first access — confirm in createLazyArrayView.
export const webFetchProviderContractRegistry: WebFetchProviderContractEntry[] =
createLazyArrayView(loadWebFetchProviderContractRegistry);
export const speechProviderContractRegistry: SpeechProviderContractEntry[] = createLazyArrayView(
loadSpeechProviderContractRegistry,
);
@@ -459,6 +533,7 @@ function loadPluginRegistrationContractRegistry(): PluginRegistrationContractEnt
speechProviderIds: uniqueStrings(entry.speechProviderIds),
mediaUnderstandingProviderIds: uniqueStrings(entry.mediaUnderstandingProviderIds),
imageGenerationProviderIds: uniqueStrings(entry.imageGenerationProviderIds),
webFetchProviderIds: uniqueStrings(entry.webFetchProviderIds),
webSearchProviderIds: uniqueStrings(entry.webSearchProviderIds),
toolNames: uniqueStrings(entry.toolNames),
}));

View File

@@ -1,6 +1,6 @@
import { expect, it } from "vitest";
import type { OpenClawConfig } from "../../config/config.js";
import type { ProviderPlugin, WebSearchProviderPlugin } from "../types.js";
import type { ProviderPlugin, WebFetchProviderPlugin, WebSearchProviderPlugin } from "../types.js";
type Lazy<T> = T | (() => T);
@@ -132,3 +132,46 @@ export function installWebSearchProviderContractSuite(params: {
}
});
}
/**
 * Registers the shared vitest case that every web fetch provider must pass.
 *
 * The case checks provider metadata (id shape, non-blank label/hint/placeholder,
 * https signup URL, optional docs URL, unique non-blank env var names), the
 * credential set/get round trip, and that `createTool` returns a usable tool.
 *
 * @param params.provider Provider under test, or a thunk returning it.
 * @param params.credentialValue Credential to round-trip, or a thunk returning it.
 */
export function installWebFetchProviderContractSuite(params: {
  provider: Lazy<WebFetchProviderPlugin>;
  credentialValue: Lazy<unknown>;
}) {
  it("satisfies the base web fetch provider contract", () => {
    const provider = resolveLazy(params.provider);
    const credentialValue = resolveLazy(params.credentialValue);

    // Static metadata.
    expect(provider.id).toMatch(/^[a-z0-9][a-z0-9-]*$/);
    expect(provider.label.trim()).not.toBe("");
    expect(provider.hint.trim()).not.toBe("");
    expect(provider.placeholder.trim()).not.toBe("");
    expect(provider.signupUrl.startsWith("https://")).toBe(true);
    if (provider.docsUrl) {
      expect(provider.docsUrl.startsWith("http")).toBe(true);
    }

    // Env var names must be unique and non-blank.
    const dedupedEnvVars = [...new Set(provider.envVars)];
    expect(provider.envVars).toEqual(dedupedEnvVars);
    const allEnvVarsNamed = provider.envVars.every((entry) => entry.trim().length > 0);
    expect(allEnvVarsNamed).toBe(true);

    // A stored credential must read back unchanged.
    const fetchConfigTarget: Record<string, unknown> = {};
    provider.setCredentialValue(fetchConfigTarget, credentialValue);
    const storedCredential = provider.getCredentialValue(fetchConfigTarget);
    expect(storedCredential).toEqual(credentialValue);

    // Build a tool from a config that selects this provider.
    const fetchSection = { provider: provider.id, ...fetchConfigTarget };
    const config = { tools: { web: { fetch: fetchSection } } } as OpenClawConfig;
    const tool = provider.createTool({ config, fetchConfig: fetchConfigTarget });

    expect(tool).not.toBeNull();
    expect(tool?.description.trim()).not.toBe("");
    expect(tool?.parameters).toEqual(expect.any(Object));
    expect(typeof tool?.execute).toBe("function");
  });
}

View File

@@ -0,0 +1,10 @@
import { describeWebFetchProviderContracts } from "../../../test/helpers/plugins/web-fetch-provider-contract.js";
import { pluginRegistrationContractRegistry } from "./registry.js";
// Register the shared web fetch provider contract suite once for every plugin
// whose registration contract declares at least one web fetch provider id.
pluginRegistrationContractRegistry
  .filter(({ webFetchProviderIds }) => webFetchProviderIds.length > 0)
  .forEach(({ pluginId }) => {
    describeWebFetchProviderContracts(pluginId);
  });