mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-02 09:50:23 +00:00
test: decouple web fetch fallbacks from provider startup
This commit is contained in:
@@ -10,6 +10,7 @@ import {
|
||||
withStrictWebToolsEndpoint,
|
||||
writeCache,
|
||||
} from "openclaw/plugin-sdk/provider-web-fetch";
|
||||
import { normalizeSecretInput } from "openclaw/plugin-sdk/secret-input";
|
||||
import { wrapExternalContent, wrapWebContent } from "openclaw/plugin-sdk/security-runtime";
|
||||
import {
|
||||
resolveFirecrawlApiKey,
|
||||
@@ -89,6 +90,7 @@ async function postFirecrawlJson<T>(
|
||||
},
|
||||
parse: (response: Response) => Promise<T>,
|
||||
): Promise<T> {
|
||||
const apiKey = normalizeSecretInput(params.apiKey);
|
||||
return await withStrictWebToolsEndpoint(
|
||||
{
|
||||
url: params.url,
|
||||
@@ -96,7 +98,7 @@ async function postFirecrawlJson<T>(
|
||||
init: {
|
||||
method: "POST",
|
||||
headers: {
|
||||
Authorization: `Bearer ${params.apiKey}`,
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify(params.body),
|
||||
|
||||
@@ -176,6 +176,32 @@ describe("firecrawl tools", () => {
|
||||
).rejects.toThrow(/<<<EXTERNAL_UNTRUSTED_CONTENT id="[a-f0-9]{16}">>>/);
|
||||
});
|
||||
|
||||
it("normalizes Firecrawl authorization headers before requests", async () => {
|
||||
const fetchSpy = vi.fn(
|
||||
async () =>
|
||||
new Response(JSON.stringify({ success: true, data: [] }), {
|
||||
status: 200,
|
||||
headers: { "content-type": "application/json" },
|
||||
}),
|
||||
);
|
||||
global.fetch = fetchSpy as typeof fetch;
|
||||
|
||||
await firecrawlClientTesting.postFirecrawlJson(
|
||||
{
|
||||
url: "https://api.firecrawl.dev/v2/search",
|
||||
timeoutSeconds: 5,
|
||||
apiKey: "firecrawl-test-\r\nkey",
|
||||
body: { query: "openclaw" },
|
||||
errorLabel: "Firecrawl search",
|
||||
},
|
||||
async () => "ok",
|
||||
);
|
||||
|
||||
const init = fetchSpy.mock.calls[0]?.[1];
|
||||
const authHeader = new Headers(init?.headers).get("Authorization");
|
||||
expect(authHeader).toBe("Bearer firecrawl-test-key");
|
||||
});
|
||||
|
||||
it("maps generic provider args into firecrawl search params", async () => {
|
||||
const provider = createFirecrawlWebSearchProvider();
|
||||
const tool = provider.createTool({
|
||||
|
||||
@@ -2,6 +2,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import * as ssrf from "../../infra/net/ssrf.js";
|
||||
import { type FetchMock, withFetchPreconnect } from "../../test-utils/fetch-mock.js";
|
||||
import { makeFetchHeaders } from "./web-fetch.test-harness.js";
|
||||
import "./web-fetch.test-mocks.js";
|
||||
|
||||
const lookupMock = vi.fn();
|
||||
const resolvePinnedHostname = ssrf.resolvePinnedHostname;
|
||||
|
||||
@@ -244,7 +244,7 @@ type WebFetchRuntimeParams = {
|
||||
cacheTtlMs: number;
|
||||
userAgent: string;
|
||||
readabilityEnabled: boolean;
|
||||
providerFallback: ReturnType<typeof resolveWebFetchDefinition>;
|
||||
resolveProviderFallback: () => ReturnType<typeof resolveWebFetchDefinition>;
|
||||
};
|
||||
|
||||
function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
@@ -341,16 +341,17 @@ async function maybeFetchProviderWebFetchPayload(
|
||||
tookMs: number;
|
||||
},
|
||||
): Promise<Record<string, unknown> | null> {
|
||||
if (!params.providerFallback) {
|
||||
const providerFallback = params.resolveProviderFallback();
|
||||
if (!providerFallback) {
|
||||
return null;
|
||||
}
|
||||
const rawPayload = await params.providerFallback.definition.execute({
|
||||
const rawPayload = await providerFallback.definition.execute({
|
||||
url: params.urlToFetch,
|
||||
extractMode: params.extractMode,
|
||||
maxChars: params.maxChars,
|
||||
});
|
||||
const payload = normalizeProviderWebFetchPayload({
|
||||
providerId: params.providerFallback.provider.id,
|
||||
providerId: providerFallback.provider.id,
|
||||
payload: rawPayload,
|
||||
requestedUrl: params.url,
|
||||
extractMode: params.extractMode,
|
||||
@@ -498,7 +499,8 @@ async function runWebFetch(params: WebFetchRuntimeParams): Promise<Record<string
|
||||
title = basic.title;
|
||||
extractor = "raw-html";
|
||||
} else {
|
||||
const providerLabel = params.providerFallback?.provider.label ?? "provider fallback";
|
||||
const providerLabel =
|
||||
params.resolveProviderFallback()?.provider.label ?? "provider fallback";
|
||||
throw new Error(
|
||||
`Web fetch extraction failed: Readability, ${providerLabel}, and basic HTML cleanup returned no content.`,
|
||||
);
|
||||
@@ -572,16 +574,24 @@ export function createWebFetchTool(options?: {
|
||||
return null;
|
||||
}
|
||||
const readabilityEnabled = resolveFetchReadabilityEnabled(fetch);
|
||||
const providerFallback = resolveWebFetchDefinition({
|
||||
config: options?.config,
|
||||
sandboxed: options?.sandboxed,
|
||||
runtimeWebFetch: options?.runtimeWebFetch,
|
||||
preferRuntimeProviders: true,
|
||||
});
|
||||
const userAgent =
|
||||
(fetch && "userAgent" in fetch && typeof fetch.userAgent === "string" && fetch.userAgent) ||
|
||||
DEFAULT_FETCH_USER_AGENT;
|
||||
const maxResponseBytes = resolveFetchMaxResponseBytes(fetch);
|
||||
let providerFallbackResolved = false;
|
||||
let providerFallbackCache: ReturnType<typeof resolveWebFetchDefinition>;
|
||||
const resolveProviderFallback = () => {
|
||||
if (!providerFallbackResolved) {
|
||||
providerFallbackCache = resolveWebFetchDefinition({
|
||||
config: options?.config,
|
||||
sandboxed: options?.sandboxed,
|
||||
runtimeWebFetch: options?.runtimeWebFetch,
|
||||
preferRuntimeProviders: true,
|
||||
});
|
||||
providerFallbackResolved = true;
|
||||
}
|
||||
return providerFallbackCache;
|
||||
};
|
||||
return {
|
||||
label: "Web Fetch",
|
||||
name: "web_fetch",
|
||||
@@ -608,7 +618,7 @@ export function createWebFetchTool(options?: {
|
||||
cacheTtlMs: resolveCacheTtlMs(fetch?.cacheTtlMinutes, DEFAULT_CACHE_TTL_MINUTES),
|
||||
userAgent,
|
||||
readabilityEnabled,
|
||||
providerFallback,
|
||||
resolveProviderFallback,
|
||||
});
|
||||
return jsonResult(result);
|
||||
},
|
||||
|
||||
@@ -4,6 +4,22 @@ import * as ssrf from "../../infra/net/ssrf.js";
|
||||
import { resolveRequestUrl } from "../../plugin-sdk/request-url.js";
|
||||
import { withFetchPreconnect } from "../../test-utils/fetch-mock.js";
|
||||
import { makeFetchHeaders } from "./web-fetch.test-harness.js";
|
||||
const { extractReadableContentMock, resolveWebFetchDefinitionMock } = vi.hoisted(() => ({
|
||||
extractReadableContentMock: vi.fn(),
|
||||
resolveWebFetchDefinitionMock: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock("./web-fetch-utils.js", async () => {
|
||||
const actual =
|
||||
await vi.importActual<typeof import("./web-fetch-utils.js")>("./web-fetch-utils.js");
|
||||
return {
|
||||
...actual,
|
||||
extractReadableContent: extractReadableContentMock,
|
||||
};
|
||||
});
|
||||
vi.mock("../../web-fetch/runtime.js", () => ({
|
||||
resolveWebFetchDefinition: resolveWebFetchDefinitionMock,
|
||||
}));
|
||||
import { createWebFetchTool } from "./web-tools.js";
|
||||
|
||||
type MockResponse = {
|
||||
@@ -25,30 +41,6 @@ function htmlResponse(html: string, url = "https://example.com/"): MockResponse
|
||||
};
|
||||
}
|
||||
|
||||
const apiKeyField = ["api", "Key"].join("");
|
||||
|
||||
function firecrawlResponse(markdown: string, url = "https://example.com/"): MockResponse {
|
||||
return {
|
||||
ok: true,
|
||||
status: 200,
|
||||
json: async () => ({
|
||||
success: true,
|
||||
data: {
|
||||
markdown,
|
||||
metadata: { title: "Firecrawl Title", sourceURL: url, statusCode: 200 },
|
||||
},
|
||||
}),
|
||||
};
|
||||
}
|
||||
|
||||
function firecrawlError(): MockResponse {
|
||||
return {
|
||||
ok: false,
|
||||
status: 403,
|
||||
json: async () => ({ success: false, error: "blocked" }),
|
||||
};
|
||||
}
|
||||
|
||||
function textResponse(
|
||||
text: string,
|
||||
url = "https://example.com/",
|
||||
@@ -115,12 +107,8 @@ function installPlainTextFetch(text: string) {
|
||||
);
|
||||
}
|
||||
|
||||
function createFirecrawlTool(apiKey = defaultFirecrawlApiKey()) {
|
||||
return createFetchTool({ firecrawl: { [apiKeyField]: apiKey } });
|
||||
}
|
||||
|
||||
function defaultFirecrawlApiKey() {
|
||||
return "firecrawl-test"; // pragma: allowlist secret
|
||||
function createProviderFallbackTool() {
|
||||
return createFetchTool();
|
||||
}
|
||||
|
||||
function withoutAmbientFirecrawlEnv() {
|
||||
@@ -151,6 +139,10 @@ describe("web_fetch extraction fallbacks", () => {
|
||||
|
||||
beforeEach(() => {
|
||||
withoutAmbientFirecrawlEnv();
|
||||
extractReadableContentMock.mockReset();
|
||||
extractReadableContentMock.mockResolvedValue(null);
|
||||
resolveWebFetchDefinitionMock.mockReset();
|
||||
resolveWebFetchDefinitionMock.mockReturnValue(null);
|
||||
vi.spyOn(ssrf, "resolvePinnedHostname").mockImplementation(async (hostname) => {
|
||||
const normalized = hostname.trim().toLowerCase().replace(/\.$/, "");
|
||||
const addresses = ["93.184.216.34", "93.184.216.35"];
|
||||
@@ -284,77 +276,31 @@ describe("web_fetch extraction fallbacks", () => {
|
||||
// NOTE: Test for wrapping url/finalUrl/warning fields requires DNS mocking.
|
||||
// The sanitization of these fields is verified by external-content.test.ts tests.
|
||||
|
||||
it("falls back to firecrawl when readability returns no content", async () => {
|
||||
it("falls back to a configured provider when readability returns no content", async () => {
|
||||
installMockFetch((input: RequestInfo | URL) => {
|
||||
const url = resolveRequestUrl(input);
|
||||
if (url.includes("api.firecrawl.dev")) {
|
||||
return Promise.resolve(firecrawlResponse("firecrawl content")) as Promise<Response>;
|
||||
}
|
||||
return Promise.resolve(
|
||||
htmlResponse("<!doctype html><html><head></head><body></body></html>", url),
|
||||
) as Promise<Response>;
|
||||
});
|
||||
|
||||
const tool = createFirecrawlTool();
|
||||
resolveWebFetchDefinitionMock.mockReturnValue({
|
||||
provider: { id: "test-fetch", label: "Test Fetch" },
|
||||
definition: {
|
||||
description: "test provider",
|
||||
parameters: {},
|
||||
execute: async () => ({
|
||||
extractor: "test-fetch",
|
||||
text: "provider content",
|
||||
}),
|
||||
},
|
||||
});
|
||||
|
||||
const tool = createProviderFallbackTool();
|
||||
const result = await executeFetch(tool, { url: "https://example.com/empty" });
|
||||
const details = result?.details as { extractor?: string; text?: string };
|
||||
expect(details.extractor).toBe("firecrawl");
|
||||
expect(details.text).toContain("firecrawl content");
|
||||
});
|
||||
|
||||
it("normalizes firecrawl Authorization header values", async () => {
|
||||
const fetchSpy = installMockFetch((input: RequestInfo | URL) => {
|
||||
const url = resolveRequestUrl(input);
|
||||
if (url.includes("api.firecrawl.dev/v2/scrape")) {
|
||||
return Promise.resolve(firecrawlResponse("firecrawl normalized")) as Promise<Response>;
|
||||
}
|
||||
return Promise.resolve(
|
||||
htmlResponse("<!doctype html><html><head></head><body></body></html>", url),
|
||||
) as Promise<Response>;
|
||||
});
|
||||
|
||||
const tool = createFirecrawlTool("firecrawl-test-\r\nkey");
|
||||
const result = await executeFetch(tool, {
|
||||
url: "https://example.com/firecrawl",
|
||||
extractMode: "text",
|
||||
});
|
||||
|
||||
expect(result?.details).toMatchObject({ extractor: "firecrawl" });
|
||||
const firecrawlCall = fetchSpy.mock.calls.find((call) =>
|
||||
resolveRequestUrl(call[0]).includes("/v2/scrape"),
|
||||
);
|
||||
expect(firecrawlCall).toBeTruthy();
|
||||
const init = firecrawlCall?.[1];
|
||||
const authHeader = new Headers(init?.headers).get("Authorization");
|
||||
expect(authHeader).toBe("Bearer firecrawl-test-key");
|
||||
});
|
||||
|
||||
it("uses guarded endpoint fetch for firecrawl requests", async () => {
|
||||
vi.stubEnv("HTTP_PROXY", "http://127.0.0.1:7890");
|
||||
|
||||
const fetchSpy = installMockFetch((input: RequestInfo | URL) => {
|
||||
const url = resolveRequestUrl(input);
|
||||
if (url.includes("api.firecrawl.dev/v2/scrape")) {
|
||||
return Promise.resolve(
|
||||
firecrawlResponse("firecrawl guarded transport"),
|
||||
) as Promise<Response>;
|
||||
}
|
||||
return Promise.resolve(
|
||||
htmlResponse("<!doctype html><html><head></head><body></body></html>", url),
|
||||
) as Promise<Response>;
|
||||
});
|
||||
|
||||
const tool = createFirecrawlTool();
|
||||
const result = await executeFetch(tool, { url: "https://example.com/guarded-firecrawl" });
|
||||
|
||||
expect(result?.details).toMatchObject({ extractor: "firecrawl" });
|
||||
const firecrawlCall = fetchSpy.mock.calls.find((call) =>
|
||||
resolveRequestUrl(call[0]).includes("/v2/scrape"),
|
||||
);
|
||||
expect(firecrawlCall).toBeTruthy();
|
||||
const requestInit = firecrawlCall?.[1] as (RequestInit & { dispatcher?: unknown }) | undefined;
|
||||
expect(requestInit?.dispatcher).toBeDefined();
|
||||
expect(requestInit?.dispatcher).toHaveProperty("dispatch");
|
||||
expect(details.extractor).toBe("test-fetch");
|
||||
expect(details.text).toContain("provider content");
|
||||
});
|
||||
|
||||
it("throws when readability is disabled and firecrawl is unavailable", async () => {
|
||||
@@ -375,21 +321,32 @@ describe("web_fetch extraction fallbacks", () => {
|
||||
).rejects.toThrow("Readability disabled");
|
||||
});
|
||||
|
||||
it("throws when readability is empty and firecrawl fails", async () => {
|
||||
installMockFetch((input: RequestInfo | URL) => {
|
||||
const url = resolveRequestUrl(input);
|
||||
if (url.includes("api.firecrawl.dev")) {
|
||||
return Promise.resolve(firecrawlError()) as Promise<Response>;
|
||||
}
|
||||
return Promise.resolve(
|
||||
htmlResponse("<!doctype html><html><head></head><body></body></html>", url),
|
||||
) as Promise<Response>;
|
||||
it("throws when readability is empty and the provider fallback yields no content", async () => {
|
||||
installMockFetch(
|
||||
(input: RequestInfo | URL) =>
|
||||
Promise.resolve(
|
||||
htmlResponse(
|
||||
"<!doctype html><html><head></head><body></body></html>",
|
||||
resolveRequestUrl(input),
|
||||
),
|
||||
) as Promise<Response>,
|
||||
);
|
||||
|
||||
resolveWebFetchDefinitionMock.mockReturnValue({
|
||||
provider: { id: "test-fetch", label: "Test Fetch" },
|
||||
definition: {
|
||||
description: "test provider",
|
||||
parameters: {},
|
||||
execute: async () => {
|
||||
throw new Error("provider returned no content");
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const tool = createFirecrawlTool();
|
||||
const tool = createProviderFallbackTool();
|
||||
await expect(
|
||||
executeFetch(tool, { url: "https://example.com/readability-empty" }),
|
||||
).rejects.toThrow("Readability, Firecrawl, and basic HTML cleanup returned no content");
|
||||
).rejects.toThrow("Readability, Test Fetch, and basic HTML cleanup returned no content");
|
||||
});
|
||||
|
||||
it("falls back to basic HTML cleanup after readability and before giving up", async () => {
|
||||
@@ -414,12 +371,8 @@ describe("web_fetch extraction fallbacks", () => {
|
||||
expect(details.title).toContain("Shell App");
|
||||
});
|
||||
|
||||
it("uses firecrawl when direct fetch fails", async () => {
|
||||
installMockFetch((input: RequestInfo | URL) => {
|
||||
const url = resolveRequestUrl(input);
|
||||
if (url.includes("api.firecrawl.dev")) {
|
||||
return Promise.resolve(firecrawlResponse("firecrawl fallback", url)) as Promise<Response>;
|
||||
}
|
||||
it("uses the provider fallback when direct fetch fails", async () => {
|
||||
installMockFetch((_input: RequestInfo | URL) => {
|
||||
return Promise.resolve({
|
||||
ok: false,
|
||||
status: 403,
|
||||
@@ -428,14 +381,23 @@ describe("web_fetch extraction fallbacks", () => {
|
||||
} as Response);
|
||||
});
|
||||
|
||||
const tool = createFetchTool({
|
||||
firecrawl: { apiKey: "firecrawl-test" }, // pragma: allowlist secret
|
||||
resolveWebFetchDefinitionMock.mockReturnValue({
|
||||
provider: { id: "test-fetch", label: "Test Fetch" },
|
||||
definition: {
|
||||
description: "test provider",
|
||||
parameters: {},
|
||||
execute: async () => ({
|
||||
extractor: "test-fetch",
|
||||
text: "provider fallback",
|
||||
}),
|
||||
},
|
||||
});
|
||||
|
||||
const tool = createProviderFallbackTool();
|
||||
const result = await tool?.execute?.("call", { url: "https://example.com/blocked" });
|
||||
const details = result?.details as { extractor?: string; text?: string };
|
||||
expect(details.extractor).toBe("firecrawl");
|
||||
expect(details.text).toContain("firecrawl fallback");
|
||||
expect(details.extractor).toBe("test-fetch");
|
||||
expect(details.text).toContain("provider fallback");
|
||||
});
|
||||
|
||||
it("wraps external content and clamps oversized maxChars", async () => {
|
||||
@@ -509,30 +471,25 @@ describe("web_fetch extraction fallbacks", () => {
|
||||
expect(message).toContain("Oops");
|
||||
});
|
||||
|
||||
it("wraps firecrawl error details", async () => {
|
||||
installMockFetch((input: RequestInfo | URL) => {
|
||||
const url = resolveRequestUrl(input);
|
||||
if (url.includes("api.firecrawl.dev")) {
|
||||
return Promise.resolve({
|
||||
ok: false,
|
||||
status: 403,
|
||||
json: async () => ({ success: false, error: "blocked" }),
|
||||
} as Response);
|
||||
}
|
||||
return Promise.reject(new Error("network down"));
|
||||
it("surfaces provider fallback errors when direct fetch throws", async () => {
|
||||
installMockFetch(() => Promise.reject(new Error("network down")));
|
||||
resolveWebFetchDefinitionMock.mockReturnValue({
|
||||
provider: { id: "test-fetch", label: "Test Fetch" },
|
||||
definition: {
|
||||
description: "test provider",
|
||||
parameters: {},
|
||||
execute: async () => {
|
||||
throw new Error("provider fallback failed");
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const tool = createFetchTool({
|
||||
firecrawl: { apiKey: "firecrawl-test" }, // pragma: allowlist secret
|
||||
});
|
||||
|
||||
const message = await captureToolErrorMessage({
|
||||
tool,
|
||||
url: "https://example.com/firecrawl-error",
|
||||
});
|
||||
|
||||
expect(message).toContain("Firecrawl API error (403):");
|
||||
expect(message).toMatch(/<<<EXTERNAL_UNTRUSTED_CONTENT id="[a-f0-9]{16}">>>/);
|
||||
expect(message).toContain("blocked");
|
||||
const tool = createProviderFallbackTool();
|
||||
await expect(
|
||||
captureToolErrorMessage({
|
||||
tool,
|
||||
url: "https://example.com/provider-error",
|
||||
}),
|
||||
).resolves.toContain("provider fallback failed");
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user