mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 04:10:46 +00:00
feat: add firecrawl onboarding search plugin
This commit is contained in:
159
extensions/firecrawl/src/config.ts
Normal file
159
extensions/firecrawl/src/config.ts
Normal file
@@ -0,0 +1,159 @@
|
||||
import type { OpenClawConfig } from "../../../src/config/config.js";
|
||||
import { normalizeResolvedSecretInputString } from "../../../src/config/types.secrets.js";
|
||||
import { normalizeSecretInput } from "../../../src/utils/normalize-secret-input.js";
|
||||
|
||||
/** Firecrawl API origin returned by `resolveFirecrawlBaseUrl` when nothing is configured. */
export const DEFAULT_FIRECRAWL_BASE_URL = "https://api.firecrawl.dev";

/** Fallback timeout, in seconds, for Firecrawl search requests. */
export const DEFAULT_FIRECRAWL_SEARCH_TIMEOUT_SECONDS = 30;

/** Fallback timeout, in seconds, for Firecrawl scrape requests. */
export const DEFAULT_FIRECRAWL_SCRAPE_TIMEOUT_SECONDS = 60;

/** Fallback max age in milliseconds: two days (172,800,000 ms). */
export const DEFAULT_FIRECRAWL_MAX_AGE_MS = 2 * 24 * 60 * 60 * 1000;
|
||||
|
||||
/** The `tools.web` section of the config (may itself be optional). */
type WebToolsConfig = NonNullable<OpenClawConfig["tools"]>["web"];

/**
 * Type of `tools.web.search`, or `undefined` when the section has no `search` member.
 *
 * The `extends infer Web` step is deliberate: it makes the inner conditional
 * distribute over each member of `WebToolsConfig`, so an optional
 * (`| undefined`) `web` section still yields the `search` type.
 */
type WebSearchConfig = WebToolsConfig extends infer Web
  ? Web extends { search?: infer Search }
    ? Search
    : undefined
  : undefined;

/** Type of `tools.web.fetch`, derived the same way as {@link WebSearchConfig}. */
type WebFetchConfig = WebToolsConfig extends infer Web
  ? Web extends { fetch?: infer Fetch }
    ? Fetch
    : undefined
  : undefined;
|
||||
|
||||
/** Shape read from `tools.web.search.firecrawl`; `undefined` when the section is absent. */
type FirecrawlSearchConfig =
  | undefined
  | {
      /** Raw secret input; normalized later by `resolveFirecrawlApiKey`. */
      apiKey?: unknown;
      /** Optional Firecrawl base URL override. */
      baseUrl?: string;
    };

/** Shape read from `tools.web.fetch.firecrawl`; `undefined` when the section is absent. */
type FirecrawlFetchConfig =
  | undefined
  | {
      /** Raw secret input; normalized later by `resolveFirecrawlApiKey`. */
      apiKey?: unknown;
      /** Optional Firecrawl base URL override. */
      baseUrl?: string;
      /** Whether scrapes should keep only the main page content. */
      onlyMainContent?: boolean;
      /** Max age, in milliseconds, sent with scrape requests. */
      maxAgeMs?: number;
      /** Scrape request timeout in seconds. */
      timeoutSeconds?: number;
    };
|
||||
|
||||
/**
 * Reads `cfg.tools.web.search`.
 *
 * @returns The search section when it is a truthy object, otherwise `undefined`.
 */
function resolveSearchConfig(cfg?: OpenClawConfig): WebSearchConfig {
  const candidate = cfg?.tools?.web?.search;
  return candidate && typeof candidate === "object" ? (candidate as WebSearchConfig) : undefined;
}
|
||||
|
||||
/**
 * Reads `cfg.tools.web.fetch`.
 *
 * @returns The fetch section when it is a truthy object, otherwise `undefined`.
 */
function resolveFetchConfig(cfg?: OpenClawConfig): WebFetchConfig {
  const candidate = cfg?.tools?.web?.fetch;
  const usable = Boolean(candidate) && typeof candidate === "object";
  return usable ? (candidate as WebFetchConfig) : undefined;
}
|
||||
|
||||
/**
 * Returns the `tools.web.search.firecrawl` section of the config.
 *
 * @param cfg Optional application config.
 * @returns The Firecrawl search settings, or `undefined` when the search
 *   section or its `firecrawl` member is missing or not an object.
 */
export function resolveFirecrawlSearchConfig(cfg?: OpenClawConfig): FirecrawlSearchConfig {
  const searchCfg = resolveSearchConfig(cfg);
  if (searchCfg && typeof searchCfg === "object" && "firecrawl" in searchCfg) {
    const scoped = searchCfg.firecrawl;
    if (scoped && typeof scoped === "object") {
      return scoped as FirecrawlSearchConfig;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Returns the `tools.web.fetch.firecrawl` section of the config.
 *
 * @param cfg Optional application config.
 * @returns The Firecrawl fetch settings, or `undefined` when the fetch
 *   section or its `firecrawl` member is missing or not an object.
 */
export function resolveFirecrawlFetchConfig(cfg?: OpenClawConfig): FirecrawlFetchConfig {
  const fetchCfg = resolveFetchConfig(cfg);
  if (fetchCfg && typeof fetchCfg === "object" && "firecrawl" in fetchCfg) {
    const scoped = fetchCfg.firecrawl;
    if (scoped && typeof scoped === "object") {
      return scoped as FirecrawlFetchConfig;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Normalizes a secret value read from config.
 *
 * The value is first passed through `normalizeResolvedSecretInputString`
 * together with its config `path`, and the result is then passed through
 * `normalizeSecretInput`.
 *
 * @param value Raw config value.
 * @param path Dotted config path of the value, forwarded to the resolver.
 */
function normalizeConfiguredSecret(value: unknown, path: string): string | undefined {
  const resolved = normalizeResolvedSecretInputString({ value, path });
  return normalizeSecretInput(resolved);
}
|
||||
|
||||
/**
 * Resolves the Firecrawl API key.
 *
 * Lookup order (first non-empty value wins):
 * 1. `tools.web.search.firecrawl.apiKey`
 * 2. `tools.web.fetch.firecrawl.apiKey`
 * 3. the `FIRECRAWL_API_KEY` environment variable
 *
 * @returns The key, or `undefined` when none of the sources provides one.
 */
export function resolveFirecrawlApiKey(cfg?: OpenClawConfig): string | undefined {
  const searchCfg = resolveFirecrawlSearchConfig(cfg);
  const fetchCfg = resolveFirecrawlFetchConfig(cfg);

  const fromSearch = normalizeConfiguredSecret(
    searchCfg?.apiKey,
    "tools.web.search.firecrawl.apiKey",
  );
  if (fromSearch) {
    return fromSearch;
  }

  const fromFetch = normalizeConfiguredSecret(fetchCfg?.apiKey, "tools.web.fetch.firecrawl.apiKey");
  if (fromFetch) {
    return fromFetch;
  }

  const fromEnv = normalizeSecretInput(process.env.FIRECRAWL_API_KEY);
  return fromEnv ? fromEnv : undefined;
}
|
||||
|
||||
/**
 * Resolves the Firecrawl base URL.
 *
 * Lookup order (first non-empty value wins): the trimmed
 * `tools.web.search.firecrawl.baseUrl`, the trimmed
 * `tools.web.fetch.firecrawl.baseUrl`, the `FIRECRAWL_BASE_URL` environment
 * variable, and finally `DEFAULT_FIRECRAWL_BASE_URL`.
 */
export function resolveFirecrawlBaseUrl(cfg?: OpenClawConfig): string {
  const trimmedOrEmpty = (value: unknown): string =>
    typeof value === "string" ? value.trim() : "";

  const searchCfg = resolveFirecrawlSearchConfig(cfg);
  const fetchCfg = resolveFirecrawlFetchConfig(cfg);

  const fromSearch = trimmedOrEmpty(searchCfg?.baseUrl);
  if (fromSearch) {
    return fromSearch;
  }

  const fromFetch = trimmedOrEmpty(fetchCfg?.baseUrl);
  if (fromFetch) {
    return fromFetch;
  }

  return normalizeSecretInput(process.env.FIRECRAWL_BASE_URL) || DEFAULT_FIRECRAWL_BASE_URL;
}
|
||||
|
||||
/**
 * Resolves the `onlyMainContent` scrape flag.
 *
 * @param cfg Optional application config.
 * @param override Per-call value; used as-is when it is a boolean.
 * @returns `override` if boolean, else the configured
 *   `tools.web.fetch.firecrawl.onlyMainContent` if boolean, else `true`.
 */
export function resolveFirecrawlOnlyMainContent(cfg?: OpenClawConfig, override?: boolean): boolean {
  if (override === true || override === false) {
    return override;
  }
  const configured = resolveFirecrawlFetchConfig(cfg)?.onlyMainContent;
  return typeof configured === "boolean" ? configured : true;
}
|
||||
|
||||
/**
 * Resolves the max age (milliseconds) sent with scrape requests.
 *
 * A candidate is accepted only when it is a finite number >= 0; accepted
 * values are floored to an integer.
 *
 * @param cfg Optional application config.
 * @param override Per-call value, checked first.
 * @returns The override, else `tools.web.fetch.firecrawl.maxAgeMs`, else
 *   `DEFAULT_FIRECRAWL_MAX_AGE_MS`.
 */
export function resolveFirecrawlMaxAgeMs(cfg?: OpenClawConfig, override?: number): number {
  const isNonNegativeFinite = (value: unknown): value is number =>
    typeof value === "number" && Number.isFinite(value) && value >= 0;

  if (isNonNegativeFinite(override)) {
    return Math.floor(override);
  }
  const configured = resolveFirecrawlFetchConfig(cfg)?.maxAgeMs;
  return isNonNegativeFinite(configured) ? Math.floor(configured) : DEFAULT_FIRECRAWL_MAX_AGE_MS;
}
|
||||
|
||||
/**
 * Resolves the scrape request timeout in seconds.
 *
 * A candidate is accepted only when it is a finite number > 0; accepted
 * values are floored to an integer.
 *
 * @param cfg Optional application config.
 * @param override Per-call value, checked first.
 * @returns The override, else `tools.web.fetch.firecrawl.timeoutSeconds`,
 *   else `DEFAULT_FIRECRAWL_SCRAPE_TIMEOUT_SECONDS`.
 */
export function resolveFirecrawlScrapeTimeoutSeconds(
  cfg?: OpenClawConfig,
  override?: number,
): number {
  const isPositiveFinite = (value: unknown): value is number =>
    typeof value === "number" && Number.isFinite(value) && value > 0;

  if (isPositiveFinite(override)) {
    return Math.floor(override);
  }
  const configured = resolveFirecrawlFetchConfig(cfg)?.timeoutSeconds;
  return isPositiveFinite(configured)
    ? Math.floor(configured)
    : DEFAULT_FIRECRAWL_SCRAPE_TIMEOUT_SECONDS;
}
|
||||
|
||||
/**
 * Resolves the search request timeout in seconds.
 *
 * @param override Per-call value; accepted when it is a finite number > 0.
 * @returns The floored override, or `DEFAULT_FIRECRAWL_SEARCH_TIMEOUT_SECONDS`.
 */
export function resolveFirecrawlSearchTimeoutSeconds(override?: number): number {
  return typeof override === "number" && Number.isFinite(override) && override > 0
    ? Math.floor(override)
    : DEFAULT_FIRECRAWL_SEARCH_TIMEOUT_SECONDS;
}
|
||||
446
extensions/firecrawl/src/firecrawl-client.ts
Normal file
446
extensions/firecrawl/src/firecrawl-client.ts
Normal file
@@ -0,0 +1,446 @@
|
||||
import { markdownToText, truncateText } from "../../../src/agents/tools/web-fetch-utils.js";
|
||||
import { withTrustedWebToolsEndpoint } from "../../../src/agents/tools/web-guarded-fetch.js";
|
||||
import {
|
||||
DEFAULT_CACHE_TTL_MINUTES,
|
||||
normalizeCacheKey,
|
||||
readCache,
|
||||
readResponseText,
|
||||
resolveCacheTtlMs,
|
||||
writeCache,
|
||||
} from "../../../src/agents/tools/web-shared.js";
|
||||
import type { OpenClawConfig } from "../../../src/config/config.js";
|
||||
import { wrapExternalContent, wrapWebContent } from "../../../src/security/external-content.js";
|
||||
import {
|
||||
resolveFirecrawlApiKey,
|
||||
resolveFirecrawlBaseUrl,
|
||||
resolveFirecrawlMaxAgeMs,
|
||||
resolveFirecrawlOnlyMainContent,
|
||||
resolveFirecrawlScrapeTimeoutSeconds,
|
||||
resolveFirecrawlSearchTimeoutSeconds,
|
||||
} from "./config.js";
|
||||
|
||||
/** Entry shape shared by the module-level search and scrape caches. */
type FirecrawlCacheEntry = {
  value: Record<string, unknown>;
  expiresAt: number;
  insertedAt: number;
};

/** Module-level cache of search results, keyed by the normalized request key. */
const SEARCH_CACHE = new Map<string, FirecrawlCacheEntry>();

/** Module-level cache of scrape results, keyed by the normalized request key. */
const SCRAPE_CACHE = new Map<string, FirecrawlCacheEntry>();

/** Result count used when the caller does not supply a usable `count`. */
const DEFAULT_SEARCH_COUNT = 5;

/** Character budget used when the caller does not supply a usable `maxChars`. */
const DEFAULT_SCRAPE_MAX_CHARS = 50_000;

/** Maximum number of bytes read from an error response body. */
const DEFAULT_ERROR_MAX_BYTES = 64_000;
|
||||
|
||||
/** One normalized search hit extracted from a Firecrawl search response. */
type FirecrawlSearchItem = {
  /** Result title; empty string when the response carries none. */
  title: string;
  /** Result URL (always non-empty). */
  url: string;
  /** Short description or snippet, when present. */
  description?: string;
  /** Page content (markdown or text), when present. */
  content?: string;
  /** Publication date string, when present. */
  published?: string;
  /** Hostname of `url` without a leading `www.`. */
  siteName?: string;
};

/** Arguments accepted by `runFirecrawlSearch`. */
export type FirecrawlSearchParams = {
  /** Config used to resolve the API key and base URL. */
  cfg?: OpenClawConfig;
  /** Search query text. */
  query: string;
  /** Requested number of results. */
  count?: number;
  /** Request timeout in seconds. */
  timeoutSeconds?: number;
  /** Optional `sources` list forwarded in the request body. */
  sources?: string[];
  /** Optional `categories` list forwarded in the request body. */
  categories?: string[];
  /** When `true`, request and return page content for each result. */
  scrapeResults?: boolean;
};

/** Arguments accepted by `runFirecrawlScrape`. */
export type FirecrawlScrapeParams = {
  /** Config used to resolve the API key, base URL and scrape defaults. */
  cfg?: OpenClawConfig;
  /** Page URL to scrape. */
  url: string;
  /** Whether to return markdown or markdown converted to plain text. */
  extractMode: "markdown" | "text";
  /** Maximum number of characters to return. */
  maxChars?: number;
  /** Per-call override for the `onlyMainContent` flag. */
  onlyMainContent?: boolean;
  /** Per-call override for the max age in milliseconds. */
  maxAgeMs?: number;
  /** Proxy mode forwarded in the request body. */
  proxy?: "auto" | "basic" | "stealth";
  /** `storeInCache` flag forwarded in the request body. */
  storeInCache?: boolean;
  /** Per-call override for the request timeout in seconds. */
  timeoutSeconds?: number;
};
|
||||
|
||||
/**
 * Builds the request URL for a Firecrawl endpoint.
 *
 * - Blank or unparsable `baseUrl`: `pathname` on `https://api.firecrawl.dev`.
 * - `baseUrl` with a path other than `/`: returned as-is (treated as a full
 *   endpoint override); `pathname` is NOT appended.
 * - Otherwise: `baseUrl` with its path replaced by `pathname`.
 *
 * NOTE(review): because a custom path is returned verbatim, a base URL with a
 * path prefix resolves to the same URL for both search and scrape — confirm
 * this is intended.
 */
function resolveEndpoint(baseUrl: string, pathname: "/v2/search" | "/v2/scrape"): string {
  const fallback = (): string => new URL(pathname, "https://api.firecrawl.dev").toString();

  const candidate = baseUrl.trim();
  if (candidate === "") {
    return fallback();
  }

  let parsed: URL;
  try {
    parsed = new URL(candidate);
  } catch {
    return fallback();
  }

  const hasCustomPath = parsed.pathname !== "" && parsed.pathname !== "/";
  if (!hasCustomPath) {
    parsed.pathname = pathname;
  }
  return parsed.toString();
}
|
||||
|
||||
/**
 * Derives a display site name from a URL: its hostname with a leading
 * `www.` removed.
 *
 * @returns The hostname, or `undefined` when the URL cannot be parsed or has
 *   no hostname.
 */
function resolveSiteName(urlRaw: string): string | undefined {
  let hostname: string;
  try {
    hostname = new URL(urlRaw).hostname;
  } catch {
    return undefined;
  }
  const bare = hostname.startsWith("www.") ? hostname.slice("www.".length) : hostname;
  return bare === "" ? undefined : bare;
}
|
||||
|
||||
/**
 * POSTs a JSON body to a Firecrawl endpoint and returns the parsed JSON payload.
 *
 * The request goes through `withTrustedWebToolsEndpoint` with bearer
 * authentication and JSON `Accept` / `Content-Type` headers.
 *
 * @param params.baseUrl Firecrawl base URL (see `resolveEndpoint`).
 * @param params.pathname Endpoint path to call.
 * @param params.apiKey Firecrawl API key sent as a bearer token.
 * @param params.body Request body, serialized with `JSON.stringify`.
 * @param params.timeoutSeconds Timeout forwarded to the fetch wrapper.
 * @param params.errorLabel Prefix used in thrown error messages.
 * @returns The decoded response payload.
 * @throws Error when the HTTP status is not OK, when the decoded body is not
 *   a JSON object, or when the payload reports `success: false`.
 */
async function postFirecrawlJson(params: {
  baseUrl: string;
  pathname: "/v2/search" | "/v2/scrape";
  apiKey: string;
  body: Record<string, unknown>;
  timeoutSeconds: number;
  errorLabel: string;
}): Promise<Record<string, unknown>> {
  const endpoint = resolveEndpoint(params.baseUrl, params.pathname);
  return await withTrustedWebToolsEndpoint(
    {
      url: endpoint,
      timeoutSeconds: params.timeoutSeconds,
      init: {
        method: "POST",
        headers: {
          Accept: "application/json",
          Authorization: `Bearer ${params.apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify(params.body),
      },
    },
    async ({ response }) => {
      if (!response.ok) {
        // Cap how much of the error body is read so a huge response cannot bloat the message.
        const detail = await readResponseText(response, { maxBytes: DEFAULT_ERROR_MAX_BYTES });
        throw new Error(
          `${params.errorLabel} API error (${response.status}): ${detail.text || response.statusText}`,
        );
      }
      const parsed: unknown = await response.json();
      // A JSON body of `null`, a primitive or an array is not a usable payload;
      // fail with a labeled error instead of a TypeError on property access.
      if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
        throw new Error(`${params.errorLabel} API error: unexpected response payload`);
      }
      const payload = parsed as Record<string, unknown>;
      if (payload.success === false) {
        const error =
          typeof payload.error === "string"
            ? payload.error
            : typeof payload.message === "string"
              ? payload.message
              : "unknown error";
        throw new Error(`${params.errorLabel} API error: ${error}`);
      }
      return payload;
    },
  );
}
|
||||
|
||||
/** Returns the first argument that is a non-empty string, or `undefined`. */
function firstNonEmptyString(...values: unknown[]): string | undefined {
  for (const value of values) {
    if (typeof value === "string" && value) {
      return value;
    }
  }
  return undefined;
}

/**
 * Extracts normalized search hits from a Firecrawl search response.
 *
 * Supported payload layouts, checked in this order:
 * 1. a flat array at `data`, `results`, `data.results` or `data.data`;
 * 2. per-source arrays at `data.web`, `data.news` and `data.images`
 *    (all present arrays are concatenated in that order, so results requested
 *    with `sources: ["news"]` or `["images"]` are not dropped);
 * 3. a flat array at `web.results`.
 *
 * Entries that are not objects, or that carry no URL, are skipped.
 *
 * @param payload Decoded Firecrawl response body.
 * @returns The normalized items; an empty array when no result list is found.
 */
function resolveSearchItems(payload: Record<string, unknown>): FirecrawlSearchItem[] {
  const isArray = (candidate: unknown): candidate is unknown[] => Array.isArray(candidate);
  const data =
    payload.data && typeof payload.data === "object"
      ? (payload.data as Record<string, unknown>)
      : undefined;

  const flat = [payload.data, payload.results, data?.results, data?.data].find(isArray);
  const perSource = [data?.web, data?.news, data?.images].filter(isArray);

  let rawItems: unknown[] | undefined = flat;
  if (!rawItems && perSource.length > 0) {
    rawItems = perSource.flat();
  }
  if (!rawItems) {
    const legacy = (payload.web as { results?: unknown } | undefined)?.results;
    rawItems = isArray(legacy) ? legacy : undefined;
  }
  if (!rawItems) {
    return [];
  }

  const items: FirecrawlSearchItem[] = [];
  for (const entry of rawItems) {
    if (!entry || typeof entry !== "object") {
      continue;
    }
    const record = entry as Record<string, unknown>;
    const metadata =
      record.metadata && typeof record.metadata === "object"
        ? (record.metadata as Record<string, unknown>)
        : undefined;

    const url = firstNonEmptyString(
      record.url,
      record.sourceURL,
      record.sourceUrl,
      metadata?.sourceURL,
    );
    if (!url) {
      continue;
    }

    items.push({
      title: firstNonEmptyString(record.title, metadata?.title) ?? "",
      url,
      description: firstNonEmptyString(record.description, record.snippet, record.summary),
      content: firstNonEmptyString(record.markdown, record.content, record.text),
      // `date` is the publication field carried by news results.
      published: firstNonEmptyString(
        record.publishedDate,
        record.published,
        record.date,
        metadata?.publishedTime,
        metadata?.publishedDate,
      ),
      siteName: resolveSiteName(url),
    });
  }
  return items;
}
|
||||
|
||||
/**
 * Builds the tool-facing search result object.
 *
 * Titles, descriptions and (when `scrapeResults` is set) content are passed
 * through `wrapWebContent`; URLs, `published` and `siteName` are emitted
 * as-is. `published`, `siteName` and `content` are only included when
 * non-empty.
 *
 * @param params.query Query echoed in the result.
 * @param params.provider Provider id echoed in the result.
 * @param params.items Normalized search hits.
 * @param params.tookMs Request duration in milliseconds.
 * @param params.scrapeResults Whether item content should be included.
 */
function buildSearchPayload(params: {
  query: string;
  provider: "firecrawl";
  items: FirecrawlSearchItem[];
  tookMs: number;
  scrapeResults: boolean;
}): Record<string, unknown> {
  const { query, provider, items, tookMs, scrapeResults } = params;

  const toResult = (entry: FirecrawlSearchItem): Record<string, unknown> => {
    const result: Record<string, unknown> = {
      title: entry.title ? wrapWebContent(entry.title, "web_search") : "",
      url: entry.url,
      description: entry.description ? wrapWebContent(entry.description, "web_search") : "",
    };
    if (entry.published) {
      result.published = entry.published;
    }
    if (entry.siteName) {
      result.siteName = entry.siteName;
    }
    if (scrapeResults && entry.content) {
      result.content = wrapWebContent(entry.content, "web_search");
    }
    return result;
  };

  return {
    query,
    provider,
    count: items.length,
    tookMs,
    externalContent: {
      untrusted: true,
      source: "web_search",
      provider,
      wrapped: true,
    },
    results: items.map(toResult),
  };
}
|
||||
|
||||
/**
 * Runs a Firecrawl search and returns the wrapped, tool-facing result.
 *
 * Results are kept in the module-level search cache, keyed by query, count,
 * base URL, sources, categories and the `scrapeResults` flag; a cache hit is
 * returned with `cached: true` added.
 *
 * @param params Search arguments; `count` is clamped to 1-10 and defaults to
 *   `DEFAULT_SEARCH_COUNT` when it is not a finite number.
 * @returns The search result payload built by `buildSearchPayload`.
 * @throws Error when no Firecrawl API key can be resolved, or when the
 *   request fails (see `postFirecrawlJson`).
 */
export async function runFirecrawlSearch(
  params: FirecrawlSearchParams,
): Promise<Record<string, unknown>> {
  const apiKey = resolveFirecrawlApiKey(params.cfg);
  if (!apiKey) {
    throw new Error(
      "web_search (firecrawl) needs a Firecrawl API key. Set FIRECRAWL_API_KEY in the Gateway environment, or configure tools.web.search.firecrawl.apiKey.",
    );
  }

  let count = DEFAULT_SEARCH_COUNT;
  if (typeof params.count === "number" && Number.isFinite(params.count)) {
    count = Math.min(10, Math.max(1, Math.floor(params.count)));
  }
  const timeoutSeconds = resolveFirecrawlSearchTimeoutSeconds(params.timeoutSeconds);
  const scrapeResults = params.scrapeResults === true;
  const keepTruthy = (list?: string[]): string[] =>
    Array.isArray(list) ? list.filter(Boolean) : [];
  const sources = keepTruthy(params.sources);
  const categories = keepTruthy(params.categories);
  const baseUrl = resolveFirecrawlBaseUrl(params.cfg);

  const cacheKey = normalizeCacheKey(
    JSON.stringify({
      type: "firecrawl-search",
      q: params.query,
      count,
      baseUrl,
      sources,
      categories,
      scrapeResults,
    }),
  );
  const hit = readCache(SEARCH_CACHE, cacheKey);
  if (hit) {
    return { ...hit.value, cached: true };
  }

  // Optional request fields are only sent when they carry a value.
  const body: Record<string, unknown> = {
    query: params.query,
    limit: count,
    ...(sources.length > 0 ? { sources } : {}),
    ...(categories.length > 0 ? { categories } : {}),
    ...(scrapeResults ? { scrapeOptions: { formats: ["markdown"] } } : {}),
  };

  const startedAt = Date.now();
  const payload = await postFirecrawlJson({
    baseUrl,
    pathname: "/v2/search",
    apiKey,
    body,
    timeoutSeconds,
    errorLabel: "Firecrawl Search",
  });
  const items = resolveSearchItems(payload);
  const result = buildSearchPayload({
    query: params.query,
    provider: "firecrawl",
    items,
    tookMs: Date.now() - startedAt,
    scrapeResults,
  });

  const ttlMs = resolveCacheTtlMs(undefined, DEFAULT_CACHE_TTL_MINUTES);
  writeCache(SEARCH_CACHE, cacheKey, result, ttlMs);
  return result;
}
|
||||
|
||||
/**
 * Returns `payload.data` when it is a truthy object (same reference),
 * otherwise a fresh empty object.
 */
function resolveScrapeData(payload: Record<string, unknown>): Record<string, unknown> {
  const { data } = payload;
  return data && typeof data === "object" ? (data as Record<string, unknown>) : {};
}
|
||||
|
||||
/**
 * Converts a Firecrawl scrape response into the tool-facing result object.
 *
 * The page body is taken from `data.markdown` (falling back to
 * `data.content`), optionally converted to plain text, truncated to
 * `maxChars`, and passed through `wrapExternalContent`. The title and any
 * top-level `warning` string are wrapped the same way.
 *
 * @param params.payload Decoded Firecrawl response body.
 * @param params.url URL that was requested; also the `finalUrl` fallback.
 * @param params.extractMode `"text"` converts the markdown with
 *   `markdownToText`; `"markdown"` keeps it unchanged.
 * @param params.maxChars Character budget passed to `truncateText`.
 * @returns The scrape result; `rawLength` is the pre-truncation length and
 *   `wrappedLength` is the length of the returned `text`.
 * @throws Error when the response carries no markdown/content string.
 */
export function parseFirecrawlScrapePayload(params: {
  payload: Record<string, unknown>;
  url: string;
  extractMode: "markdown" | "text";
  maxChars: number;
}): Record<string, unknown> {
  const data = resolveScrapeData(params.payload);
  const metadata =
    data.metadata && typeof data.metadata === "object"
      ? (data.metadata as Record<string, unknown>)
      : undefined;
  const markdown =
    (typeof data.markdown === "string" && data.markdown) ||
    (typeof data.content === "string" && data.content) ||
    "";
  if (!markdown) {
    throw new Error("Firecrawl scrape returned no content.");
  }
  const rawText = params.extractMode === "text" ? markdownToText(markdown) : markdown;
  const truncated = truncateText(rawText, params.maxChars);
  // Wrap once and reuse the result, so `wrappedLength` always describes the
  // exact string returned in `text` and the wrapper is not run twice.
  const wrappedText = wrapExternalContent(truncated.text, {
    source: "web_fetch",
    includeWarning: false,
  });
  return {
    url: params.url,
    finalUrl:
      (typeof metadata?.sourceURL === "string" && metadata.sourceURL) ||
      (typeof data.url === "string" && data.url) ||
      params.url,
    status:
      (typeof metadata?.statusCode === "number" && metadata.statusCode) ||
      (typeof data.statusCode === "number" && data.statusCode) ||
      undefined,
    title:
      typeof metadata?.title === "string" && metadata.title
        ? wrapExternalContent(metadata.title, { source: "web_fetch", includeWarning: false })
        : undefined,
    extractor: "firecrawl",
    extractMode: params.extractMode,
    externalContent: {
      untrusted: true,
      source: "web_fetch",
      wrapped: true,
    },
    truncated: truncated.truncated,
    rawLength: rawText.length,
    wrappedLength: wrappedText.length,
    text: wrappedText,
    warning:
      typeof params.payload.warning === "string" && params.payload.warning
        ? wrapExternalContent(params.payload.warning, {
            source: "web_fetch",
            includeWarning: false,
          })
        : undefined,
  };
}
|
||||
|
||||
/**
 * Scrapes a page through Firecrawl and returns the wrapped, tool-facing result.
 *
 * Results are kept in the module-level scrape cache, keyed by URL, extract
 * mode, base URL and every resolved scrape option; a cache hit is returned
 * with `cached: true` added.
 *
 * @param params Scrape arguments; unset options fall back to config values
 *   and then to built-in defaults (`proxy` "auto", `storeInCache` true,
 *   `maxChars` `DEFAULT_SCRAPE_MAX_CHARS`).
 * @returns The scrape result built by `parseFirecrawlScrapePayload`.
 * @throws Error when no Firecrawl API key can be resolved, when the request
 *   fails, or when the response has no content.
 */
export async function runFirecrawlScrape(
  params: FirecrawlScrapeParams,
): Promise<Record<string, unknown>> {
  const apiKey = resolveFirecrawlApiKey(params.cfg);
  if (!apiKey) {
    throw new Error(
      "firecrawl_scrape needs a Firecrawl API key. Set FIRECRAWL_API_KEY in the Gateway environment, or configure tools.web.fetch.firecrawl.apiKey.",
    );
  }

  const { cfg, url, extractMode } = params;
  const baseUrl = resolveFirecrawlBaseUrl(cfg);
  const timeoutSeconds = resolveFirecrawlScrapeTimeoutSeconds(cfg, params.timeoutSeconds);
  const onlyMainContent = resolveFirecrawlOnlyMainContent(cfg, params.onlyMainContent);
  const maxAgeMs = resolveFirecrawlMaxAgeMs(cfg, params.maxAgeMs);
  const proxy = params.proxy ?? "auto";
  const storeInCache = params.storeInCache ?? true;

  let maxChars = DEFAULT_SCRAPE_MAX_CHARS;
  if (
    typeof params.maxChars === "number" &&
    Number.isFinite(params.maxChars) &&
    params.maxChars > 0
  ) {
    maxChars = Math.floor(params.maxChars);
  }

  const cacheKey = normalizeCacheKey(
    JSON.stringify({
      type: "firecrawl-scrape",
      url,
      extractMode,
      baseUrl,
      onlyMainContent,
      maxAgeMs,
      proxy,
      storeInCache,
      maxChars,
    }),
  );
  const hit = readCache(SCRAPE_CACHE, cacheKey);
  if (hit) {
    return { ...hit.value, cached: true };
  }

  const requestBody: Record<string, unknown> = {
    url,
    formats: ["markdown"],
    onlyMainContent,
    // The request body carries the timeout in milliseconds.
    timeout: timeoutSeconds * 1000,
    maxAge: maxAgeMs,
    proxy,
    storeInCache,
  };
  const payload = await postFirecrawlJson({
    baseUrl,
    pathname: "/v2/scrape",
    apiKey,
    timeoutSeconds,
    errorLabel: "Firecrawl",
    body: requestBody,
  });

  const result = parseFirecrawlScrapePayload({ payload, url, extractMode, maxChars });
  const ttlMs = resolveCacheTtlMs(undefined, DEFAULT_CACHE_TTL_MINUTES);
  writeCache(SCRAPE_CACHE, cacheKey, result, ttlMs);
  return result;
}
|
||||
|
||||
/** Internal helpers exposed for unit tests only. */
export const __testing = {
  parseFirecrawlScrapePayload: parseFirecrawlScrapePayload,
  resolveSearchItems: resolveSearchItems,
};
|
||||
89
extensions/firecrawl/src/firecrawl-scrape-tool.ts
Normal file
89
extensions/firecrawl/src/firecrawl-scrape-tool.ts
Normal file
@@ -0,0 +1,89 @@
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import { optionalStringEnum } from "../../../src/agents/schema/typebox.js";
|
||||
import { jsonResult, readNumberParam, readStringParam } from "../../../src/agents/tools/common.js";
|
||||
import type { OpenClawPluginApi } from "../../../src/plugins/types.js";
|
||||
import { runFirecrawlScrape } from "./firecrawl-client.js";
|
||||
|
||||
/** Parameter schema of the `firecrawl_scrape` tool; unknown properties are rejected. */
const FirecrawlScrapeToolSchema = Type.Object(
  {
    // Required target page.
    url: Type.String({ description: "HTTP or HTTPS URL to scrape via Firecrawl." }),

    // Output shaping.
    extractMode: optionalStringEnum(["markdown", "text"] as const, {
      description: 'Extraction mode ("markdown" or "text"). Default: markdown.',
    }),
    maxChars: Type.Optional(
      Type.Number({ description: "Maximum characters to return.", minimum: 100 }),
    ),
    onlyMainContent: Type.Optional(
      Type.Boolean({ description: "Keep only main content when Firecrawl supports it." }),
    ),

    // Firecrawl-side caching and proxying.
    maxAgeMs: Type.Optional(
      Type.Number({ description: "Maximum Firecrawl cache age in milliseconds.", minimum: 0 }),
    ),
    proxy: optionalStringEnum(["auto", "basic", "stealth"] as const, {
      description: 'Firecrawl proxy mode ("auto", "basic", or "stealth").',
    }),
    storeInCache: Type.Optional(
      Type.Boolean({ description: "Whether Firecrawl should store the scrape in its cache." }),
    ),

    // Request control.
    timeoutSeconds: Type.Optional(
      Type.Number({
        description: "Timeout in seconds for the Firecrawl scrape request.",
        minimum: 1,
      }),
    ),
  },
  { additionalProperties: false },
);
|
||||
|
||||
/**
 * Creates the `firecrawl_scrape` tool definition.
 *
 * The tool reads its arguments from the raw parameter bag, drops values of
 * the wrong type (they become `undefined`, so config/defaults apply), and
 * delegates to `runFirecrawlScrape` with `api.config`.
 *
 * @param api Plugin API; only `api.config` is used.
 */
export function createFirecrawlScrapeTool(api: OpenClawPluginApi) {
  const optionalBoolean = (value: unknown): boolean | undefined =>
    typeof value === "boolean" ? value : undefined;

  async function execute(_toolCallId: string, rawParams: Record<string, unknown>) {
    const url = readStringParam(rawParams, "url", { required: true });
    // Anything other than the literal "text" falls back to markdown.
    const extractMode = readStringParam(rawParams, "extractMode") === "text" ? "text" : "markdown";
    const maxChars = readNumberParam(rawParams, "maxChars", { integer: true });
    const maxAgeMs = readNumberParam(rawParams, "maxAgeMs", { integer: true });
    const timeoutSeconds = readNumberParam(rawParams, "timeoutSeconds", { integer: true });

    const proxyRaw = readStringParam(rawParams, "proxy");
    const proxy = (["basic", "stealth", "auto"] as const).find((mode) => mode === proxyRaw);

    const onlyMainContent = optionalBoolean(rawParams.onlyMainContent);
    const storeInCache = optionalBoolean(rawParams.storeInCache);

    const result = await runFirecrawlScrape({
      cfg: api.config,
      url,
      extractMode,
      maxChars,
      onlyMainContent,
      maxAgeMs,
      proxy,
      storeInCache,
      timeoutSeconds,
    });
    return jsonResult(result);
  }

  return {
    name: "firecrawl_scrape",
    label: "Firecrawl Scrape",
    description:
      "Scrape a page using Firecrawl v2/scrape. Useful for JS-heavy or bot-protected pages where plain web_fetch is weak.",
    parameters: FirecrawlScrapeToolSchema,
    execute,
  };
}
|
||||
63
extensions/firecrawl/src/firecrawl-search-provider.ts
Normal file
63
extensions/firecrawl/src/firecrawl-search-provider.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import type { WebSearchProviderPlugin } from "../../../src/plugins/types.js";
|
||||
import { runFirecrawlSearch } from "./firecrawl-client.js";
|
||||
|
||||
/**
 * Parameter schema of the generic web-search tool backed by Firecrawl:
 * a required `query` and an optional `count` (1-10). Unknown properties are
 * rejected.
 */
const GenericFirecrawlSearchSchema = Type.Object(
  {
    query: Type.String({ description: "Search query string." }),
    count: Type.Optional(
      Type.Number({ description: "Number of results to return (1-10).", minimum: 1, maximum: 10 }),
    ),
  },
  { additionalProperties: false },
);
|
||||
|
||||
/**
 * Reads the Firecrawl credential from a search config object.
 *
 * @param searchConfig Search config section (may be omitted).
 * @returns `searchConfig.firecrawl.apiKey`, or `undefined` when `firecrawl`
 *   is missing, not an object, or an array.
 */
function getScopedCredentialValue(searchConfig?: Record<string, unknown>): unknown {
  const section = searchConfig?.firecrawl;
  const isPlainSection =
    Boolean(section) && typeof section === "object" && !Array.isArray(section);
  return isPlainSection ? (section as Record<string, unknown>).apiKey : undefined;
}
|
||||
|
||||
/**
 * Writes the Firecrawl credential into a search config object (mutates it).
 *
 * When `firecrawl` is already a plain object, only its `apiKey` is set and
 * its other members are kept; otherwise `firecrawl` is replaced with
 * `{ apiKey: value }`.
 *
 * @param searchConfigTarget Search config section to update in place.
 * @param value Credential value to store.
 */
function setScopedCredentialValue(
  searchConfigTarget: Record<string, unknown>,
  value: unknown,
): void {
  const existing = searchConfigTarget.firecrawl;
  if (existing && typeof existing === "object" && !Array.isArray(existing)) {
    (existing as Record<string, unknown>).apiKey = value;
  } else {
    searchConfigTarget.firecrawl = { apiKey: value };
  }
}
|
||||
|
||||
/**
 * Creates the Firecrawl web-search provider plugin.
 *
 * The provider stores its credential under `firecrawl.apiKey` of the search
 * config and exposes a generic search tool (`query` + optional `count`) that
 * delegates to `runFirecrawlSearch`.
 */
export function createFirecrawlWebSearchProvider(): WebSearchProviderPlugin {
  const provider: WebSearchProviderPlugin = {
    id: "firecrawl",
    label: "Firecrawl Search",
    hint: "Structured results with optional result scraping",
    envVars: ["FIRECRAWL_API_KEY"],
    placeholder: "fc-...",
    signupUrl: "https://www.firecrawl.dev/",
    docsUrl: "https://docs.openclaw.ai/tools/firecrawl",
    autoDetectOrder: 60,
    getCredentialValue: getScopedCredentialValue,
    setCredentialValue: setScopedCredentialValue,
    createTool: (ctx) => ({
      description:
        "Search the web using Firecrawl. Returns structured results with snippets from Firecrawl Search. Use firecrawl_search for Firecrawl-specific knobs like sources or categories.",
      parameters: GenericFirecrawlSearchSchema,
      execute: async (args) => {
        // Wrongly-typed arguments degrade to an empty query / default count.
        const query = typeof args.query === "string" ? args.query : "";
        const count = typeof args.count === "number" ? args.count : undefined;
        return await runFirecrawlSearch({ cfg: ctx.config, query, count });
      },
    }),
  };
  return provider;
}
|
||||
76
extensions/firecrawl/src/firecrawl-search-tool.ts
Normal file
76
extensions/firecrawl/src/firecrawl-search-tool.ts
Normal file
@@ -0,0 +1,76 @@
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import {
|
||||
jsonResult,
|
||||
readNumberParam,
|
||||
readStringArrayParam,
|
||||
readStringParam,
|
||||
} from "../../../src/agents/tools/common.js";
|
||||
import type { OpenClawPluginApi } from "../../../src/plugins/types.js";
|
||||
import { runFirecrawlSearch } from "./firecrawl-client.js";
|
||||
|
||||
/** Parameter schema of the `firecrawl_search` tool; unknown properties are rejected. */
const FirecrawlSearchToolSchema = Type.Object(
  {
    // Core search arguments.
    query: Type.String({ description: "Search query string." }),
    count: Type.Optional(
      Type.Number({ description: "Number of results to return (1-10).", minimum: 1, maximum: 10 }),
    ),

    // Firecrawl-specific filters.
    sources: Type.Optional(
      Type.Array(Type.String(), {
        description: 'Optional sources list, for example ["web"], ["news"], or ["images"].',
      }),
    ),
    categories: Type.Optional(
      Type.Array(Type.String(), {
        description: 'Optional Firecrawl categories, for example ["github"] or ["research"].',
      }),
    ),

    // Result shaping and request control.
    scrapeResults: Type.Optional(
      Type.Boolean({ description: "Include scraped result content when Firecrawl returns it." }),
    ),
    timeoutSeconds: Type.Optional(
      Type.Number({
        description: "Timeout in seconds for the Firecrawl Search request.",
        minimum: 1,
      }),
    ),
  },
  { additionalProperties: false },
);
|
||||
|
||||
/**
 * Creates the `firecrawl_search` tool definition.
 *
 * The tool reads its arguments from the raw parameter bag and delegates to
 * `runFirecrawlSearch` with `api.config`; `scrapeResults` is enabled only
 * when the raw value is exactly `true`.
 *
 * @param api Plugin API; only `api.config` is used.
 */
export function createFirecrawlSearchTool(api: OpenClawPluginApi) {
  async function execute(_toolCallId: string, rawParams: Record<string, unknown>) {
    const query = readStringParam(rawParams, "query", { required: true });
    const count = readNumberParam(rawParams, "count", { integer: true });
    const timeoutSeconds = readNumberParam(rawParams, "timeoutSeconds", { integer: true });
    const sources = readStringArrayParam(rawParams, "sources");
    const categories = readStringArrayParam(rawParams, "categories");
    const scrapeResults = rawParams.scrapeResults === true;

    const result = await runFirecrawlSearch({
      cfg: api.config,
      query,
      count,
      timeoutSeconds,
      sources,
      categories,
      scrapeResults,
    });
    return jsonResult(result);
  }

  return {
    name: "firecrawl_search",
    label: "Firecrawl Search",
    description:
      "Search the web using Firecrawl v2/search. Can optionally include scraped content from result pages.",
    parameters: FirecrawlSearchToolSchema,
    execute,
  };
}
|
||||
Reference in New Issue
Block a user