import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; import { DEFAULT_CACHE_TTL_MINUTES, markdownToText, normalizeCacheKey, postTrustedWebToolsJson, readCache, resolveCacheTtlMs, truncateText, writeCache, } from "openclaw/plugin-sdk/provider-web-search"; import { wrapExternalContent, wrapWebContent } from "openclaw/plugin-sdk/security-runtime"; import { resolveFirecrawlApiKey, resolveFirecrawlBaseUrl, resolveFirecrawlMaxAgeMs, resolveFirecrawlOnlyMainContent, resolveFirecrawlScrapeTimeoutSeconds, resolveFirecrawlSearchTimeoutSeconds, } from "./config.js"; const SEARCH_CACHE = new Map< string, { value: Record; expiresAt: number; insertedAt: number } >(); const SCRAPE_CACHE = new Map< string, { value: Record; expiresAt: number; insertedAt: number } >(); const DEFAULT_SEARCH_COUNT = 5; const DEFAULT_SCRAPE_MAX_CHARS = 50_000; type FirecrawlSearchItem = { title: string; url: string; description?: string; content?: string; published?: string; siteName?: string; }; export type FirecrawlSearchParams = { cfg?: OpenClawConfig; query: string; count?: number; timeoutSeconds?: number; sources?: string[]; categories?: string[]; scrapeResults?: boolean; }; export type FirecrawlScrapeParams = { cfg?: OpenClawConfig; url: string; extractMode: "markdown" | "text"; maxChars?: number; onlyMainContent?: boolean; maxAgeMs?: number; proxy?: "auto" | "basic" | "stealth"; storeInCache?: boolean; timeoutSeconds?: number; }; function resolveEndpoint(baseUrl: string, pathname: "/v2/search" | "/v2/scrape"): string { const trimmed = baseUrl.trim(); if (!trimmed) { return new URL(pathname, "https://api.firecrawl.dev").toString(); } try { const url = new URL(trimmed); if (url.pathname && url.pathname !== "/") { return url.toString(); } url.pathname = pathname; return url.toString(); } catch { return new URL(pathname, "https://api.firecrawl.dev").toString(); } } function resolveSiteName(urlRaw: string): string | undefined { try { const host = new URL(urlRaw).hostname.replace(/^www\./, ""); return host || undefined; } catch { return undefined; } } function resolveSearchItems(payload: Record): FirecrawlSearchItem[] { const candidates = [ payload.data, payload.results, (payload.data as { results?: unknown } | undefined)?.results, (payload.data as { data?: unknown } | undefined)?.data, (payload.data as { web?: unknown } | undefined)?.web, (payload.web as { results?: unknown } | undefined)?.results, ]; const rawItems = candidates.find((candidate) => Array.isArray(candidate)); if (!Array.isArray(rawItems)) { return []; } const items: FirecrawlSearchItem[] = []; for (const entry of rawItems) { if (!entry || typeof entry !== "object") { continue; } const record = entry as Record; const metadata = record.metadata && typeof record.metadata === "object" ? (record.metadata as Record) : undefined; const url = (typeof record.url === "string" && record.url) || (typeof record.sourceURL === "string" && record.sourceURL) || (typeof record.sourceUrl === "string" && record.sourceUrl) || (typeof metadata?.sourceURL === "string" && metadata.sourceURL) || ""; if (!url) { continue; } const title = (typeof record.title === "string" && record.title) || (typeof metadata?.title === "string" && metadata.title) || ""; const description = (typeof record.description === "string" && record.description) || (typeof record.snippet === "string" && record.snippet) || (typeof record.summary === "string" && record.summary) || undefined; const content = (typeof record.markdown === "string" && record.markdown) || (typeof record.content === "string" && record.content) || (typeof record.text === "string" && record.text) || undefined; const published = (typeof record.publishedDate === "string" && record.publishedDate) || (typeof record.published === "string" && record.published) || (typeof metadata?.publishedTime === "string" && metadata.publishedTime) || (typeof metadata?.publishedDate === "string" && metadata.publishedDate) || undefined; items.push({ title, url, description, content, published, siteName: resolveSiteName(url), }); } return items; } function buildSearchPayload(params: { query: string; provider: "firecrawl"; items: FirecrawlSearchItem[]; tookMs: number; scrapeResults: boolean; }): Record { return { query: params.query, provider: params.provider, count: params.items.length, tookMs: params.tookMs, externalContent: { untrusted: true, source: "web_search", provider: params.provider, wrapped: true, }, results: params.items.map((entry) => ({ title: entry.title ? wrapWebContent(entry.title, "web_search") : "", url: entry.url, description: entry.description ? wrapWebContent(entry.description, "web_search") : "", ...(entry.published ? { published: entry.published } : {}), ...(entry.siteName ? { siteName: entry.siteName } : {}), ...(params.scrapeResults && entry.content ? { content: wrapWebContent(entry.content, "web_search") } : {}), })), }; } export async function runFirecrawlSearch( params: FirecrawlSearchParams, ): Promise> { const apiKey = resolveFirecrawlApiKey(params.cfg); if (!apiKey) { throw new Error( "web_search (firecrawl) needs a Firecrawl API key. Set FIRECRAWL_API_KEY in the Gateway environment, or configure plugins.entries.firecrawl.config.webSearch.apiKey.", ); } const count = typeof params.count === "number" && Number.isFinite(params.count) ? Math.max(1, Math.min(10, Math.floor(params.count))) : DEFAULT_SEARCH_COUNT; const timeoutSeconds = resolveFirecrawlSearchTimeoutSeconds(params.timeoutSeconds); const scrapeResults = params.scrapeResults === true; const sources = Array.isArray(params.sources) ? params.sources.filter(Boolean) : []; const categories = Array.isArray(params.categories) ? params.categories.filter(Boolean) : []; const baseUrl = resolveFirecrawlBaseUrl(params.cfg); const cacheKey = normalizeCacheKey( JSON.stringify({ type: "firecrawl-search", q: params.query, count, baseUrl, sources, categories, scrapeResults, }), ); const cached = readCache(SEARCH_CACHE, cacheKey); if (cached) { return { ...cached.value, cached: true }; } const body: Record = { query: params.query, limit: count, }; if (sources.length > 0) { body.sources = sources; } if (categories.length > 0) { body.categories = categories; } if (scrapeResults) { body.scrapeOptions = { formats: ["markdown"], }; } const start = Date.now(); const payload = await postTrustedWebToolsJson( { url: resolveEndpoint(baseUrl, "/v2/search"), timeoutSeconds, apiKey, body, errorLabel: "Firecrawl Search", }, async (response) => { const payload = (await response.json()) as Record; if (payload.success === false) { const error = typeof payload.error === "string" ? payload.error : typeof payload.message === "string" ? payload.message : "unknown error"; throw new Error(`Firecrawl Search API error: ${error}`); } return payload; }, ); const result = buildSearchPayload({ query: params.query, provider: "firecrawl", items: resolveSearchItems(payload), tookMs: Date.now() - start, scrapeResults, }); writeCache( SEARCH_CACHE, cacheKey, result, resolveCacheTtlMs(undefined, DEFAULT_CACHE_TTL_MINUTES), ); return result; } function resolveScrapeData(payload: Record): Record { const data = payload.data; if (data && typeof data === "object") { return data as Record; } return {}; } export function parseFirecrawlScrapePayload(params: { payload: Record; url: string; extractMode: "markdown" | "text"; maxChars: number; }): Record { const data = resolveScrapeData(params.payload); const metadata = data.metadata && typeof data.metadata === "object" ? (data.metadata as Record) : undefined; const markdown = (typeof data.markdown === "string" && data.markdown) || (typeof data.content === "string" && data.content) || ""; if (!markdown) { throw new Error("Firecrawl scrape returned no content."); } const rawText = params.extractMode === "text" ? markdownToText(markdown) : markdown; const truncated = truncateText(rawText, params.maxChars); return { url: params.url, finalUrl: (typeof metadata?.sourceURL === "string" && metadata.sourceURL) || (typeof data.url === "string" && data.url) || params.url, status: (typeof metadata?.statusCode === "number" && metadata.statusCode) || (typeof data.statusCode === "number" && data.statusCode) || undefined, title: typeof metadata?.title === "string" && metadata.title ? wrapExternalContent(metadata.title, { source: "web_fetch", includeWarning: false }) : undefined, extractor: "firecrawl", extractMode: params.extractMode, externalContent: { untrusted: true, source: "web_fetch", wrapped: true, }, truncated: truncated.truncated, rawLength: rawText.length, wrappedLength: wrapExternalContent(truncated.text, { source: "web_fetch", includeWarning: false, }).length, text: wrapExternalContent(truncated.text, { source: "web_fetch", includeWarning: false, }), warning: typeof params.payload.warning === "string" && params.payload.warning ? wrapExternalContent(params.payload.warning, { source: "web_fetch", includeWarning: false, }) : undefined, }; } export async function runFirecrawlScrape( params: FirecrawlScrapeParams, ): Promise> { const apiKey = resolveFirecrawlApiKey(params.cfg); if (!apiKey) { throw new Error( "firecrawl_scrape needs a Firecrawl API key. Set FIRECRAWL_API_KEY in the Gateway environment, or configure tools.web.fetch.firecrawl.apiKey.", ); } const baseUrl = resolveFirecrawlBaseUrl(params.cfg); const timeoutSeconds = resolveFirecrawlScrapeTimeoutSeconds(params.cfg, params.timeoutSeconds); const onlyMainContent = resolveFirecrawlOnlyMainContent(params.cfg, params.onlyMainContent); const maxAgeMs = resolveFirecrawlMaxAgeMs(params.cfg, params.maxAgeMs); const proxy = params.proxy ?? "auto"; const storeInCache = params.storeInCache ?? true; const maxChars = typeof params.maxChars === "number" && Number.isFinite(params.maxChars) && params.maxChars > 0 ? Math.floor(params.maxChars) : DEFAULT_SCRAPE_MAX_CHARS; const cacheKey = normalizeCacheKey( JSON.stringify({ type: "firecrawl-scrape", url: params.url, extractMode: params.extractMode, baseUrl, onlyMainContent, maxAgeMs, proxy, storeInCache, maxChars, }), ); const cached = readCache(SCRAPE_CACHE, cacheKey); if (cached) { return { ...cached.value, cached: true }; } const payload = await postTrustedWebToolsJson( { url: resolveEndpoint(baseUrl, "/v2/scrape"), timeoutSeconds, apiKey, errorLabel: "Firecrawl", body: { url: params.url, formats: ["markdown"], onlyMainContent, timeout: timeoutSeconds * 1000, maxAge: maxAgeMs, proxy, storeInCache, }, }, async (response) => (await response.json()) as Record, ); const result = parseFirecrawlScrapePayload({ payload, url: params.url, extractMode: params.extractMode, maxChars, }); writeCache( SCRAPE_CACHE, cacheKey, result, resolveCacheTtlMs(undefined, DEFAULT_CACHE_TTL_MINUTES), ); return result; } export const __testing = { parseFirecrawlScrapePayload, resolveSearchItems, };