|
|
|
|
@@ -16,7 +16,6 @@ import type { RuntimeWebSearchMetadata } from "../../secrets/runtime-web-tools.j
|
|
|
|
|
import { wrapWebContent } from "../../security/external-content.js";
|
|
|
|
|
import type { AnyAgentTool } from "./common.js";
|
|
|
|
|
import { jsonResult, readNumberParam, readStringArrayParam, readStringParam } from "./common.js";
|
|
|
|
|
import { withTrustedWebToolsEndpoint } from "./web-guarded-fetch.js";
|
|
|
|
|
import { resolveCitationRedirectUrl } from "./web-search-citation-redirect.js";
|
|
|
|
|
import {
|
|
|
|
|
CacheEntry,
|
|
|
|
|
@@ -24,7 +23,6 @@ import {
|
|
|
|
|
DEFAULT_TIMEOUT_SECONDS,
|
|
|
|
|
normalizeCacheKey,
|
|
|
|
|
readCache,
|
|
|
|
|
readResponseText,
|
|
|
|
|
resolveCacheTtlMs,
|
|
|
|
|
resolveTimeoutSeconds,
|
|
|
|
|
writeCache,
|
|
|
|
|
@@ -33,16 +31,6 @@ import {
|
|
|
|
|
/** Default result count. Its call sites are outside this view; presumably used when the caller supplies no count — confirm. */
const DEFAULT_SEARCH_COUNT = 5;
|
|
|
|
|
/** Upper bound for result counts; resolveSearchCount clamps to the range 1..MAX_SEARCH_COUNT. */
const MAX_SEARCH_COUNT = 10;
|
|
|
|
|
|
|
|
|
|
/** Brave web search endpoint URL. Not referenced in the visible part of this file. */
const _BRAVE_SEARCH_ENDPOINT = "https://api.search.brave.com/res/v1/web/search";
|
|
|
|
|
/** Brave LLM Context endpoint URL; _runBraveLlmContextSearch appends its query parameters to this. */
const _BRAVE_LLM_CONTEXT_ENDPOINT = "https://api.search.brave.com/res/v1/llm/context";
|
|
|
|
|
/** Perplexity Search API endpoint URL; _runPerplexitySearchApi POSTs its JSON body here. */
const PERPLEXITY_SEARCH_ENDPOINT = "https://api.perplexity.ai/search";
|
|
|
|
|
|
|
|
|
|
/** xAI Responses API endpoint URL; _runGrokSearch POSTs its request here. */
const XAI_API_ENDPOINT = "https://api.x.ai/v1/responses";
|
|
|
|
|
/** Tool descriptor that _runKimiSearch sends in the `tools` array of each chat-completions request. */
const KIMI_WEB_SEARCH_TOOL = {
  type: "builtin_function",
  function: {
    name: "$web_search",
  },
} as const;
|
|
|
|
|
|
|
|
|
|
/** Module-level map from string keys to cache entries wrapping search payload records. Readers and writers are outside this view. */
const SEARCH_CACHE = new Map<string, CacheEntry<Record<string, unknown>>>();
|
|
|
|
|
|
|
|
|
|
/** Matches a whole string of the form YYYY-MM-DD and captures year, month, and day as groups 1-3. Digits only; no calendar validation. */
const ISO_DATE_PATTERN = /^(\d{4})-(\d{2})-(\d{2})$/;
|
|
|
|
|
@@ -138,217 +126,6 @@ type WebSearchConfig = NonNullable<OpenClawConfig["tools"]>["web"] extends infer
|
|
|
|
|
: undefined
|
|
|
|
|
: undefined;
|
|
|
|
|
|
|
|
|
|
/**
 * One entry in a Brave web search result list.
 * Every field (`title`, `url`, `description`, `age`) is an optional string.
 */
type BraveSearchResult = Partial<Record<"title" | "url" | "description" | "age", string>>;
|
|
|
|
|
|
|
|
|
|
/** Shape read from a Brave web search response: an optional `web` section holding an optional result list. */
type _BraveSearchResponse = {
  web?: { results?: Array<BraveSearchResult> };
};
|
|
|
|
|
|
|
|
|
|
/** One grounding entry of a Brave LLM Context response: a page URL, its title, and its text snippets. */
type BraveLlmContextResult = { url: string; title: string; snippets: string[] };
|
|
|
|
|
/**
 * Shape read from a Brave LLM Context response.
 * `grounding.generic` carries the result entries; `sources` is passed through
 * unchanged by _runBraveLlmContextSearch.
 */
type BraveLlmContextResponse = {
  grounding: {
    generic?: Array<BraveLlmContextResult>;
  };
  sources?: Array<{
    url?: string;
    hostname?: string;
    date?: string;
  }>;
};
|
|
|
|
|
|
|
|
|
|
/**
 * Shape read from an xAI Responses API payload.
 * All nested fields are optional, except the members of `inline_citations` entries.
 */
type GrokSearchResponse = {
  output?: {
    type?: string;
    role?: string;
    /** Present when type === "output_text" (top-level output_text block). */
    text?: string;
    /** Content blocks of a `message` output item. */
    content?: {
      type?: string;
      text?: string;
      annotations?: {
        type?: string;
        url?: string;
        start_index?: number;
        end_index?: number;
      }[];
    }[];
    /** Annotations attached directly to a top-level output item. */
    annotations?: {
      type?: string;
      url?: string;
      start_index?: number;
      end_index?: number;
    }[];
  }[];
  /** Deprecated field - kept for backwards compatibility. */
  output_text?: string;
  citations?: string[];
  inline_citations?: {
    start_index: number;
    end_index: number;
    url: string;
  }[];
};
|
|
|
|
|
|
|
|
|
|
/**
 * A tool call attached to a Kimi assistant message.
 * `function.arguments` is a JSON-encoded string; extractKimiCitations parses it.
 */
type KimiToolCall = {
  id?: string;
  type?: string;
  function?: { name?: string; arguments?: string };
};
|
|
|
|
|
|
|
|
|
|
/** A chat message in a Kimi response: optional role, text content, reasoning text, and tool calls. */
type KimiMessage = {
  role?: string;
  content?: string;
  /** Used by extractKimiMessageText when `content` is blank. */
  reasoning_content?: string;
  tool_calls?: Array<KimiToolCall>;
};
|
|
|
|
|
|
|
|
|
|
/**
 * Shape read from a Kimi chat-completions response.
 * `choices` carries the model turn; `search_results` carries page entries
 * that are used for citations and echoed back as tool output.
 */
type KimiSearchResponse = {
  choices?: {
    finish_reason?: string;
    message?: KimiMessage;
  }[];
  search_results?: {
    title?: string;
    url?: string;
    content?: string;
  }[];
};
|
|
|
|
|
|
|
|
|
|
/**
 * Shape read from a Perplexity chat-completions response.
 * Citations may arrive either as the top-level `citations` list or as
 * `url_citation` annotations on a choice's message.
 */
type PerplexitySearchResponse = {
  choices?: {
    message?: {
      content?: string;
      annotations?: {
        type?: string;
        /** URL placed directly on the annotation. */
        url?: string;
        /** Nested citation object; its `url` takes precedence in extractPerplexityCitations. */
        url_citation?: {
          url?: string;
          title?: string;
          start_index?: number;
          end_index?: number;
        };
      }[];
    };
  }[];
  citations?: string[];
};
|
|
|
|
|
|
|
|
|
|
/**
 * One entry of a Perplexity Search API result list.
 * Every field (`title`, `url`, `snippet`, `date`, `last_updated`) is an optional string.
 */
type PerplexitySearchApiResult = Partial<
  Record<"title" | "url" | "snippet" | "date" | "last_updated", string>
>;
|
|
|
|
|
|
|
|
|
|
/** Shape read from a Perplexity Search API response: an optional result list and an optional id. */
type PerplexitySearchApiResponse = {
  results?: Array<PerplexitySearchApiResult>;
  id?: string;
};
|
|
|
|
|
|
|
|
|
|
/**
 * Collects the distinct citation URLs from a Perplexity response.
 *
 * The top-level `citations` list wins when it has at least one non-blank
 * string. Otherwise URLs come from `url_citation` annotations on every
 * choice's message, preferring `url_citation.url` over the annotation's own
 * `url`. URLs are trimmed and returned in first-seen order.
 *
 * @param data - Parsed response payload.
 * @returns De-duplicated, trimmed citation URLs (possibly empty).
 */
function extractPerplexityCitations(data: PerplexitySearchResponse): string[] {
  // Trimmed string, or undefined for non-strings and blank strings.
  const cleanUrl = (candidate: unknown): string | undefined => {
    const text = typeof candidate === "string" ? candidate.trim() : "";
    return text.length > 0 ? text : undefined;
  };

  // A Set keeps insertion order, which gives first-seen de-duplication.
  const unique = new Set<string>();

  for (const raw of data.citations ?? []) {
    const url = cleanUrl(raw);
    if (url !== undefined) {
      unique.add(url);
    }
  }
  if (unique.size > 0) {
    return [...unique];
  }

  for (const choice of data.choices ?? []) {
    for (const annotation of choice.message?.annotations ?? []) {
      if (annotation.type === "url_citation") {
        const url = cleanUrl(annotation.url_citation?.url ?? annotation.url);
        if (url !== undefined) {
          unique.add(url);
        }
      }
    }
  }

  return [...unique];
}
|
|
|
|
|
|
|
|
|
|
/**
 * Pulls the answer text and annotation-derived citation URLs out of an xAI
 * Responses API payload.
 *
 * Output items are scanned in order and the first non-empty `output_text`
 * wins, whether it sits inside a `message` item's content blocks or directly
 * in the output array. If none is found, the deprecated top-level
 * `output_text` field is used with no citations.
 *
 * @param data - Parsed response payload.
 * @returns The text (or undefined) and the distinct `url_citation` URLs found beside it.
 */
function extractGrokContent(data: GrokSearchResponse): {
  text: string | undefined;
  annotationCitations: string[];
} {
  // Distinct URLs of `url_citation` annotations, in first-seen order.
  const citationUrls = (
    annotations: ReadonlyArray<{ type?: unknown; url?: unknown }>,
  ): string[] => {
    const seen = new Set<string>();
    for (const annotation of annotations) {
      if (annotation.type === "url_citation" && typeof annotation.url === "string") {
        seen.add(annotation.url);
      }
    }
    return [...seen];
  };

  for (const item of data.output ?? []) {
    if (item.type === "message") {
      // xAI Responses API format: text lives in the message's content blocks.
      for (const part of item.content ?? []) {
        if (part.type !== "output_text" || typeof part.text !== "string" || part.text === "") {
          continue;
        }
        return { text: part.text, annotationCitations: citationUrls(part.annotations ?? []) };
      }
    } else if (item.type === "output_text" && typeof item.text === "string" && item.text !== "") {
      // Some xAI responses place output_text blocks directly in the output
      // array without a message wrapper.
      const topLevelAnnotations = Array.isArray(item.annotations) ? item.annotations : [];
      return { text: item.text, annotationCitations: citationUrls(topLevelAnnotations) };
    }
  }

  // Fallback: deprecated output_text field.
  return {
    text: typeof data.output_text === "string" ? data.output_text : undefined,
    annotationCitations: [],
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Shape read from a Gemini `generateContent` response with search grounding.
 * `candidates` carries text parts and grounding metadata; `error` is set
 * when the API reports a failure in the body.
 */
type GeminiGroundingResponse = {
  candidates?: {
    content?: {
      parts?: { text?: string }[];
    };
    groundingMetadata?: {
      /** Source chunks; `web.uri` is what _runGeminiSearch turns into citations. */
      groundingChunks?: {
        web?: { uri?: string; title?: string };
      }[];
      searchEntryPoint?: { renderedContent?: string };
      webSearchQueries?: string[];
    };
  }[];
  error?: {
    code?: number;
    message?: string;
    status?: string;
  };
};
|
|
|
|
|
|
|
|
|
|
/** Gemini model id. Not referenced in the visible part of this file; presumably the default when none is configured — confirm. */
const _DEFAULT_GEMINI_MODEL = "gemini-2.5-flash";
|
|
|
|
|
/** Base URL of the Gemini API; _runGeminiSearch appends `/models/<model>:generateContent` to it. */
const GEMINI_API_BASE = "https://generativelanguage.googleapis.com/v1beta";
|
|
|
|
|
|
|
|
|
|
function resolveSearchConfig(cfg?: OpenClawConfig): WebSearchConfig {
|
|
|
|
|
const search = cfg?.tools?.web?.search;
|
|
|
|
|
if (!search || typeof search !== "object") {
|
|
|
|
|
@@ -367,131 +144,6 @@ function resolveSearchEnabled(params: { search?: WebSearchConfig; sandboxed?: bo
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Tells whether a base URL points straight at Perplexity's own API host.
 *
 * @param baseUrl - Base URL to inspect; surrounding whitespace is ignored.
 * @returns true only when the URL parses and its hostname is `api.perplexity.ai`
 *   (case-insensitive); false for blank or unparsable input.
 */
function isDirectPerplexityBaseUrl(baseUrl: string): boolean {
  const candidate = baseUrl.trim();
  if (candidate.length === 0) {
    return false;
  }

  let host: string;
  try {
    host = new URL(candidate).hostname;
  } catch {
    // Not a parsable absolute URL.
    return false;
  }
  return host.toLowerCase() === "api.perplexity.ai";
}
|
|
|
|
|
|
|
|
|
|
/**
 * Picks the model id to send in a Perplexity chat request.
 *
 * When the base URL is Perplexity's own API host, a leading `perplexity/`
 * prefix is removed from the model id. For any other base URL the id is
 * passed through unchanged.
 *
 * @param baseUrl - Base URL the request will be sent to.
 * @param model - Configured model id.
 * @returns The model id to put in the request body.
 */
function resolvePerplexityRequestModel(baseUrl: string, model: string): string {
  const prefix = "perplexity/";
  const shouldStrip = isDirectPerplexityBaseUrl(baseUrl) && model.startsWith(prefix);
  return shouldStrip ? model.slice(prefix.length) : model;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Thin adapter over withTrustedWebToolsEndpoint for the search providers.
 *
 * Forwards `url`, `init`, and `timeoutSeconds`, and hands only the `response`
 * field of the helper's callback argument to `run`.
 *
 * @param params - Request URL, timeout in seconds, and fetch init options.
 * @param run - Consumer invoked with the response; its result is returned.
 * @returns Whatever `run` resolves to.
 */
async function withTrustedWebSearchEndpoint<T>(
  params: {
    url: string;
    timeoutSeconds: number;
    init: RequestInit;
  },
  run: (response: Response) => Promise<T>,
): Promise<T> {
  const { url, init, timeoutSeconds } = params;
  return withTrustedWebToolsEndpoint({ url, init, timeoutSeconds }, async (guarded) =>
    run(guarded.response),
  );
}
|
|
|
|
|
|
|
|
|
|
/**
 * Runs a Gemini `generateContent` request with the `google_search` tool
 * enabled and returns the answer text plus its grounding citations.
 *
 * @param params.query - Text sent as the single content part.
 * @param params.apiKey - Sent in the `x-goog-api-key` header.
 * @param params.model - Model id interpolated into the endpoint path.
 * @param params.timeoutSeconds - Forwarded to the guarded fetch helper.
 * @returns `content`: the candidate's text parts joined with newlines, or
 *   "No response" when there is no text at all. `citations`: one entry per
 *   grounding chunk that has a web URI, with the URI passed through
 *   resolveCitationRedirectUrl.
 * @throws Error on a non-OK HTTP status, an unparsable JSON body, or an
 *   `error` object in the payload. Messages have `key=...` values masked.
 */
async function _runGeminiSearch(params: {
  query: string;
  apiKey: string;
  model: string;
  timeoutSeconds: number;
}): Promise<{ content: string; citations: Array<{ url: string; title?: string }> }> {
  const endpoint = `${GEMINI_API_BASE}/models/${params.model}:generateContent`;

  // Strip API key from any error detail to prevent accidental key leakage in logs.
  const redactKey = (value: string): string => value.replace(/key=[^&\s]+/gi, "key=***");

  return withTrustedWebSearchEndpoint(
    {
      url: endpoint,
      timeoutSeconds: params.timeoutSeconds,
      init: {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "x-goog-api-key": params.apiKey,
        },
        body: JSON.stringify({
          contents: [
            {
              parts: [{ text: params.query }],
            },
          ],
          tools: [{ google_search: {} }],
        }),
      },
    },
    async (res) => {
      if (!res.ok) {
        const detailResult = await readResponseText(res, { maxBytes: 64_000 });
        const safeDetail = redactKey(detailResult.text || res.statusText);
        throw new Error(`Gemini API error (${res.status}): ${safeDetail}`);
      }

      let data: GeminiGroundingResponse;
      try {
        data = (await res.json()) as GeminiGroundingResponse;
      } catch (err) {
        const safeError = redactKey(String(err));
        throw new Error(`Gemini API returned invalid JSON: ${safeError}`, { cause: err });
      }

      // The API can report a failure inside a 2xx body.
      if (data.error) {
        const rawMsg = data.error.message || data.error.status || "unknown";
        throw new Error(`Gemini API error (${data.error.code}): ${redactKey(rawMsg)}`);
      }

      const candidate = data.candidates?.[0];
      // `||` rather than `??`: joining zero text parts yields "" (not
      // null/undefined), which must also fall back to the placeholder.
      const content =
        (candidate?.content?.parts ?? [])
          .map((part) => part.text)
          .filter(Boolean)
          .join("\n") || "No response";

      // Keep only grounding chunks that carry a web URI.
      const rawCitations: Array<{ url: string; title?: string }> = [];
      for (const chunk of candidate?.groundingMetadata?.groundingChunks ?? []) {
        const uri = chunk.web?.uri;
        if (uri) {
          rawCitations.push({ url: uri, title: chunk.web?.title || undefined });
        }
      }

      // Resolve Google grounding redirect URLs to direct URLs with concurrency cap.
      // Gemini typically returns 3-8 citations; cap at 10 concurrent to be safe.
      const MAX_CONCURRENT_REDIRECTS = 10;
      const citations: Array<{ url: string; title?: string }> = [];
      for (let i = 0; i < rawCitations.length; i += MAX_CONCURRENT_REDIRECTS) {
        const batch = rawCitations.slice(i, i + MAX_CONCURRENT_REDIRECTS);
        const resolved = await Promise.all(
          batch.map(async (citation) => {
            const resolvedUrl = await resolveCitationRedirectUrl(citation.url);
            return { ...citation, url: resolvedUrl };
          }),
        );
        citations.push(...resolved);
      }

      return { content, citations };
    },
  );
}
|
|
|
|
|
|
|
|
|
|
function resolveSearchCount(value: unknown, fallback: number): number {
|
|
|
|
|
const parsed = typeof value === "number" && Number.isFinite(value) ? value : fallback;
|
|
|
|
|
const clamped = Math.max(1, Math.min(MAX_SEARCH_COUNT, Math.floor(parsed)));
|
|
|
|
|
@@ -513,440 +165,6 @@ function isValidIsoDate(value: string): boolean {
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Derives a site name from a URL by taking its hostname.
 *
 * @param url - Absolute URL, or undefined.
 * @returns The hostname, or undefined when the input is missing, empty, or unparsable.
 */
function resolveSiteName(url: string | undefined): string | undefined {
  if (url) {
    try {
      return new URL(url).hostname;
    } catch {
      // Unparsable URL: fall through to undefined.
    }
  }
  return undefined;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts a failed provider response into a thrown Error.
 *
 * Reads the response body with a 64,000-byte cap and uses it as the error
 * detail, falling back to the status text when the body is empty.
 *
 * @param res - The non-OK response.
 * @param providerLabel - Provider name used as the message prefix.
 * @throws Error always, shaped as `<label> API error (<status>): <detail>`.
 */
async function throwWebSearchApiError(res: Response, providerLabel: string): Promise<never> {
  const { text } = await readResponseText(res, { maxBytes: 64_000 });
  throw new Error(`${providerLabel} API error (${res.status}): ${text || res.statusText}`);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Queries the Perplexity Search API and maps its results to the tool's
 * result shape.
 *
 * Optional filters are added to the request body only when set: strings
 * when truthy, arrays when non-empty, token limits when not undefined.
 *
 * @param params - Query, credentials, result count, timeout, and optional filters.
 * @returns One entry per API result. `title` and `description` are passed
 *   through wrapWebContent when non-empty; `siteName` is the URL's hostname.
 * @throws Error via throwWebSearchApiError on a non-OK response.
 */
async function _runPerplexitySearchApi(params: {
  query: string;
  apiKey: string;
  count: number;
  timeoutSeconds: number;
  country?: string;
  searchDomainFilter?: string[];
  searchRecencyFilter?: string;
  searchLanguageFilter?: string[];
  searchAfterDate?: string;
  searchBeforeDate?: string;
  maxTokens?: number;
  maxTokensPerPage?: number;
}): Promise<
  Array<{ title: string; url: string; description: string; published?: string; siteName?: string }>
> {
  // Conditional spreads keep the same key order as sequential assignment would.
  const body: Record<string, unknown> = {
    query: params.query,
    max_results: params.count,
    ...(params.country ? { country: params.country } : {}),
    ...(params.searchDomainFilter?.length
      ? { search_domain_filter: params.searchDomainFilter }
      : {}),
    ...(params.searchRecencyFilter ? { search_recency_filter: params.searchRecencyFilter } : {}),
    ...(params.searchLanguageFilter?.length
      ? { search_language_filter: params.searchLanguageFilter }
      : {}),
    ...(params.searchAfterDate ? { search_after_date: params.searchAfterDate } : {}),
    ...(params.searchBeforeDate ? { search_before_date: params.searchBeforeDate } : {}),
    ...(params.maxTokens !== undefined ? { max_tokens: params.maxTokens } : {}),
    ...(params.maxTokensPerPage !== undefined
      ? { max_tokens_per_page: params.maxTokensPerPage }
      : {}),
  };

  const request = {
    url: PERPLEXITY_SEARCH_ENDPOINT,
    timeoutSeconds: params.timeoutSeconds,
    init: {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Accept: "application/json",
        Authorization: `Bearer ${params.apiKey}`,
        "HTTP-Referer": "https://openclaw.ai",
        "X-Title": "OpenClaw Web Search",
      },
      body: JSON.stringify(body),
    },
  };

  return withTrustedWebSearchEndpoint(request, async (res) => {
    if (!res.ok) {
      return await throwWebSearchApiError(res, "Perplexity Search");
    }

    const payload = (await res.json()) as PerplexitySearchApiResponse;
    const entries = Array.isArray(payload.results) ? payload.results : [];

    return entries.map((entry) => {
      const url = entry.url ?? "";
      // Only non-empty remote text is wrapped; missing text stays "".
      const title = entry.title ? wrapWebContent(entry.title, "web_search") : "";
      const description = entry.snippet ? wrapWebContent(entry.snippet, "web_search") : "";
      return {
        title,
        url,
        description,
        published: entry.date ?? undefined,
        siteName: resolveSiteName(url) || undefined,
      };
    });
  });
}
|
|
|
|
|
|
|
|
|
|
/**
 * Asks a Perplexity-compatible chat-completions endpoint to answer a query.
 *
 * The endpoint is `<baseUrl>/chat/completions`, with one trailing slash
 * removed from the base URL. The model id goes through
 * resolvePerplexityRequestModel. `freshness`, when set, is sent as
 * `search_recency_filter`.
 *
 * @param params - Query, credentials, base URL, model, timeout, and optional freshness.
 * @returns The first choice's message content (or "No response") and the
 *   citations from extractPerplexityCitations.
 * @throws Error via throwWebSearchApiError on a non-OK response.
 */
async function _runPerplexitySearch(params: {
  query: string;
  apiKey: string;
  baseUrl: string;
  model: string;
  timeoutSeconds: number;
  freshness?: string;
}): Promise<{ content: string; citations: string[] }> {
  const baseUrl = params.baseUrl.trim().replace(/\/$/, "");

  const body: Record<string, unknown> = {
    model: resolvePerplexityRequestModel(baseUrl, params.model),
    messages: [{ role: "user", content: params.query }],
    ...(params.freshness ? { search_recency_filter: params.freshness } : {}),
  };

  const request = {
    url: `${baseUrl}/chat/completions`,
    timeoutSeconds: params.timeoutSeconds,
    init: {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${params.apiKey}`,
        "HTTP-Referer": "https://openclaw.ai",
        "X-Title": "OpenClaw Web Search",
      },
      body: JSON.stringify(body),
    },
  };

  return withTrustedWebSearchEndpoint(request, async (res) => {
    if (!res.ok) {
      return await throwWebSearchApiError(res, "Perplexity");
    }

    const payload = (await res.json()) as PerplexitySearchResponse;
    return {
      content: payload.choices?.[0]?.message?.content ?? "No response",
      // Prefer top-level citations; fall back to OpenRouter-style message annotations.
      citations: extractPerplexityCitations(payload),
    };
  });
}
|
|
|
|
|
|
|
|
|
|
/**
 * Sends a query to the xAI Responses API with the `web_search` tool enabled.
 *
 * @param params - Query, credentials, model, and timeout. `inlineCitations`
 *   is accepted but not read by this function.
 * @returns The extracted answer text (or "No response"), the top-level
 *   citations when non-empty (annotation-derived ones otherwise), and the
 *   payload's `inline_citations` as-is.
 * @throws Error via throwWebSearchApiError on a non-OK response.
 */
async function _runGrokSearch(params: {
  query: string;
  apiKey: string;
  model: string;
  timeoutSeconds: number;
  inlineCitations: boolean;
}): Promise<{
  content: string;
  citations: string[];
  inlineCitations?: GrokSearchResponse["inline_citations"];
}> {
  // Note: xAI's /v1/responses endpoint does not support the `include`
  // parameter (returns 400 "Argument not supported: include"). Inline
  // citations are returned automatically when available — we just parse
  // them from the response without requesting them explicitly (#12910).
  const body: Record<string, unknown> = {
    model: params.model,
    input: [{ role: "user", content: params.query }],
    tools: [{ type: "web_search" }],
  };

  const request = {
    url: XAI_API_ENDPOINT,
    timeoutSeconds: params.timeoutSeconds,
    init: {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${params.apiKey}`,
      },
      body: JSON.stringify(body),
    },
  };

  return withTrustedWebSearchEndpoint(request, async (res) => {
    if (!res.ok) {
      return await throwWebSearchApiError(res, "xAI");
    }

    const payload = (await res.json()) as GrokSearchResponse;
    const extracted = extractGrokContent(payload);

    // Prefer top-level citations; fall back to annotation-derived ones.
    const topLevelCitations = payload.citations ?? [];
    const citations =
      topLevelCitations.length > 0 ? topLevelCitations : extracted.annotationCitations;

    return {
      content: extracted.text ?? "No response",
      citations,
      inlineCitations: payload.inline_citations,
    };
  });
}
|
|
|
|
|
|
|
|
|
|
/**
 * Returns the usable text of a Kimi message.
 *
 * @param message - Message to read, or undefined.
 * @returns The trimmed `content` when non-blank, otherwise the trimmed
 *   `reasoning_content` when non-blank, otherwise undefined.
 */
function extractKimiMessageText(message: KimiMessage | undefined): string | undefined {
  // Candidates in priority order; the first non-blank one wins.
  for (const candidate of [message?.content, message?.reasoning_content]) {
    const trimmed = candidate?.trim();
    if (trimmed) {
      return trimmed;
    }
  }
  return undefined;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Collects the distinct citation URLs from a Kimi response.
 *
 * URLs are read, in this order, from:
 *  1. the top-level `search_results` entries, then
 *  2. each tool call of the first choice's message, whose JSON `arguments`
 *     may carry a `url` and/or a `search_results` list.
 *
 * The payload is remote data, so every value is shape-checked before use.
 * Non-string URLs, null or non-object entries, and malformed or non-object
 * tool arguments are skipped individually. One bad entry neither throws nor
 * discards the valid entries after it.
 *
 * @param data - Parsed response payload.
 * @returns Trimmed, de-duplicated URLs in first-seen order (possibly empty).
 */
function extractKimiCitations(data: KimiSearchResponse): string[] {
  const found = new Set<string>();

  // Adds a value only when it is a non-blank string.
  const addUrl = (value: unknown): void => {
    if (typeof value !== "string") {
      return;
    }
    const trimmed = value.trim();
    if (trimmed) {
      found.add(trimmed);
    }
  };

  // Adds the `url` of every object entry in a result list; ignores anything else.
  const addResultUrls = (results: unknown): void => {
    if (!Array.isArray(results)) {
      return;
    }
    for (const entry of results) {
      if (entry !== null && typeof entry === "object") {
        addUrl((entry as { url?: unknown }).url);
      }
    }
  };

  addResultUrls(data.search_results);

  for (const toolCall of data.choices?.[0]?.message?.tool_calls ?? []) {
    const rawArguments = toolCall?.function?.arguments;
    if (!rawArguments) {
      continue;
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(rawArguments);
    } catch {
      // Ignore malformed tool arguments.
      continue;
    }
    if (parsed === null || typeof parsed !== "object") {
      continue;
    }

    const args = parsed as { url?: unknown; search_results?: unknown };
    addUrl(args.url);
    addResultUrls(args.search_results);
  }

  return [...found];
}
|
|
|
|
|
|
|
|
|
|
/**
 * Serializes the response's `search_results` into the JSON string that is
 * sent back to Kimi as tool output.
 *
 * @param data - Parsed response payload.
 * @returns JSON text of `{ search_results: [...] }`, where each entry has
 *   `title`, `url`, and `content`, with missing fields replaced by "".
 */
function buildKimiToolResultContent(data: KimiSearchResponse): string {
  const search_results = (data.search_results ?? []).map(({ title, url, content }) => ({
    title: title ?? "",
    url: url ?? "",
    content: content ?? "",
  }));
  return JSON.stringify({ search_results });
}
|
|
|
|
|
|
|
|
|
|
/**
 * Runs a Kimi chat-completions conversation with the built-in web search
 * tool, for up to three request rounds.
 *
 * Each round POSTs the accumulated messages to `<baseUrl>/chat/completions`
 * (one trailing slash removed from the base URL). If the model finishes
 * with tool calls, the assistant turn and one tool result per tool call
 * that has a non-blank id are appended and another round is made.
 * Otherwise the round's text is the answer. Citations accumulate across
 * rounds.
 *
 * @param params - Query, credentials, base URL, model, and timeout.
 * @returns The final text (or "No response"), or a fixed notice when no
 *   round produced a final answer, plus all collected citations.
 * @throws Error via throwWebSearchApiError on a non-OK response.
 */
async function _runKimiSearch(params: {
  query: string;
  apiKey: string;
  baseUrl: string;
  model: string;
  timeoutSeconds: number;
}): Promise<{ content: string; citations: string[] }> {
  type RoundOutcome = { done: true; content: string; citations: string[] } | { done: false };

  const MAX_ROUNDS = 3;
  const endpoint = `${params.baseUrl.trim().replace(/\/$/, "")}/chat/completions`;
  const messages: Array<Record<string, unknown>> = [{ role: "user", content: params.query }];
  const collectedCitations = new Set<string>();

  // Terminal outcome carrying a snapshot of the citations gathered so far.
  const finish = (text: string | undefined): RoundOutcome => ({
    done: true,
    content: text ?? "No response",
    citations: [...collectedCitations],
  });

  // Handles one round's response; may append follow-up turns to `messages`.
  const handleResponse = async (res: Response): Promise<RoundOutcome> => {
    if (!res.ok) {
      return await throwWebSearchApiError(res, "Kimi");
    }

    const data = (await res.json()) as KimiSearchResponse;
    extractKimiCitations(data).forEach((citation) => collectedCitations.add(citation));

    const choice = data.choices?.[0];
    const message = choice?.message;
    const text = extractKimiMessageText(message);
    const toolCalls = message?.tool_calls ?? [];

    const wantsToolResults = choice?.finish_reason === "tool_calls" && toolCalls.length > 0;
    if (!wantsToolResults) {
      return finish(text);
    }

    // Echo the assistant turn back; reasoning_content only when present.
    const assistantTurn: Record<string, unknown> = {
      role: "assistant",
      content: message?.content ?? "",
    };
    if (message?.reasoning_content) {
      assistantTurn.reasoning_content = message.reasoning_content;
    }
    assistantTurn.tool_calls = toolCalls;
    messages.push(assistantTurn);

    // Every answerable tool call (non-blank id) gets the same tool output.
    const toolContent = buildKimiToolResultContent(data);
    const toolReplies = toolCalls
      .map((call) => call.id?.trim())
      .filter((id): id is string => Boolean(id))
      .map((id) => ({ role: "tool", tool_call_id: id, content: toolContent }));
    messages.push(...toolReplies);

    // Without any tool result there is nothing to continue with.
    return toolReplies.length > 0 ? { done: false } : finish(text);
  };

  for (let attempt = 1; attempt <= MAX_ROUNDS; attempt += 1) {
    const outcome = await withTrustedWebSearchEndpoint(
      {
        url: endpoint,
        timeoutSeconds: params.timeoutSeconds,
        init: {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            Authorization: `Bearer ${params.apiKey}`,
          },
          body: JSON.stringify({
            model: params.model,
            messages,
            tools: [KIMI_WEB_SEARCH_TOOL],
          }),
        },
      },
      handleResponse,
    );

    if (outcome.done) {
      return { content: outcome.content, citations: outcome.citations };
    }
  }

  return {
    content: "Search completed but no final answer was produced.",
    citations: [...collectedCitations],
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Maps the `grounding.generic` entries of a Brave LLM Context response to
 * the tool's result shape.
 *
 * @param data - Parsed response payload.
 * @returns One entry per grounding result, with missing `url`/`title`
 *   replaced by "", `snippets` reduced to non-empty strings, and `siteName`
 *   set to the URL's hostname when it can be resolved. Empty when
 *   `grounding.generic` is not an array.
 */
function mapBraveLlmContextResults(
  data: BraveLlmContextResponse,
): { url: string; title: string; snippets: string[]; siteName?: string }[] {
  const generic = data.grounding?.generic;
  if (!Array.isArray(generic)) {
    return [];
  }

  return generic.map((entry) => {
    const snippets = (entry.snippets ?? []).filter(
      (snippet) => typeof snippet === "string" && snippet.length > 0,
    );
    return {
      url: entry.url ?? "",
      title: entry.title ?? "",
      snippets,
      siteName: resolveSiteName(entry.url) || undefined,
    };
  });
}
|
|
|
|
|
|
|
|
|
|
/**
 * Queries the Brave LLM Context endpoint with a GET request.
 *
 * The query goes in the `q` parameter; `country`, `search_lang`, and
 * `freshness` are added only when set. The API key is sent in the
 * `X-Subscription-Token` header.
 *
 * @param params - Query, credentials, timeout, and optional filters.
 * @returns The mapped grounding results and the payload's `sources` as-is.
 * @throws Error with the response body (or status text) on a non-OK response.
 */
async function _runBraveLlmContextSearch(params: {
  query: string;
  apiKey: string;
  timeoutSeconds: number;
  country?: string;
  search_lang?: string;
  freshness?: string;
}): Promise<{
  results: Array<{
    url: string;
    title: string;
    snippets: string[];
    siteName?: string;
  }>;
  sources?: BraveLlmContextResponse["sources"];
}> {
  const requestUrl = new URL(_BRAVE_LLM_CONTEXT_ENDPOINT);
  requestUrl.searchParams.set("q", params.query);

  // Optional filters, applied in a fixed order and only when non-empty.
  const optionalFilters: Array<[string, string | undefined]> = [
    ["country", params.country],
    ["search_lang", params.search_lang],
    ["freshness", params.freshness],
  ];
  for (const [name, value] of optionalFilters) {
    if (value) {
      requestUrl.searchParams.set(name, value);
    }
  }

  const request = {
    url: requestUrl.toString(),
    timeoutSeconds: params.timeoutSeconds,
    init: {
      method: "GET",
      headers: {
        Accept: "application/json",
        "X-Subscription-Token": params.apiKey,
      },
    },
  };

  return withTrustedWebSearchEndpoint(request, async (res) => {
    if (!res.ok) {
      const { text } = await readResponseText(res, { maxBytes: 64_000 });
      throw new Error(`Brave LLM Context API error (${res.status}): ${text || res.statusText}`);
    }

    const payload = (await res.json()) as BraveLlmContextResponse;
    return { results: mapBraveLlmContextResults(payload), sources: payload.sources };
  });
}
|
|
|
|
|
|
|
|
|
|
/**
 * Canonicalizes a provider id for comparison.
 *
 * @param value - Raw provider id, possibly undefined.
 * @returns The id trimmed and lower-cased, or "" when no value is given.
 */
function normalizeSearchProviderId(value: string | undefined): string {
  if (value == null) {
    return "";
  }
  return value.trim().toLowerCase();
}
|
|
|
|
|
|