mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-22 23:58:10 +00:00
* feat(parallel): add free Parallel Search MCP as the zero-config default web_search provider
Registers two Parallel web_search providers in the parallel plugin:
- parallel-free: keyless, always the free hosted Search MCP (search.parallel.ai/mcp);
the zero-config default (autoDetectOrder 76) so web_search works with no key.
- parallel: the existing paid v1 REST API (requires PARALLEL_API_KEY).
Shared query/result normalization lives in parallel-search-normalize.ts (used by both
transports); a minimal Streamable-HTTP JSON-RPC client (parallel-mcp-search.runtime.ts)
backs the free path. UI brands the tool-call chip 'Parallel Web Search' on the free path
via a searchTransport marker; setup default mirrors runtime auto-detect.
* chore(parallel): register parallel-free in doctor legacy-web-search owners
parallel-free is a bundled web_search provider, so add it to the doctor's
exhaustive BUNDLED_LEGACY_WEB_SEARCH_OWNERS map (owned by the parallel plugin)
and the NON_MIGRATED set — it has no legacy tools.web.search.* shape, so this is
a no-op for migration, matching paid parallel/tavily. Keeps the registry
complete. (Spotted by diffing the earlier local WIP branch.)
* docs(parallel): restore concise frontmatter summary
* docs(parallel): clearer, professional copy; drop v1 REST jargon and UI-label claim
- Frame the two providers as Parallel Search (Free) vs paid Parallel Search;
remove internal 'v1 REST API' wording.
- Remove conversational/overstated phrasing ('out of the box for everyone').
- Remove the 'labeled Parallel Web Search in the UI' claim (only renders in the
Control UI, not the TUI). Scope the searchTransport code comment accordingly.
* revert(parallel): drop the "Parallel Web Search" tool-call branding
The label only rendered in the Control UI, never the TUI (a separate renderer
via src/agents/tool-display.ts). Extending it would put provider-specific
labeling into a shared/core display path, against the plugin-agnostic-core rule.
Reverts the Control-UI labelOverride wiring and removes the now-orphaned
searchTransport marker from the free provider's result. The result still carries
provider: "parallel-free".
* fix(parallel): cap free Search MCP session_id at its 100-char tools/list contract
The free parallel-free provider reused the paid ParallelSearchSchema, whose
session_id allows 1000 chars, but the live Search MCP tools/list schema caps
session_id at 100. Parameterize normalizeParallelSessionId(value, maxLength);
the free path passes 100 (paid keeps 1000) and advertises the tighter bound in
its own ParallelFreeSearchSchema. An over-limit caller id is dropped and a
fresh in-contract id is minted. Updates tests and docs accordingly.
198 lines
6.9 KiB
TypeScript
198 lines
6.9 KiB
TypeScript
// Transport-agnostic Parallel search normalization shared by the paid REST
// provider (`parallel`) and the free Search MCP provider (`parallel-free`).
// Both transports return the same v1 result shape, so query/result handling
// lives here instead of being copied into each runtime.
|
|
import {
|
|
buildSearchCacheKey,
|
|
resolveSiteName,
|
|
wrapWebContent,
|
|
} from "openclaw/plugin-sdk/provider-web-search";
|
|
import { normalizeOptionalString } from "openclaw/plugin-sdk/string-coerce-runtime";
|
|
|
|
// Upper bound on the number of results requested per search. Internal-only:
// the model-facing tool schema declares its own copy of this bound.
const PARALLEL_MAX_SEARCH_COUNT = 40;

// Parallel v1 Search request limits: at most 5 search_queries entries, each
// capped at 200 chars, plus an objective field capped at 5000 chars. See
// https://docs.parallel.ai/search/best-practices.
const PARALLEL_MAX_SEARCH_QUERIES = 5;
const PARALLEL_MAX_SEARCH_QUERY_CHARS = 200;
const PARALLEL_MAX_OBJECTIVE_CHARS = 5000;

// Session id length limits differ per transport: paid v1 REST accepts up to
// 1000 chars, while the free Search MCP `tools/list` schema caps session_id at
// 100. Each runtime passes its own limit (and advertises it in the tool schema)
// so callers never send an out-of-contract id.
export const PARALLEL_SESSION_ID_MAX_LENGTH = 1000;
export const PARALLEL_FREE_SESSION_ID_MAX_LENGTH = 100;

// Longest client-model string kept by normalizeParallelClientModel; anything
// longer is truncated to this length.
const PARALLEL_CLIENT_MODEL_MAX_LENGTH = 100;
|
|
|
|
/**
 * One entry of a Parallel search response's `results` array.
 *
 * Every field is optional and typed `unknown`, so consumers have to narrow
 * each value (for example with `typeof`) before using it.
 */
export type ParallelSearchResult = {
  /** Result title; only string values are used when mapping results. */
  title?: unknown;
  /** Result URL; only string values are used when mapping results. */
  url?: unknown;
  /** Publication date; surfaced only when it is a non-empty string. */
  publish_date?: unknown;
  /** Excerpt list; only string members of an array are kept. */
  excerpts?: unknown;
};
|
|
|
|
/**
 * Loosely typed Parallel v1 search response body.
 *
 * All fields are optional and `unknown` because the payload is untrusted until
 * narrowed. The helpers here read only `results`.
 */
export type ParallelSearchResponse = {
  search_id?: unknown;
  session_id?: unknown;
  /** Expected to be an array of result objects; anything else is ignored. */
  results?: unknown;
  warnings?: unknown;
  usage?: unknown;
};
|
|
|
|
/**
 * Clamps a requested result count to an integer between 1 and
 * PARALLEL_MAX_SEARCH_COUNT (inclusive).
 *
 * Fractional values are floored before clamping. `NaN` resolves to the lower
 * bound (1) instead of leaking through as `NaN`; `Infinity` and `-Infinity`
 * clamp to the upper and lower bound respectively.
 *
 * @param value Requested number of results.
 * @returns An integer within the supported range.
 */
export function resolveParallelSearchCount(value: number): number {
  // Math.min/Math.max return NaN as soon as any argument is NaN, so the clamp
  // below cannot repair it; handle it up front.
  if (Number.isNaN(value)) {
    return 1;
  }
  return Math.max(1, Math.min(PARALLEL_MAX_SEARCH_COUNT, Math.floor(value)));
}
|
|
|
|
/**
 * Validates a caller-supplied session id against a transport-specific limit.
 *
 * The value is first passed through `normalizeOptionalString` (presumably
 * trimming it — confirm against the SDK helper). An id that comes back empty
 * or is longer than `maxLength` is rejected rather than truncated.
 *
 * @param value Raw session id from the caller, if any.
 * @param maxLength Maximum accepted id length for the calling transport.
 * @returns The normalized id, or `undefined` when it is missing or too long.
 */
export function normalizeParallelSessionId(
  value: string | undefined,
  maxLength: number,
): string | undefined {
  const sessionId = normalizeOptionalString(value);
  if (!sessionId) {
    return undefined;
  }
  if (sessionId.length <= maxLength) {
    return sessionId;
  }
  return undefined;
}
|
|
|
|
/**
 * Normalizes the free-text search objective and caps it at
 * PARALLEL_MAX_OBJECTIVE_CHARS UTF-16 code units.
 *
 * Over-long objectives are truncated rather than rejected. The cut never lands
 * between the two halves of a surrogate pair: if it would, the dangling high
 * surrogate is dropped too, so the result stays well-formed and within the cap.
 *
 * @param value Raw objective from the caller, if any.
 * @returns The normalized objective, or `undefined` when it is missing/empty.
 */
export function normalizeParallelObjective(value: string | undefined): string | undefined {
  const objective = normalizeOptionalString(value);
  if (!objective) {
    return undefined;
  }
  if (objective.length <= PARALLEL_MAX_OBJECTIVE_CHARS) {
    return objective;
  }
  let end = PARALLEL_MAX_OBJECTIVE_CHARS;
  const lastKept = objective.charCodeAt(end - 1);
  const firstDropped = objective.charCodeAt(end);
  const splitsSurrogatePair =
    lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
  if (splitsSurrogatePair) {
    // Keeping only the high surrogate would put a lone surrogate in the request.
    end -= 1;
  }
  return objective.slice(0, end);
}
|
|
|
|
/**
 * Normalizes the client-model identifier, truncating it to
 * PARALLEL_CLIENT_MODEL_MAX_LENGTH characters.
 *
 * @param value Raw client-model string from the caller, if any.
 * @returns The normalized identifier, or `undefined` when it is missing/empty.
 */
export function normalizeParallelClientModel(value: string | undefined): string | undefined {
  const model = normalizeOptionalString(value);
  if (!model) {
    return undefined;
  }
  // slice() returns the same string when it is already within the limit.
  return model.slice(0, PARALLEL_CLIENT_MODEL_MAX_LENGTH);
}
|
|
|
|
/**
 * Sanitizes the model-supplied `search_queries` value into a list the API
 * accepts.
 *
 * Parallel's API caps each entry at 200 chars and accepts up to 5 queries. This
 * trims entries, drops non-strings/empties/duplicates, truncates over-long
 * entries to the API's hard limit, and stops at the API's maximum so a
 * malformed call from the model doesn't 422 the request. See
 * https://docs.parallel.ai/search/best-practices.
 *
 * Truncation never splits a UTF-16 surrogate pair: when the cut would land
 * between its halves, the dangling high surrogate is dropped as well.
 *
 * @param value Untrusted `search_queries` argument.
 * @returns Unique, trimmed, length-capped queries in first-seen order; empty
 *   when `value` is not an array or holds no usable entries.
 */
export function normalizeParallelSearchQueries(value: unknown): string[] {
  if (!Array.isArray(value)) {
    return [];
  }
  // A Set de-duplicates and preserves insertion order.
  const unique = new Set<string>();
  for (const entry of value) {
    if (typeof entry !== "string") {
      continue;
    }
    const trimmed = entry.trim();
    if (!trimmed) {
      continue;
    }
    let capped = trimmed;
    if (trimmed.length > PARALLEL_MAX_SEARCH_QUERY_CHARS) {
      let end = PARALLEL_MAX_SEARCH_QUERY_CHARS;
      const lastKept = trimmed.charCodeAt(end - 1);
      const firstDropped = trimmed.charCodeAt(end);
      const splitsSurrogatePair =
        lastKept >= 0xd800 &&
        lastKept <= 0xdbff &&
        firstDropped >= 0xdc00 &&
        firstDropped <= 0xdfff;
      if (splitsSurrogatePair) {
        // Keeping only the high surrogate would send a lone surrogate.
        end -= 1;
      }
      capped = trimmed.slice(0, end);
    }
    unique.add(capped);
    if (unique.size === PARALLEL_MAX_SEARCH_QUERIES) {
      break;
    }
  }
  return [...unique];
}
|
|
|
|
/**
 * Builds the structured error payload describing the `search_queries`
 * contract: an error code, a human-readable message, and a docs link.
 */
export function invalidSearchQueriesPayload() {
  const message =
    "search_queries must be a non-empty array of keyword strings (max 5, max 200 chars each). See https://docs.parallel.ai/search/best-practices.";
  const docs = "https://docs.openclaw.ai/tools/parallel-search";
  return { error: "invalid_search_queries", message, docs };
}
|
|
|
|
/**
 * Extracts the usable result objects from an untrusted response payload.
 *
 * @param payload Parsed response body of unknown shape.
 * @returns The non-null, non-array object members of `payload.results`, in
 *   order; an empty array when `payload` is not an object or `results` is not
 *   an array.
 */
export function normalizeParallelResults(payload: unknown): ParallelSearchResult[] {
  const rawResults =
    payload && typeof payload === "object"
      ? (payload as ParallelSearchResponse).results
      : undefined;
  if (!Array.isArray(rawResults)) {
    return [];
  }
  const objects: ParallelSearchResult[] = [];
  for (const entry of rawResults) {
    // Keep plain object entries only; primitives, null, and arrays are dropped.
    if (entry && typeof entry === "object" && !Array.isArray(entry)) {
      objects.push(entry);
    }
  }
  return objects;
}
|
|
|
|
/**
 * Converts a Parallel v1 response into `web_search` result entries.
 *
 * Each entry carries `title`, `url`, `description` (the wrapped excerpts joined
 * by blank lines) and `siteName`. `published` and `excerpts` are added only
 * when a non-empty publish date / at least one string excerpt exists. Title and
 * excerpt text is passed through `wrapWebContent`.
 *
 * @param response Response body whose `results` are to be mapped.
 * @returns One mapped entry per usable result object, in response order.
 */
export function mapParallelResults(response: ParallelSearchResponse): Record<string, unknown>[] {
  return normalizeParallelResults(response).map((result) => {
    const rawTitle = typeof result.title === "string" ? result.title : "";
    const url = typeof result.url === "string" ? result.url : "";

    // Non-string excerpt members are skipped; the rest are wrapped one by one.
    const excerpts = Array.isArray(result.excerpts)
      ? result.excerpts.flatMap((excerpt) =>
          typeof excerpt === "string" ? [wrapWebContent(excerpt, "web_search")] : [],
        )
      : [];

    const mapped: Record<string, unknown> = {
      title: rawTitle ? wrapWebContent(rawTitle, "web_search") : "",
      url,
      description: excerpts.join("\n\n"),
      siteName: resolveSiteName(url) || undefined,
    };
    if (typeof result.publish_date === "string" && result.publish_date) {
      mapped.published = result.publish_date;
    }
    if (excerpts.length > 0) {
      mapped.excerpts = excerpts;
    }
    return mapped;
  });
}
|
|
|
|
/**
 * Returns the payload without its `sessionId` so a Parallel-generated id is
 * never written to the cache.
 *
 * Identical queries from unrelated tasks would otherwise be handed the same
 * id on a cache hit. Caller-supplied session ids are part of the cache key, so
 * a cache hit only ever returns the matching id.
 *
 * @param payload Result payload about to be cached.
 * @returns `payload` itself when it has no `sessionId`; otherwise a shallow
 *   copy with that property removed (the input is not mutated).
 */
export function stripParallelGeneratedSessionId(
  payload: Record<string, unknown>,
): Record<string, unknown> {
  if ("sessionId" in payload) {
    const withoutSessionId = { ...payload };
    delete withoutSessionId.sessionId;
    return withoutSessionId;
  }
  return payload;
}
|
|
|
|
/**
 * Builds the cache key for a Parallel search request from every input that
 * can change the returned payload.
 *
 * @param params.endpoint Transport endpoint (REST URL or the free MCP URL).
 * @param params.objective Normalized objective, if any.
 * @param params.searchQueries Normalized search queries.
 * @param params.count Requested result count.
 * @param params.sessionId Caller-provided session id, if any.
 * @param params.clientModel Normalized client model, if any.
 * @returns The key produced by `buildSearchCacheKey` for these parts.
 */
export function buildParallelCacheKey(params: {
  endpoint: string;
  objective?: string;
  searchQueries: readonly string[];
  count: number;
  sessionId?: string;
  clientModel?: string;
}): string {
  const { endpoint, objective, searchQueries, count, sessionId, clientModel } = params;

  // A NUL delimiter (not expected inside query text) keeps distinct arrays such
  // as ["ab","c"] and ["a","bc"] from collapsing onto the same key.
  const joinedQueries = searchQueries.join("\u0000");

  return buildSearchCacheKey([
    "parallel",
    // Partitions paid-REST vs free-MCP, and REST endpoint overrides, so
    // transports never share cached payloads.
    endpoint,
    objective,
    joinedQueries,
    count,
    // Different Parallel sessions can return different ranked excerpts for the
    // same query set, so cached payloads are split by caller-provided session.
    sessionId,
    // Parallel tailors defaults/optimizations to client_model per its docs;
    // without this, two models issuing the same query inside the cache TTL
    // would silently share ranked excerpts.
    clientModel,
  ]);
}
|