Files
openclaw/extensions/parallel/src/parallel-search-normalize.ts
Matt H 983b65b0e0 feat(parallel): add free Parallel Search MCP as the zero-config default web_search provider (#90849)
* feat(parallel): add free Parallel Search MCP as the zero-config default web_search provider

Registers two Parallel web_search providers in the parallel plugin:
- parallel-free: keyless, always the free hosted Search MCP (search.parallel.ai/mcp);
  the zero-config default (autoDetectOrder 76) so web_search works with no key.
- parallel: the existing paid v1 REST API (requires PARALLEL_API_KEY).

Shared query/result normalization lives in parallel-search-normalize.ts (used by both
transports); a minimal Streamable-HTTP JSON-RPC client (parallel-mcp-search.runtime.ts)
backs the free path. UI brands the tool-call chip 'Parallel Web Search' on the free path
via a searchTransport marker; setup default mirrors runtime auto-detect.

* chore(parallel): register parallel-free in doctor legacy-web-search owners

parallel-free is a bundled web_search provider, so add it to the doctor's
exhaustive BUNDLED_LEGACY_WEB_SEARCH_OWNERS map (owned by the parallel plugin)
and the NON_MIGRATED set — it has no legacy tools.web.search.* shape, so this is
a no-op for migration, matching paid parallel/tavily. Keeps the registry
complete. (Spotted by diffing the earlier local WIP branch.)

* docs(parallel): restore concise frontmatter summary

* docs(parallel): clearer, professional copy; drop v1 REST jargon and UI-label claim

- Frame the two providers as Parallel Search (Free) vs paid Parallel Search;
  remove internal 'v1 REST API' wording.
- Remove conversational/overstated phrasing ('out of the box for everyone').
- Remove the 'labeled Parallel Web Search in the UI' claim (only renders in the
  Control UI, not the TUI). Scope the searchTransport code comment accordingly.

* revert(parallel): drop the "Parallel Web Search" tool-call branding

The label only rendered in the Control UI, never the TUI (a separate renderer
via src/agents/tool-display.ts). Extending it would put provider-specific
labeling into a shared/core display path, against the plugin-agnostic-core rule.

Reverts the Control-UI labelOverride wiring and removes the now-orphaned
searchTransport marker from the free provider's result. The result still carries
provider: "parallel-free".

* fix(parallel): cap free Search MCP session_id at its 100-char tools/list contract

The parallel-free provider reused the paid ParallelSearchSchema, whose
session_id allows 1000 chars, but the live Search MCP tools/list schema caps
session_id at 100. Parameterize normalizeParallelSessionId(value, maxLength);
the free path passes 100 (paid keeps 1000) and advertises the tighter bound in
its own ParallelFreeSearchSchema. An over-limit caller id is dropped and a
fresh in-contract id is minted. Updates tests and docs accordingly.
2026-06-06 17:36:28 -07:00

198 lines
6.9 KiB
TypeScript

// Transport-agnostic Parallel search normalization shared by the paid REST
// provider (`parallel`) and the free Search MCP provider (`parallel-free`).
// Both transports return the same v1 result shape, so query/result handling
// lives here instead of being copied into each runtime.
import {
buildSearchCacheKey,
resolveSiteName,
wrapWebContent,
} from "openclaw/plugin-sdk/provider-web-search";
import { normalizeOptionalString } from "openclaw/plugin-sdk/string-coerce-runtime";
// Internal-only bounds (the model-facing tool schema declares its own copies).
// Upper bound used by resolveParallelSearchCount when clamping a requested
// result count; the lower bound there is 1.
const PARALLEL_MAX_SEARCH_COUNT = 40;
// Parallel v1 Search caps each search_queries entry at 200 chars, the objective
// field at 5000, and accepts up to 5 search queries. See
// https://docs.parallel.ai/search/best-practices.
// Over-long query entries and objectives are truncated to these limits rather
// than rejected; surplus queries beyond the fifth are dropped.
const PARALLEL_MAX_SEARCH_QUERY_CHARS = 200;
const PARALLEL_MAX_OBJECTIVE_CHARS = 5000;
const PARALLEL_MAX_SEARCH_QUERIES = 5;
// Paid v1 REST accepts session ids up to 1000 chars, but the free Search MCP
// `tools/list` schema caps session_id at 100. Each runtime passes its own limit
// (and advertises it in the tool schema) so callers never send an out-of-contract id.
// Both limits are exported so a runtime can hand the matching one to
// normalizeParallelSessionId as its `maxLength` argument.
export const PARALLEL_SESSION_ID_MAX_LENGTH = 1000;
export const PARALLEL_FREE_SESSION_ID_MAX_LENGTH = 100;
// Longest client model string kept by normalizeParallelClientModel; longer
// values are truncated to this length.
const PARALLEL_CLIENT_MODEL_MAX_LENGTH = 100;
/**
 * One entry of a Parallel search response's `results` array.
 *
 * Every field is optional and typed `unknown`: nothing here is trusted until
 * mapParallelResults checks it at runtime (`title`, `url` and `publish_date`
 * must be strings, `excerpts` must be an array whose string members are kept).
 */
export type ParallelSearchResult = {
title?: unknown;
url?: unknown;
publish_date?: unknown;
excerpts?: unknown;
};
/**
 * Top-level shape of a Parallel search response as received from either
 * transport, before any validation.
 *
 * Only `results` is read in this module (normalizeParallelResults keeps its
 * object entries). The remaining fields are declared for the runtimes that
 * consume them; their exact shapes are not established here, hence `unknown`.
 */
export type ParallelSearchResponse = {
search_id?: unknown;
session_id?: unknown;
results?: unknown;
warnings?: unknown;
usage?: unknown;
};
/**
 * Clamps a requested result count into the range Parallel search accepts.
 *
 * Fractions are floored, values below 1 become 1, and values above
 * `PARALLEL_MAX_SEARCH_COUNT` (including `Infinity`) become that maximum.
 *
 * @param value - Requested number of results.
 * @returns An integer between 1 and `PARALLEL_MAX_SEARCH_COUNT`, inclusive.
 */
export function resolveParallelSearchCount(value: number): number {
  // Math.min/Math.max propagate NaN, so without this guard a NaN request would
  // slip through the clamp and be returned as NaN. Fall back to the lower bound.
  if (Number.isNaN(value)) {
    return 1;
  }
  return Math.max(1, Math.min(PARALLEL_MAX_SEARCH_COUNT, Math.floor(value)));
}
/**
 * Validates a caller-supplied session id against a transport-specific length
 * limit.
 *
 * The id is never truncated: an id longer than `maxLength` is discarded
 * entirely so a shortened id cannot be mistaken for a different session.
 *
 * NOTE(review): `normalizeOptionalString` presumably trims the input and
 * yields `undefined` for blank values — confirm against the SDK helper.
 *
 * @param value - Session id as provided by the caller, if any.
 * @param maxLength - Longest id the target transport accepts.
 * @returns The normalized id, or `undefined` when it is missing, empty, or
 *   longer than `maxLength`.
 */
export function normalizeParallelSessionId(
  value: string | undefined,
  maxLength: number,
): string | undefined {
  const candidate = normalizeOptionalString(value);
  if (!candidate) {
    return undefined;
  }
  const fitsContract = candidate.length <= maxLength;
  return fitsContract ? candidate : undefined;
}
/**
 * Shortens `text` to at most `maxLength` UTF-16 code units.
 *
 * A bare `slice(0, maxLength)` can cut between the two halves of a surrogate
 * pair (for example an emoji straddling the limit) and leave a lone high
 * surrogate at the end of the string. When the cut would land inside a pair,
 * the orphaned half is dropped as well, so the result can be one code unit
 * shorter than `maxLength` but never longer.
 */
function truncateParallelText(text: string, maxLength: number): string {
  if (text.length <= maxLength) {
    return text;
  }
  const lastKept = text.charCodeAt(maxLength - 1);
  const firstDropped = text.charCodeAt(maxLength);
  const cutsSurrogatePair =
    lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
  return text.slice(0, cutsSurrogatePair ? maxLength - 1 : maxLength);
}

/**
 * Normalizes the free-text search objective.
 *
 * NOTE(review): `normalizeOptionalString` presumably trims the input and
 * yields `undefined` for blank values — confirm against the SDK helper.
 *
 * @param value - Objective as provided by the caller, if any.
 * @returns The objective truncated to `PARALLEL_MAX_OBJECTIVE_CHARS`, or
 *   `undefined` when it is missing or empty.
 */
export function normalizeParallelObjective(value: string | undefined): string | undefined {
  const trimmed = normalizeOptionalString(value);
  return trimmed ? truncateParallelText(trimmed, PARALLEL_MAX_OBJECTIVE_CHARS) : undefined;
}

/**
 * Normalizes the client model identifier forwarded to Parallel.
 *
 * NOTE(review): `normalizeOptionalString` presumably trims the input and
 * yields `undefined` for blank values — confirm against the SDK helper.
 *
 * @param value - Client model name as provided by the caller, if any.
 * @returns The name truncated to `PARALLEL_CLIENT_MODEL_MAX_LENGTH`, or
 *   `undefined` when it is missing or empty.
 */
export function normalizeParallelClientModel(value: string | undefined): string | undefined {
  const trimmed = normalizeOptionalString(value);
  return trimmed ? truncateParallelText(trimmed, PARALLEL_CLIENT_MODEL_MAX_LENGTH) : undefined;
}

// Parallel's API caps each entry at 200 chars and accepts up to 5 queries. We
// trim, drop empties/duplicates, truncate over-long entries to the API's hard
// limit, and cap to the API's maximum so a malformed call from the model
// doesn't 422 the request. See https://docs.parallel.ai/search/best-practices.
/**
 * Turns an untrusted `search_queries` value into a clean list of query strings.
 *
 * Non-array input yields an empty list. Non-string entries and entries that
 * are blank after trimming are skipped. Duplicates are detected after
 * truncation (case-sensitively), and first-seen order is preserved.
 *
 * @param value - Raw `search_queries` argument from the tool call.
 * @returns At most `PARALLEL_MAX_SEARCH_QUERIES` unique queries, each at most
 *   `PARALLEL_MAX_SEARCH_QUERY_CHARS` long.
 */
export function normalizeParallelSearchQueries(value: unknown): string[] {
  const candidates = Array.isArray(value) ? value : [];
  const seen = new Set<string>();
  const out: string[] = [];
  for (const entry of candidates) {
    if (typeof entry !== "string") {
      continue;
    }
    const trimmed = entry.trim();
    if (!trimmed) {
      continue;
    }
    const capped = truncateParallelText(trimmed, PARALLEL_MAX_SEARCH_QUERY_CHARS);
    if (seen.has(capped)) {
      continue;
    }
    seen.add(capped);
    out.push(capped);
    // Stop as soon as the API's maximum is reached; later entries are ignored.
    if (out.length === PARALLEL_MAX_SEARCH_QUERIES) {
      break;
    }
  }
  return out;
}
/**
 * Builds the error payload returned when a tool call supplies no usable
 * `search_queries`. A fresh object is created on every call, so callers may
 * mutate the result freely.
 *
 * @returns An object with a machine-readable `error` code, a human-readable
 *   `message`, and a `docs` link.
 */
export function invalidSearchQueriesPayload() {
  const message =
    "search_queries must be a non-empty array of keyword strings (max 5, max 200 chars each). See https://docs.parallel.ai/search/best-practices.";
  const docs = "https://docs.openclaw.ai/tools/parallel-search";
  return { error: "invalid_search_queries", message, docs };
}
/**
 * Extracts the usable result entries from an untrusted response payload.
 *
 * @param payload - Parsed response body of unknown shape.
 * @returns The entries of `payload.results` that are non-null, non-array
 *   objects, in their original order; an empty array when the payload is not
 *   an object or `results` is not an array.
 */
export function normalizeParallelResults(payload: unknown): ParallelSearchResult[] {
  const rawResults =
    payload && typeof payload === "object"
      ? (payload as ParallelSearchResponse).results
      : undefined;
  if (!Array.isArray(rawResults)) {
    return [];
  }
  const kept: ParallelSearchResult[] = [];
  for (const candidate of rawResults) {
    // Primitives, null and nested arrays cannot be result records.
    if (candidate !== null && typeof candidate === "object" && !Array.isArray(candidate)) {
      kept.push(candidate);
    }
  }
  return kept;
}
/**
 * Converts a Parallel v1 response into the entries returned by `web_search`.
 *
 * Each usable result becomes an object with `title`, `url`, `description` and
 * `siteName`. The title and every string excerpt are passed through
 * `wrapWebContent`; `description` is the wrapped excerpts joined by blank
 * lines. `published` and `excerpts` are only present when the result has a
 * non-empty publish date or at least one string excerpt, respectively.
 *
 * @param response - Response payload from either Parallel transport.
 * @returns One mapped entry per object found in `response.results`.
 */
export function mapParallelResults(response: ParallelSearchResponse): Record<string, unknown>[] {
  const toWrappedEntry = (hit: ParallelSearchResult): Record<string, unknown> => {
    const rawTitle = typeof hit.title === "string" ? hit.title : "";
    const url = typeof hit.url === "string" ? hit.url : "";
    const rawDate = hit.publish_date;
    const published = typeof rawDate === "string" && rawDate.length > 0 ? rawDate : undefined;

    // Non-string excerpt members are skipped rather than stringified.
    const rawExcerpts = Array.isArray(hit.excerpts) ? hit.excerpts : [];
    const excerpts = rawExcerpts.flatMap((excerpt) =>
      typeof excerpt === "string" ? [wrapWebContent(excerpt, "web_search")] : [],
    );

    const mapped: Record<string, unknown> = {
      // An empty title stays empty instead of being wrapped.
      title: rawTitle ? wrapWebContent(rawTitle, "web_search") : "",
      url,
      description: excerpts.join("\n\n"),
      siteName: resolveSiteName(url) || undefined,
    };
    if (published) {
      mapped.published = published;
    }
    if (excerpts.length > 0) {
      mapped.excerpts = excerpts;
    }
    return mapped;
  };

  return normalizeParallelResults(response).map((hit) => toWrappedEntry(hit));
}
/**
 * Removes `sessionId` from a result payload before it is cached.
 *
 * A session id generated by Parallel must not be cached: identical queries
 * from unrelated tasks would otherwise be handed the same id. Session ids
 * supplied by the caller are part of the cache key, so a cache hit can only
 * ever be served for the matching id.
 *
 * @param payload - Result payload about to be cached; never mutated.
 * @returns The same object when it has no `sessionId`, otherwise a shallow
 *   copy without that property.
 */
export function stripParallelGeneratedSessionId(
  payload: Record<string, unknown>,
): Record<string, unknown> {
  if ("sessionId" in payload) {
    const withoutSession = { ...payload };
    delete withoutSession.sessionId;
    return withoutSession;
  }
  return payload;
}
/**
 * Builds the cache key for a Parallel search request.
 *
 * Every input that can change the returned payload is part of the key, so two
 * requests share a cached payload only when all of them match.
 *
 * @param params.endpoint - Transport endpoint (REST URL or free MCP URL).
 * @param params.objective - Normalized objective, if any.
 * @param params.searchQueries - Normalized search queries, in request order.
 * @param params.count - Resolved result count.
 * @param params.sessionId - Caller-provided session id, if any.
 * @param params.clientModel - Normalized client model, if any.
 * @returns The key produced by `buildSearchCacheKey` for these parts.
 */
export function buildParallelCacheKey(params: {
  endpoint: string;
  objective?: string;
  searchQueries: readonly string[];
  count: number;
  sessionId?: string;
  clientModel?: string;
}): string {
  return buildSearchCacheKey([
    "parallel",
    // The transport endpoint (REST URL or the free MCP URL) partitions paid-REST
    // vs free-MCP and REST endpoint overrides so transports never share cached
    // payloads.
    params.endpoint,
    params.objective,
    // Encode the array as JSON instead of joining on a delimiter: trimming does
    // not strip characters such as NUL from inside a query, so any join
    // delimiter could occur in the data and let distinct arrays (["ab","c"] vs
    // ["a","bc"], or ["a\u0000b"] vs ["a","b"]) collide. JSON quoting and
    // escaping keeps every distinct array distinct.
    JSON.stringify(params.searchQueries),
    params.count,
    // Different Parallel sessions can return different ranked excerpts for the
    // same query set, so partition cached payloads by caller-provided session.
    params.sessionId,
    // Parallel tailors defaults/optimizations to client_model per its docs, so
    // partition cached payloads by it; otherwise two models hitting the same
    // query inside the cache TTL would silently share ranked excerpts.
    params.clientModel,
  ]);
}