Files
openclaw/scripts/tool-search-gateway-e2e.ts
Peter Steinberger f91de52f0d refactor: move runtime state to SQLite
* refactor: remove stale file-backed shims

* fix: harden sqlite state ci boundaries

* refactor: store matrix idb snapshots in sqlite

* fix: satisfy rebased CI guardrails

* refactor: store current conversation bindings in sqlite table

* refactor: store tui last sessions in sqlite table

* refactor: reset sqlite schema history

* refactor: drop unshipped sqlite table migration

* refactor: remove plugin index file rollback

* refactor: drop unshipped sqlite sidecar migrations

* refactor: remove runtime commitments kv migration

* refactor: preserve kysely sync result types

* refactor: drop unshipped sqlite schema migration table

* test: keep session usage coverage sqlite-backed

* refactor: keep sqlite migration doctor-only

* refactor: isolate device legacy imports

* refactor: isolate push voicewake legacy imports

* refactor: isolate remaining runtime legacy imports

* refactor: tighten sqlite migration guardrails

* test: cover sqlite persisted enum parsing

* refactor: isolate legacy update and tui imports

* refactor: tighten sqlite state ownership

* refactor: move legacy imports behind doctor

* refactor: remove legacy session row lookup

* refactor: canonicalize memory transcript locators

* refactor: drop transcript path scope fallbacks

* refactor: drop runtime legacy session delivery pruning

* refactor: store tts prefs only in sqlite

* refactor: remove cron store path runtime

* refactor: use cron sqlite store keys

* refactor: rename telegram message cache scope

* refactor: read memory dreaming status from sqlite

* refactor: rename cron status store key

* refactor: stop remembering transcript file paths

* test: use sqlite locators in agent fixtures

* refactor: remove file-shaped commitments and cron store surfaces

* refactor: keep compaction transcript handles out of session rows

* refactor: derive transcript handles from session identity

* refactor: derive runtime transcript handles

* refactor: remove gateway session locator reads

* refactor: remove transcript locator from session rows

* refactor: store raw stream diagnostics in sqlite

* refactor: remove file-shaped transcript rotation

* refactor: hide legacy trajectory paths from runtime

* refactor: remove runtime transcript file bridges

* refactor: repair database-first rebase fallout

* refactor: align tests with database-first state

* refactor: remove transcript file handoffs

* refactor: sync post-compaction memory by transcript scope

* refactor: run codex app-server sessions by id

* refactor: bind codex runtime state by session id

* refactor: pass memory transcripts by sqlite scope

* refactor: remove transcript locator cleanup leftovers

* test: remove stale transcript file fixtures

* refactor: remove transcript locator test helper

* test: make cron sqlite keys explicit

* test: remove cron runtime store paths

* test: remove stale session file fixtures

* test: use sqlite cron keys in diagnostics

* refactor: remove runtime delivery queue backfill

* test: drop fake export session file mocks

* refactor: rename acp session read failure flag

* refactor: rename acp row session key

* refactor: remove session store test seams

* refactor: move legacy session parser tests to doctor

* refactor: reindex managed memory in place

* refactor: drop stale session store wording

* refactor: rename session row helpers

* refactor: rename sqlite session entry modules

* refactor: remove transcript locator leftovers

* refactor: trim file-era audit wording

* refactor: clean managed media through sqlite

* fix: prefer explicit agent for exports

* fix: use prepared agent for session resets

* fix: canonicalize legacy codex binding import

* test: rename state cleanup helper

* docs: align backup docs with sqlite state

* refactor: drop legacy Pi usage auth fallback

* refactor: move legacy auth profile imports to doctor

* refactor: keep Pi model discovery auth in memory

* refactor: remove MSTeams legacy learning key fallback

* refactor: store model catalog config in sqlite

* refactor: use sqlite model catalog at runtime

* refactor: remove model json compatibility aliases

* refactor: store auth profiles in sqlite

* refactor: seed copied auth profiles in sqlite

* refactor: make auth profile runtime sqlite-addressed

* refactor: migrate hermes secrets into sqlite auth store

* refactor: move plugin install config migration to doctor

* refactor: rename plugin index audit checks

* test: drop auth file assumptions

* test: remove legacy transcript file assertions

* refactor: drop legacy cli session aliases

* refactor: store skill uploads in sqlite

* refactor: keep subagent attachments in sqlite vfs

* refactor: drop subagent attachment cleanup state

* refactor: move legacy session aliases to doctor

* refactor: require node 24 for sqlite state runtime

* refactor: move provider caches into sqlite state

* fix: harden virtual agent filesystem

* refactor: enforce database-first runtime state

* refactor: rename compaction transcript rotation setting

* test: clean sqlite refactor test types

* refactor: consolidate sqlite runtime state

* refactor: model session conversations in sqlite

* refactor: stop deriving cron delivery from session keys

* refactor: stop classifying sessions from key shape

* refactor: hydrate announce targets from typed delivery

* refactor: route heartbeat delivery from typed sqlite context

* refactor: tighten typed sqlite session routing

* refactor: remove session origin routing shadow

* refactor: drop session origin shadow fixtures

* perf: query sqlite vfs paths by prefix

* refactor: use typed conversation metadata for sessions

* refactor: prefer typed session routing metadata

* refactor: require typed session routing metadata

* refactor: resolve group tool policy from typed sessions

* refactor: delete dead session thread info bridge

* Show Codex subscription reset times in channel errors (#80456)

* feat(plugin-sdk): consolidate session workflow APIs

* fix(agents): allow read-only agent mount reads

* [codex] refresh plugin regression fixtures

* fix(agents): restore compaction gateway logs

* test: tighten gateway startup assertions

* Redact persisted secret-shaped payloads [AI] (#79006)

* test: tighten device pair notify assertions

* test: tighten hermes secret assertions

* test: assert matrix client error shapes

* test: assert config compat warnings

* fix(heartbeat): remap cron-run exec events to session keys (#80214)

* fix(codex): route btw through native side threads

* fix(auth): accept friendly OpenAI order for Codex profiles

* fix(codex): rotate auth profiles inside harness

* fix: keep browser status page probe within timeout

* test: assert agents add outputs

* test: pin cron read status

* fix(agents): avoid Pi resource discovery stalls

Co-authored-by: dataCenter430 <titan032000@gmail.com>

* fix: retire timed-out codex app-server clients

* test: tighten qa lab runtime assertions

* test: check security fix outputs

* test: verify extension runtime messages

* feat(wake): expose typed sessionKey on wake protocol + system event CLI

* fix(gateway): await session_end during shutdown drain and track channel + compaction lifecycle paths (#57790)

* test: guard talk consult call helper

* fix(codex): scale context engine projection (#80761)

* fix(codex): scale context engine projection

* fix: document Codex context projection scaling

* fix: document Codex context projection scaling

* fix: document Codex context projection scaling

* fix: document Codex context projection scaling

* chore: align Codex projection changelog

* chore: realign Codex projection changelog

* fix: isolate Codex projection patch

---------

Co-authored-by: Eva (agent) <eva+agent-78055@100yen.org>
Co-authored-by: Josh Lehman <josh@martian.engineering>

* refactor: move agent runtime state toward piless

* refactor: remove cron session reaper

* refactor: move session management to sqlite

* refactor: finish database-first state migration

* chore: refresh generated sqlite db types

* refactor: remove stale file-backed shims

* test: harden kysely type coverage

# Conflicts:
#	.agents/skills/kysely-database-access/SKILL.md
#	src/infra/kysely-sync.types.test.ts
#	src/proxy-capture/store.sqlite.test.ts
#	src/state/openclaw-agent-db.test.ts
#	src/state/openclaw-state-db.test.ts

* refactor: remove cron store path runtime

* refactor: keep compaction transcript handles out of session rows

* refactor: derive embedded transcripts from sqlite identity

* refactor: remove embedded transcript locator handoff

* refactor: remove runtime transcript file bridges

* refactor: remove transcript file handoffs

* refactor: remove MSTeams legacy learning key fallback

* refactor: store model catalog config in sqlite

* refactor: use sqlite model catalog at runtime

# Conflicts:
#	docs/cli/secrets.md
#	docs/gateway/authentication.md
#	docs/gateway/secrets.md

* fix: keep oauth sibling sync sqlite-local

# Conflicts:
#	src/commands/onboard-auth.test.ts

* refactor: remove task session store maintenance

# Conflicts:
#	src/commands/tasks.ts

* refactor: keep diagnostics in state sqlite

* refactor: enforce database-first runtime state

* refactor: consolidate sqlite runtime state

* Show Codex subscription reset times in channel errors (#80456)

* fix(codex): refresh subscription limit resets

* fix(codex): format reset times for channels

* Update CHANGELOG with latest changes and fixes

Updated CHANGELOG with recent fixes and improvements.

* fix(codex): keep command load failures on codex surface

* fix(codex): format account rate limits as rows

* fix(codex): summarize account limits as usage status

* fix(codex): simplify account limit status

* test: tighten subagent announce queue assertion

* test: tighten session delete lifecycle assertions

* test: tighten cron ops assertions

* fix: track cron execution milestones

* test: tighten hermes secret assertions

* test: assert matrix sync store payloads

* test: assert config compat warnings

* fix(codex): align btw side thread semantics

* fix(codex): honor codex fallback blocking

* fix(agents): avoid Pi resource discovery stalls

* test: tighten codex event assertions

* test: tighten cron assertions

* Fix Codex app-server OAuth harness auth

* refactor: move agent runtime state toward piless

* refactor: move device and push state to sqlite

* refactor: move runtime json state imports to doctor

* refactor: finish database-first state migration

* chore: refresh generated sqlite db types

* refactor: clarify cron sqlite store keys

* refactor: remove stale file-backed shims

* refactor: bind codex runtime state by session id

* test: expect sqlite trajectory branch export

* refactor: rename session row helpers

* fix: keep legacy device identity import in doctor

* refactor: enforce database-first runtime state

* refactor: consolidate sqlite runtime state

* build: align pi contract wrappers

* chore: repair database-first rebase

* refactor: remove session file test contracts

* test: update gateway session expectations

* refactor: stop routing from session compatibility shadows

* refactor: stop persisting session route shadows

* refactor: use typed delivery context in clients

* refactor: stop echoing session route shadows

* refactor: repair embedded runner rebase imports

# Conflicts:
#	src/agents/pi-embedded-runner/run/attempt.tool-call-argument-repair.ts

* refactor: align pi contract imports

* refactor: satisfy kysely sync helper guard

* refactor: remove file transcript bridge remnants

* refactor: remove session locator compatibility

* refactor: remove session file test contracts

* refactor: keep rebase database-first clean

* refactor: remove session file assumptions from e2e

* docs: clarify database-first goal state

* test: remove legacy store markers from sqlite runtime tests

* refactor: remove legacy store assumptions from runtime seams

* refactor: align sqlite runtime helper seams

* test: update memory recall sqlite audit mock

* refactor: align database-first runtime type seams

* test: clarify doctor cron legacy store names

* fix: preserve sqlite session route projections

* test: fix copilot token cache test syntax

* docs: update database-first proof status

* test: align database-first test fixtures

* docs: update database-first proof status

* refactor: clean extension database-first drift

* test: align agent session route proof

* test: clarify doctor legacy path fixtures

* chore: clean database-first changed checks

* chore: repair database-first rebase markers

* build: allow baileys git subdependency

* chore: repair exp-vfs rebase drift

* chore: finish exp-vfs rebase cleanup

* chore: satisfy rebase lint drift

* chore: fix qqbot rebase type seam

* chore: fix rebase drift leftovers

* fix: keep auth profile oauth secrets out of sqlite

* fix: repair rebase drift tests

* test: stabilize pairing request ordering

* test: use source manifests in plugin contract checks

* fix: restore gateway session metadata after rebase

* fix: repair database-first rebase drift

* fix: clean up database-first rebase fallout

* test: stabilize line quick reply receipt time

* fix: repair extension rebase drift

* test: keep transcript redaction tests sqlite-backed

* fix: carry injected transcript redaction through sqlite

* chore: clean database branch rebase residue

* fix: repair database branch CI drift

* fix: repair database branch CI guard drift

* fix: stabilize oauth tls preflight test

* test: align database branch fast guards

* test: repair build artifact boundary guards

* chore: clean changelog rebase markers

---------

Co-authored-by: pashpashpash <nik@vault77.ai>
Co-authored-by: Eva <eva@100yen.org>
Co-authored-by: stainlu <stainlu@newtype-ai.org>
Co-authored-by: Jason Zhou <jason.zhou.design@gmail.com>
Co-authored-by: Ruben Cuevas <hi@rubencu.com>
Co-authored-by: Pavan Kumar Gondhi <pavangondhi@gmail.com>
Co-authored-by: Shakker <shakkerdroid@gmail.com>
Co-authored-by: Kaspre <36520309+Kaspre@users.noreply.github.com>
Co-authored-by: dataCenter430 <titan032000@gmail.com>
Co-authored-by: Kaspre <kaspre@gmail.com>
Co-authored-by: pandadev66 <nova.full.stack@outlook.com>
Co-authored-by: Eva <admin@100yen.org>
Co-authored-by: Eva (agent) <eva+agent-78055@100yen.org>
Co-authored-by: Josh Lehman <josh@martian.engineering>
Co-authored-by: jeffjhunter <support@aipersonamethod.com>
2026-05-13 13:15:12 +01:00

561 lines
16 KiB
TypeScript

import fs from "node:fs/promises";
import net from "node:net";
import os from "node:os";
import path from "node:path";
import process from "node:process";
import { pathToFileURL } from "node:url";
import { startQaMockOpenAiServer } from "../extensions/qa-lab/src/providers/mock-openai/server.js";
import { stageQaMockAuthProfiles } from "../extensions/qa-lab/src/providers/shared/mock-auth.js";
import { buildQaGatewayConfig } from "../extensions/qa-lab/src/qa-gateway-config.js";
import { resetConfigRuntimeState } from "../src/config/config.js";
import {
listSqliteSessionTranscripts,
loadSqliteSessionTranscriptEvents,
} from "../src/config/sessions/transcript-store.sqlite.js";
import { startGatewayServer } from "../src/gateway/server.js";
/**
 * Gateway configuration variant exercised by one run.
 * The "code" lane additionally allow-lists the Tool Search bridge tools and
 * turns on `tools.toolSearch`; the "normal" lane only enables the fixture plugin.
 */
type Lane = "normal" | "code";
/** Measurements collected by `runLane` for a single lane. */
type LaneResult = {
// Lane that produced this result.
lane: Lane;
// `status` field of the gateway response when it is a string, otherwise "".
status: string;
// Number of requests the mock provider recorded during this lane's gateway call.
providerRequestCount: number;
// Length of the `raw` string of the lane's last provider request (string length, 0 when absent).
providerRawBytes: number;
// Characters of instructions plus system/developer input content in the last provider request body.
providerSystemPromptChars: number;
// Length of the `tools` array in the last provider request body (0 when absent).
providerDeclaredToolCount: number;
// `plannedToolName` of every lane request that reported one as a string.
providerPlannedTools: string[];
// Names of `function_call` items found in the gateway response output.
gatewayOutputToolNames: string[];
// Text pieces of `message` items in the gateway response output, joined with newlines.
gatewayOutputText: string;
// Occurrence counts per tool name found in the serialized SQLite transcript events.
transcriptToolMentions: Record<string, number>;
};
// Plugin id used for the generated fixture plugin's manifest, entry module, and config allow-lists.
const FAKE_PLUGIN_ID = "tool-search-e2e-fixture";
/**
 * Minimal assertion helper for this script.
 *
 * @param condition Value that must be truthy.
 * @param message Message of the `Error` thrown when `condition` is falsy.
 * @throws Error when `condition` is falsy.
 */
function assert(condition: unknown, message: string): asserts condition {
  if (condition) {
    return;
  }
  throw new Error(message);
}
/**
 * Finds a TCP port that is currently free on 127.0.0.1.
 *
 * A throwaway server is bound to port 0 so the OS picks the port; the server is
 * closed again before the promise settles.
 *
 * @returns The port the OS assigned, or 0 when `address()` does not return an object.
 * @throws Rejects with the server's "error" event payload or with the close error.
 */
async function freePort(): Promise<number> {
  const probe = net.createServer();
  return await new Promise<number>((resolve, reject) => {
    probe.once("error", reject);
    probe.listen(0, "127.0.0.1", () => {
      const bound = probe.address();
      const assignedPort = bound !== null && typeof bound === "object" ? bound.port : 0;
      probe.close((closeError) => {
        if (closeError) {
          reject(closeError);
          return;
        }
        resolve(assignedPort);
      });
    });
  });
}
/**
 * Builds the catalog of fake function-tool specs served by the fixture plugin.
 *
 * Tools are named `fake_plugin_tool_NN` (1-based, zero-padded to two digits) and
 * every tool takes a single required string `marker` argument.
 *
 * @param count Number of tool specs to generate (defaults to 36).
 * @returns One spec object per tool, in ascending order.
 */
function buildFakeTools(count = 36) {
  // Deliberately wordy so prompt-size differences between lanes are measurable.
  const describeTool = (ordinal: number): string =>
    `Fake plugin tool ${ordinal}.` +
    " " +
    "Used by the Tool Search gateway E2E to prove a large plugin-owned tool catalog can be hidden from the model prompt and still called through the compact bridge." +
    " " +
    "The description is intentionally non-trivial so prompt-size regression is measurable.";

  return Array.from({ length: count }, (_unused, position) => {
    const ordinal = position + 1;
    const toolName = "fake_plugin_tool_" + String(ordinal).padStart(2, "0");
    return {
      type: "function",
      name: toolName,
      description: describeTool(ordinal),
      // A fresh schema object per tool so specs never share nested references.
      parameters: {
        type: "object",
        properties: {
          marker: {
            type: "string",
            description: "Lane marker supplied by the scripted model.",
          },
        },
        required: ["marker"],
        additionalProperties: false,
      },
      strict: true,
    };
  });
}
/**
 * Counts non-overlapping occurrences of `needle` in `haystack`, scanning left to right.
 *
 * @param haystack Text to search.
 * @param needle Substring to look for; an empty needle counts as zero matches.
 * @returns Number of matches found.
 */
function countOccurrences(haystack: string, needle: string): number {
  let total = 0;
  if (needle) {
    // Resume after each hit so matches never overlap.
    for (
      let at = haystack.indexOf(needle, 0);
      at >= 0;
      at = haystack.indexOf(needle, at + needle.length)
    ) {
      total += 1;
    }
  }
  return total;
}
/**
 * Serializes a transcript event to JSON text for substring counting.
 *
 * @param event Arbitrary event payload.
 * @returns The JSON text, or "" when the value cannot be represented as JSON.
 */
function stringifyTranscriptEvent(event: unknown): string {
  try {
    // JSON.stringify yields `undefined` (not a string) for `undefined`, functions
    // and symbols; coalesce so callers always receive a real string.
    return JSON.stringify(event) ?? "";
  } catch {
    // Circular structures and BigInt values throw; treat them as empty text.
    return "";
  }
}
/**
 * Counts how often the bridge tool and the target tool are mentioned in the
 * "qa" agent's SQLite session transcripts.
 *
 * @param params.stateDir State directory, passed to the transcript store via `OPENCLAW_STATE_DIR`.
 * @param params.targetTool Name of the tool whose mentions are counted next to "tool_search_code".
 * @returns Occurrence counts keyed by "tool_search_code" and by `targetTool`.
 */
async function readSqliteTranscriptMentions(params: {
  stateDir: string;
  targetTool: string;
}): Promise<Record<string, number>> {
  const { stateDir, targetTool } = params;
  // Copy of the process environment pointed at this lane's state directory.
  const scopedEnv = { ...process.env, OPENCLAW_STATE_DIR: stateDir };
  const tally: Record<string, number> = {
    tool_search_code: 0,
    [targetTool]: 0,
  };
  const transcripts = listSqliteSessionTranscripts({ env: scopedEnv, agentId: "qa" });
  for (const { agentId, sessionId } of transcripts) {
    const entries = loadSqliteSessionTranscriptEvents({ env: scopedEnv, agentId, sessionId });
    for (const { event } of entries) {
      const serialized = stringifyTranscriptEvent(event);
      tally.tool_search_code += countOccurrences(serialized, "tool_search_code");
      tally[targetTool] += countOccurrences(serialized, targetTool);
    }
  }
  return tally;
}
/**
 * Fetches `url` and decodes the response body.
 *
 * @param url Request URL.
 * @param init Optional fetch options.
 * @returns The parsed JSON body, `{}` for an empty body, or the raw text when it is not valid JSON.
 * @throws Error containing the status code, URL and body text for non-2xx responses.
 */
async function fetchJson(url: string, init?: RequestInit): Promise<unknown> {
  const response = await fetch(url, init);
  const bodyText = await response.text();
  if (!response.ok) {
    throw new Error(`HTTP ${response.status} from ${url}: ${bodyText}`);
  }
  if (!bodyText) {
    return {};
  }
  try {
    return JSON.parse(bodyText);
  } catch {
    // Not JSON: hand the text back unchanged.
    return bodyText;
  }
}
/**
 * Extracts the names of all `function_call` items from a Responses-style payload.
 *
 * Tolerates malformed payloads: a non-object response, a non-array `output`,
 * and null or primitive output items are skipped instead of throwing.
 *
 * @param response Decoded gateway response.
 * @returns Tool names in output order; empty when nothing matches.
 */
function outputToolNames(response: unknown): string[] {
  if (!response || typeof response !== "object") {
    return [];
  }
  const output = (response as { output?: unknown }).output;
  if (!Array.isArray(output)) {
    return [];
  }
  return (output as unknown[]).flatMap((item) => {
    // JSON arrays may contain null entries; guard before reading properties.
    if (!item || typeof item !== "object") {
      return [];
    }
    const record = item as { type?: unknown; name?: unknown };
    return record.type === "function_call" && typeof record.name === "string"
      ? [record.name]
      : [];
  });
}
/**
 * Collects the text of every `message` item in a Responses-style payload.
 *
 * Tolerates malformed payloads: a non-object response, a non-array `output`,
 * and null or primitive items or content pieces are skipped instead of throwing.
 *
 * @param response Decoded gateway response.
 * @returns All string `text` fields of message content pieces, joined with "\n".
 */
function outputText(response: unknown): string {
  if (!response || typeof response !== "object") {
    return "";
  }
  const output = (response as { output?: unknown }).output;
  if (!Array.isArray(output)) {
    return "";
  }
  const texts: string[] = [];
  for (const item of output as unknown[]) {
    // JSON arrays may contain null entries; guard before reading properties.
    if (!item || typeof item !== "object") {
      continue;
    }
    const message = item as { type?: unknown; content?: unknown };
    if (message.type !== "message" || !Array.isArray(message.content)) {
      continue;
    }
    for (const piece of message.content as unknown[]) {
      if (!piece || typeof piece !== "object") {
        continue;
      }
      const text = (piece as { text?: unknown }).text;
      if (typeof text === "string") {
        texts.push(text);
      }
    }
  }
  return texts.join("\n");
}
/**
 * Flattens a message `content` value into plain text.
 *
 * @param content Either a string, or an array of content pieces.
 * @returns The string itself; for arrays, each piece's string `text` (or "" when
 *   the piece has none) joined with "\n"; "" for any other value.
 */
function readContentText(content: unknown): string {
  if (Array.isArray(content)) {
    const parts: string[] = [];
    for (const piece of content) {
      const text =
        piece && typeof piece === "object" ? (piece as { text?: unknown }).text : undefined;
      // Pieces without text still contribute an empty slot, preserving separators.
      parts.push(typeof text === "string" ? text : "");
    }
    return parts.join("\n");
  }
  return typeof content === "string" ? content : "";
}
/**
 * Measures how many characters of system-level prompt a provider request carries.
 *
 * @param body Provider request body.
 * @returns Length of string `instructions` plus the flattened content length of
 *   every `input` item whose role is "system" or "developer"; 0 for non-object bodies.
 */
function countSystemPromptChars(body: unknown): number {
  if (!body || typeof body !== "object") {
    return 0;
  }
  const { instructions, input } = body as { instructions?: unknown; input?: unknown };
  const instructionChars = typeof instructions === "string" ? instructions.length : 0;
  if (!Array.isArray(input)) {
    return instructionChars;
  }
  return (input as unknown[]).reduce<number>((sum, item) => {
    if (!item || typeof item !== "object") {
      return sum;
    }
    const { role, content } = item as { role?: unknown; content?: unknown };
    const isPromptRole = role === "system" || role === "developer";
    return isPromptRole ? sum + readContentText(content).length : sum;
  }, instructionChars);
}
/**
 * Builds the gateway config for one lane and writes it to `params.configPath` as JSON.
 *
 * Both lanes allow-list and enable the fixture plugin and add its directory to
 * the plugin load paths. The "code" lane additionally allow-lists the Tool
 * Search bridge tools and sets `tools.toolSearch`. Mock auth profiles are staged
 * for the "qa" agent, and the HTTP responses endpoint is enabled.
 *
 * @param params Lane, filesystem locations, gateway port, mock provider base URL
 *   (without the `/v1` suffix) and the fixture plugin directory.
 */
async function writeConfig(params: {
  lane: Lane;
  stateDir: string;
  configPath: string;
  workspaceDir: string;
  gatewayPort: number;
  providerBaseUrl: string;
  fakePluginDir: string;
}) {
  const isCodeLane = params.lane === "code";
  // Only the code lane exposes the compact bridge tools to the model.
  const bridgeToolNames = isCodeLane
    ? ["tool_search_code", "tool_search", "tool_describe", "tool_call"]
    : [];

  let cfg = buildQaGatewayConfig({
    bind: "loopback",
    gatewayPort: params.gatewayPort,
    gatewayToken: "tool-search-e2e",
    providerBaseUrl: `${params.providerBaseUrl}/v1`,
    workspaceDir: params.workspaceDir,
    controlUiEnabled: false,
    providerMode: "mock-openai",
  });

  // Sets dedupe while keeping insertion order: existing entries, fixture id, bridge tools.
  const allowedTools = new Set([
    ...(cfg.tools?.alsoAllow ?? []),
    FAKE_PLUGIN_ID,
    ...bridgeToolNames,
  ]);
  cfg = {
    ...cfg,
    tools: {
      ...cfg.tools,
      alsoAllow: [...allowedTools],
    },
  };
  if (isCodeLane) {
    cfg = {
      ...cfg,
      tools: {
        ...cfg.tools,
        toolSearch: true,
      },
    };
  }

  const allowedPlugins = new Set([...(cfg.plugins?.allow ?? []), FAKE_PLUGIN_ID]);
  const pluginLoadPaths = new Set([...(cfg.plugins?.load?.paths ?? []), params.fakePluginDir]);
  cfg = {
    ...cfg,
    plugins: {
      ...cfg.plugins,
      allow: [...allowedPlugins],
      entries: {
        ...cfg.plugins?.entries,
        [FAKE_PLUGIN_ID]: {
          enabled: true,
        },
      },
      load: {
        ...cfg.plugins?.load,
        paths: [...pluginLoadPaths],
      },
    },
  };

  cfg = await stageQaMockAuthProfiles({
    cfg,
    stateDir: params.stateDir,
    agentIds: ["qa"],
    providers: ["mock-openai", "openai", "anthropic"],
  });

  // Note: this replaces any existing `gateway.http` section rather than merging into it.
  cfg = {
    ...cfg,
    gateway: {
      ...cfg.gateway,
      http: {
        endpoints: {
          responses: {
            enabled: true,
          },
        },
      },
    },
  };

  const serialized = `${JSON.stringify(cfg, null, 2)}\n`;
  await fs.mkdir(path.dirname(params.configPath), { recursive: true });
  await fs.writeFile(params.configPath, serialized, "utf8");
}
/**
 * Writes the fixture plugin (package.json, openclaw.plugin.json, index.js) into
 * `<rootDir>/fake-plugin`.
 *
 * The generated entry module imports `definePluginEntry` from the repository's
 * `src/plugin-sdk/plugin-entry.ts` via a file URL and registers one tool per
 * spec; each tool replies with a `FAKE_PLUGIN_OK` text containing its name and input.
 *
 * @param params.rootDir Directory under which the plugin folder is created.
 * @param params.repoRoot Repository root used to locate the plugin SDK entry.
 * @param params.fakeTools Tool specs to declare in the manifest and embed in the entry module.
 * @returns Absolute path of the plugin directory.
 */
async function writeFakePlugin(params: {
  rootDir: string;
  repoRoot: string;
  fakeTools: ReturnType<typeof buildFakeTools>;
}): Promise<string> {
  const { rootDir, repoRoot, fakeTools } = params;
  const pluginDir = path.join(rootDir, "fake-plugin");

  // Pretty-printed JSON with a trailing newline, written into the plugin directory.
  const writeJsonFile = async (fileName: string, value: unknown): Promise<void> => {
    await fs.writeFile(path.join(pluginDir, fileName), `${JSON.stringify(value, null, 2)}\n`, "utf8");
  };

  const packageManifest = {
    name: "@openclaw/tool-search-e2e-fixture",
    version: "0.0.0",
    type: "module",
    openclaw: {
      extensions: ["./index.js"],
    },
  };
  const pluginManifest = {
    id: FAKE_PLUGIN_ID,
    activation: {
      onStartup: true,
    },
    name: "Tool Search E2E Fixture",
    description: "Fake plugin with a large tool catalog for Tool Search gateway validation.",
    contracts: {
      tools: fakeTools.map((tool) => tool.name),
    },
    configSchema: {
      type: "object",
      additionalProperties: false,
      properties: {},
    },
  };

  const sdkEntryPath = path.join(repoRoot, "src/plugin-sdk/plugin-entry.ts");
  const pluginEntryUrl = pathToFileURL(sdkEntryPath).href;
  // Source lines of the generated entry module; the trailing "" yields a final newline.
  const entrySourceLines = [
    `import { definePluginEntry } from ${JSON.stringify(pluginEntryUrl)};`,
    `const tools = ${JSON.stringify(fakeTools, null, 2)};`,
    "export default definePluginEntry({",
    ` id: ${JSON.stringify(FAKE_PLUGIN_ID)},`,
    " name: 'Tool Search E2E Fixture',",
    " register(api) {",
    " for (const spec of tools) {",
    " api.registerTool({",
    " name: spec.name,",
    " label: spec.name,",
    " description: spec.description,",
    " parameters: spec.parameters,",
    " execute: async (_toolCallId, input) => ({",
    " content: [{ type: 'text', text: `FAKE_PLUGIN_OK ${spec.name} ${JSON.stringify(input ?? {})}` }],",
    " details: { status: 'ok', tool: spec.name, input },",
    " }),",
    " }, { name: spec.name });",
    " }",
    " },",
    "});",
    "",
  ];

  await fs.mkdir(pluginDir, { recursive: true });
  await writeJsonFile("package.json", packageManifest);
  await writeJsonFile("openclaw.plugin.json", pluginManifest);
  await fs.writeFile(path.join(pluginDir, "index.js"), entrySourceLines.join("\n"), "utf8");
  return pluginDir;
}
/**
 * Runs one lane end to end: writes the lane config, starts a gateway on a free
 * port, sends a single `/v1/responses` request, and gathers measurements from
 * the mock provider's debug log, the gateway response and the SQLite transcripts.
 *
 * Side effects: sets `OPENCLAW_STATE_DIR`, `OPENCLAW_CONFIG_PATH` and
 * `OPENCLAW_TEST_FAST` on `process.env` (not restored afterwards) and resets the
 * config runtime state before starting and after closing the gateway.
 *
 * @param params Lane name, temp root, mock provider base URL, target tool name,
 *   tool specs and fixture plugin directory.
 * @returns The collected lane measurements.
 */
async function runLane(params: {
  lane: Lane;
  rootDir: string;
  providerBaseUrl: string;
  targetTool: string;
  fakeTools: ReturnType<typeof buildFakeTools>;
  fakePluginDir: string;
}): Promise<LaneResult> {
  type ProviderRequest = {
    raw?: string;
    body?: { tools?: unknown[] };
    instructions?: string;
    plannedToolName?: string;
  };

  const { lane, providerBaseUrl, targetTool } = params;
  const stateDir = path.join(params.rootDir, lane, "state");
  const configPath = path.join(stateDir, "openclaw.json");
  const workspaceDir = path.join(params.rootDir, lane, "workspace");
  const gatewayPort = await freePort();
  await fs.mkdir(workspaceDir, { recursive: true });
  await writeConfig({
    lane,
    stateDir,
    configPath,
    workspaceDir,
    gatewayPort,
    providerBaseUrl,
    fakePluginDir: params.fakePluginDir,
  });

  // The environment must point at this lane before cached config state is reset
  // and the gateway starts.
  process.env.OPENCLAW_STATE_DIR = stateDir;
  process.env.OPENCLAW_CONFIG_PATH = configPath;
  process.env.OPENCLAW_TEST_FAST = "1";
  resetConfigRuntimeState();
  const server = await startGatewayServer(gatewayPort, {
    host: "127.0.0.1",
    auth: { mode: "none" },
    controlUiEnabled: false,
    openResponsesEnabled: true,
  });

  try {
    const debugRequestsUrl = `${providerBaseUrl}/debug/requests`;
    // The provider log is shared across lanes; remember where this lane starts.
    const beforeRequests = (await fetchJson(debugRequestsUrl)) as unknown[];

    const requestBody = JSON.stringify({
      model: "openclaw/qa",
      input: [
        {
          type: "message",
          role: "user",
          content: [
            {
              type: "input_text",
              text: `tool search qa check target=${targetTool}`,
            },
          ],
        },
      ],
      max_output_tokens: 256,
      stream: false,
    });
    const response = await fetchJson(`http://127.0.0.1:${gatewayPort}/v1/responses`, {
      method: "POST",
      headers: {
        "content-type": "application/json",
        "x-openclaw-scopes": "operator.write",
        "x-openclaw-agent": "qa",
      },
      body: requestBody,
    });

    const requests = (await fetchJson(debugRequestsUrl)) as ProviderRequest[];
    const laneRequests = requests.slice(beforeRequests.length);
    const lastRequest = laneRequests.at(-1) ?? {};
    const responseStatus = (response as { status?: unknown }).status;
    const declaredTools = lastRequest.body?.tools;
    const plannedTools = laneRequests
      .map((request) => request.plannedToolName)
      .filter((name): name is string => typeof name === "string");
    const transcriptToolMentions = await readSqliteTranscriptMentions({ stateDir, targetTool });

    return {
      lane,
      status: typeof responseStatus === "string" ? responseStatus : "",
      providerRequestCount: laneRequests.length,
      providerRawBytes: typeof lastRequest.raw === "string" ? lastRequest.raw.length : 0,
      providerSystemPromptChars: countSystemPromptChars(lastRequest.body),
      providerDeclaredToolCount: Array.isArray(declaredTools) ? declaredTools.length : 0,
      providerPlannedTools: plannedTools,
      gatewayOutputToolNames: outputToolNames(response),
      gatewayOutputText: outputText(response),
      transcriptToolMentions,
    };
  } finally {
    await server.close({ reason: `${params.lane} lane complete` });
    resetConfigRuntimeState();
  }
}
/**
 * Script entry point: runs the "normal" and "code" lanes against one shared mock
 * provider, asserts that Tool Search shrinks the provider request while the
 * target tool is still reached, and prints a JSON summary to stdout.
 *
 * The temp root directory is intentionally left on disk; its path is part of the
 * printed summary.
 *
 * @throws Error when any lane expectation fails or a lane itself fails.
 */
async function main() {
  const rootDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-tool-search-"));
  const provider = await startQaMockOpenAiServer();
  try {
    // Fixture setup happens inside the try so the mock provider is stopped even
    // when writing the plugin files fails.
    const fakeTools = buildFakeTools();
    const fakePluginDir = await writeFakePlugin({
      rootDir,
      repoRoot: process.cwd(),
      fakeTools,
    });
    const targetTool = "fake_plugin_tool_17";
    const normal = await runLane({
      lane: "normal",
      rootDir,
      providerBaseUrl: provider.baseUrl,
      targetTool,
      fakeTools,
      fakePluginDir,
    });
    const code = await runLane({
      lane: "code",
      rootDir,
      providerBaseUrl: provider.baseUrl,
      targetTool,
      fakeTools,
      fakePluginDir,
    });
    // Normal lane: the model plans the target tool directly and its output is echoed.
    assert(
      normal.providerPlannedTools.includes(targetTool) &&
        normal.gatewayOutputText.includes("FAKE_PLUGIN_OK") &&
        normal.gatewayOutputText.includes(targetTool),
      `normal lane did not call ${targetTool}`,
    );
    // Code lane: the model plans the bridge tool, which in turn reaches the target.
    assert(
      code.providerPlannedTools.includes("tool_search_code") &&
        code.gatewayOutputText.includes(targetTool) &&
        code.transcriptToolMentions[targetTool] > 0,
      `code lane did not bridge-call ${targetTool}`,
    );
    assert(
      normal.providerDeclaredToolCount > code.providerDeclaredToolCount,
      `expected Tool Search to expose fewer tools to provider: normal=${normal.providerDeclaredToolCount} code=${code.providerDeclaredToolCount}`,
    );
    assert(
      normal.providerRawBytes > code.providerRawBytes,
      `expected Tool Search request to be smaller: normal=${normal.providerRawBytes} code=${code.providerRawBytes}`,
    );
    assert(
      code.transcriptToolMentions.tool_search_code > 0 &&
        code.transcriptToolMentions[targetTool] > 0,
      "code lane SQLite transcript did not record bridge and target tool mentions",
    );
    const summary = {
      ok: true,
      rootDir,
      targetTool,
      normal,
      code,
      reduction: {
        providerRawBytes: normal.providerRawBytes - code.providerRawBytes,
        providerDeclaredTools: normal.providerDeclaredToolCount - code.providerDeclaredToolCount,
        providerSystemPromptChars:
          normal.providerSystemPromptChars - code.providerSystemPromptChars,
      },
    };
    process.stdout.write(`${JSON.stringify(summary, null, 2)}\n`);
  } finally {
    await provider.stop();
  }
}
await main();