mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-23 01:18:05 +00:00
* refactor: remove stale file-backed shims * fix: harden sqlite state ci boundaries * refactor: store matrix idb snapshots in sqlite * fix: satisfy rebased CI guardrails * refactor: store current conversation bindings in sqlite table * refactor: store tui last sessions in sqlite table * refactor: reset sqlite schema history * refactor: drop unshipped sqlite table migration * refactor: remove plugin index file rollback * refactor: drop unshipped sqlite sidecar migrations * refactor: remove runtime commitments kv migration * refactor: preserve kysely sync result types * refactor: drop unshipped sqlite schema migration table * test: keep session usage coverage sqlite-backed * refactor: keep sqlite migration doctor-only * refactor: isolate device legacy imports * refactor: isolate push voicewake legacy imports * refactor: isolate remaining runtime legacy imports * refactor: tighten sqlite migration guardrails * test: cover sqlite persisted enum parsing * refactor: isolate legacy update and tui imports * refactor: tighten sqlite state ownership * refactor: move legacy imports behind doctor * refactor: remove legacy session row lookup * refactor: canonicalize memory transcript locators * refactor: drop transcript path scope fallbacks * refactor: drop runtime legacy session delivery pruning * refactor: store tts prefs only in sqlite * refactor: remove cron store path runtime * refactor: use cron sqlite store keys * refactor: rename telegram message cache scope * refactor: read memory dreaming status from sqlite * refactor: rename cron status store key * refactor: stop remembering transcript file paths * test: use sqlite locators in agent fixtures * refactor: remove file-shaped commitments and cron store surfaces * refactor: keep compaction transcript handles out of session rows * refactor: derive transcript handles from session identity * refactor: derive runtime transcript handles * refactor: remove gateway session locator reads * refactor: remove transcript locator from 
session rows * refactor: store raw stream diagnostics in sqlite * refactor: remove file-shaped transcript rotation * refactor: hide legacy trajectory paths from runtime * refactor: remove runtime transcript file bridges * refactor: repair database-first rebase fallout * refactor: align tests with database-first state * refactor: remove transcript file handoffs * refactor: sync post-compaction memory by transcript scope * refactor: run codex app-server sessions by id * refactor: bind codex runtime state by session id * refactor: pass memory transcripts by sqlite scope * refactor: remove transcript locator cleanup leftovers * test: remove stale transcript file fixtures * refactor: remove transcript locator test helper * test: make cron sqlite keys explicit * test: remove cron runtime store paths * test: remove stale session file fixtures * test: use sqlite cron keys in diagnostics * refactor: remove runtime delivery queue backfill * test: drop fake export session file mocks * refactor: rename acp session read failure flag * refactor: rename acp row session key * refactor: remove session store test seams * refactor: move legacy session parser tests to doctor * refactor: reindex managed memory in place * refactor: drop stale session store wording * refactor: rename session row helpers * refactor: rename sqlite session entry modules * refactor: remove transcript locator leftovers * refactor: trim file-era audit wording * refactor: clean managed media through sqlite * fix: prefer explicit agent for exports * fix: use prepared agent for session resets * fix: canonicalize legacy codex binding import * test: rename state cleanup helper * docs: align backup docs with sqlite state * refactor: drop legacy Pi usage auth fallback * refactor: move legacy auth profile imports to doctor * refactor: keep Pi model discovery auth in memory * refactor: remove MSTeams legacy learning key fallback * refactor: store model catalog config in sqlite * refactor: use sqlite model catalog at 
runtime * refactor: remove model json compatibility aliases * refactor: store auth profiles in sqlite * refactor: seed copied auth profiles in sqlite * refactor: make auth profile runtime sqlite-addressed * refactor: migrate hermes secrets into sqlite auth store * refactor: move plugin install config migration to doctor * refactor: rename plugin index audit checks * test: drop auth file assumptions * test: remove legacy transcript file assertions * refactor: drop legacy cli session aliases * refactor: store skill uploads in sqlite * refactor: keep subagent attachments in sqlite vfs * refactor: drop subagent attachment cleanup state * refactor: move legacy session aliases to doctor * refactor: require node 24 for sqlite state runtime * refactor: move provider caches into sqlite state * fix: harden virtual agent filesystem * refactor: enforce database-first runtime state * refactor: rename compaction transcript rotation setting * test: clean sqlite refactor test types * refactor: consolidate sqlite runtime state * refactor: model session conversations in sqlite * refactor: stop deriving cron delivery from session keys * refactor: stop classifying sessions from key shape * refactor: hydrate announce targets from typed delivery * refactor: route heartbeat delivery from typed sqlite context * refactor: tighten typed sqlite session routing * refactor: remove session origin routing shadow * refactor: drop session origin shadow fixtures * perf: query sqlite vfs paths by prefix * refactor: use typed conversation metadata for sessions * refactor: prefer typed session routing metadata * refactor: require typed session routing metadata * refactor: resolve group tool policy from typed sessions * refactor: delete dead session thread info bridge * Show Codex subscription reset times in channel errors (#80456) * feat(plugin-sdk): consolidate session workflow APIs * fix(agents): allow read-only agent mount reads * [codex] refresh plugin regression fixtures * fix(agents): restore 
compaction gateway logs * test: tighten gateway startup assertions * Redact persisted secret-shaped payloads [AI] (#79006) * test: tighten device pair notify assertions * test: tighten hermes secret assertions * test: assert matrix client error shapes * test: assert config compat warnings * fix(heartbeat): remap cron-run exec events to session keys (#80214) * fix(codex): route btw through native side threads * fix(auth): accept friendly OpenAI order for Codex profiles * fix(codex): rotate auth profiles inside harness * fix: keep browser status page probe within timeout * test: assert agents add outputs * test: pin cron read status * fix(agents): avoid Pi resource discovery stalls Co-authored-by: dataCenter430 <titan032000@gmail.com> * fix: retire timed-out codex app-server clients * test: tighten qa lab runtime assertions * test: check security fix outputs * test: verify extension runtime messages * feat(wake): expose typed sessionKey on wake protocol + system event CLI * fix(gateway): await session_end during shutdown drain and track channel + compaction lifecycle paths (#57790) * test: guard talk consult call helper * fix(codex): scale context engine projection (#80761) * fix(codex): scale context engine projection * fix: document Codex context projection scaling * fix: document Codex context projection scaling * fix: document Codex context projection scaling * fix: document Codex context projection scaling * chore: align Codex projection changelog * chore: realign Codex projection changelog * fix: isolate Codex projection patch --------- Co-authored-by: Eva (agent) <eva+agent-78055@100yen.org> Co-authored-by: Josh Lehman <josh@martian.engineering> * refactor: move agent runtime state toward piless * refactor: remove cron session reaper * refactor: move session management to sqlite * refactor: finish database-first state migration * chore: refresh generated sqlite db types * refactor: remove stale file-backed shims * test: harden kysely type coverage # Conflicts: 
# .agents/skills/kysely-database-access/SKILL.md # src/infra/kysely-sync.types.test.ts # src/proxy-capture/store.sqlite.test.ts # src/state/openclaw-agent-db.test.ts # src/state/openclaw-state-db.test.ts * refactor: remove cron store path runtime * refactor: keep compaction transcript handles out of session rows * refactor: derive embedded transcripts from sqlite identity * refactor: remove embedded transcript locator handoff * refactor: remove runtime transcript file bridges * refactor: remove transcript file handoffs * refactor: remove MSTeams legacy learning key fallback * refactor: store model catalog config in sqlite * refactor: use sqlite model catalog at runtime # Conflicts: # docs/cli/secrets.md # docs/gateway/authentication.md # docs/gateway/secrets.md * fix: keep oauth sibling sync sqlite-local # Conflicts: # src/commands/onboard-auth.test.ts * refactor: remove task session store maintenance # Conflicts: # src/commands/tasks.ts * refactor: keep diagnostics in state sqlite * refactor: enforce database-first runtime state * refactor: consolidate sqlite runtime state * Show Codex subscription reset times in channel errors (#80456) * fix(codex): refresh subscription limit resets * fix(codex): format reset times for channels * Update CHANGELOG with latest changes and fixes Updated CHANGELOG with recent fixes and improvements. 
* fix(codex): keep command load failures on codex surface * fix(codex): format account rate limits as rows * fix(codex): summarize account limits as usage status * fix(codex): simplify account limit status * test: tighten subagent announce queue assertion * test: tighten session delete lifecycle assertions * test: tighten cron ops assertions * fix: track cron execution milestones * test: tighten hermes secret assertions * test: assert matrix sync store payloads * test: assert config compat warnings * fix(codex): align btw side thread semantics * fix(codex): honor codex fallback blocking * fix(agents): avoid Pi resource discovery stalls * test: tighten codex event assertions * test: tighten cron assertions * Fix Codex app-server OAuth harness auth * refactor: move agent runtime state toward piless * refactor: move device and push state to sqlite * refactor: move runtime json state imports to doctor * refactor: finish database-first state migration * chore: refresh generated sqlite db types * refactor: clarify cron sqlite store keys * refactor: remove stale file-backed shims * refactor: bind codex runtime state by session id * test: expect sqlite trajectory branch export * refactor: rename session row helpers * fix: keep legacy device identity import in doctor * refactor: enforce database-first runtime state * refactor: consolidate sqlite runtime state * build: align pi contract wrappers * chore: repair database-first rebase * refactor: remove session file test contracts * test: update gateway session expectations * refactor: stop routing from session compatibility shadows * refactor: stop persisting session route shadows * refactor: use typed delivery context in clients * refactor: stop echoing session route shadows * refactor: repair embedded runner rebase imports # Conflicts: # src/agents/pi-embedded-runner/run/attempt.tool-call-argument-repair.ts * refactor: align pi contract imports * refactor: satisfy kysely sync helper guard * refactor: remove file transcript 
bridge remnants * refactor: remove session locator compatibility * refactor: remove session file test contracts * refactor: keep rebase database-first clean * refactor: remove session file assumptions from e2e * docs: clarify database-first goal state * test: remove legacy store markers from sqlite runtime tests * refactor: remove legacy store assumptions from runtime seams * refactor: align sqlite runtime helper seams * test: update memory recall sqlite audit mock * refactor: align database-first runtime type seams * test: clarify doctor cron legacy store names * fix: preserve sqlite session route projections * test: fix copilot token cache test syntax * docs: update database-first proof status * test: align database-first test fixtures * docs: update database-first proof status * refactor: clean extension database-first drift * test: align agent session route proof * test: clarify doctor legacy path fixtures * chore: clean database-first changed checks * chore: repair database-first rebase markers * build: allow baileys git subdependency * chore: repair exp-vfs rebase drift * chore: finish exp-vfs rebase cleanup * chore: satisfy rebase lint drift * chore: fix qqbot rebase type seam * chore: fix rebase drift leftovers * fix: keep auth profile oauth secrets out of sqlite * fix: repair rebase drift tests * test: stabilize pairing request ordering * test: use source manifests in plugin contract checks * fix: restore gateway session metadata after rebase * fix: repair database-first rebase drift * fix: clean up database-first rebase fallout * test: stabilize line quick reply receipt time * fix: repair extension rebase drift * test: keep transcript redaction tests sqlite-backed * fix: carry injected transcript redaction through sqlite * chore: clean database branch rebase residue * fix: repair database branch CI drift * fix: repair database branch CI guard drift * fix: stabilize oauth tls preflight test * test: align database branch fast guards * test: repair build 
artifact boundary guards * chore: clean changelog rebase markers --------- Co-authored-by: pashpashpash <nik@vault77.ai> Co-authored-by: Eva <eva@100yen.org> Co-authored-by: stainlu <stainlu@newtype-ai.org> Co-authored-by: Jason Zhou <jason.zhou.design@gmail.com> Co-authored-by: Ruben Cuevas <hi@rubencu.com> Co-authored-by: Pavan Kumar Gondhi <pavangondhi@gmail.com> Co-authored-by: Shakker <shakkerdroid@gmail.com> Co-authored-by: Kaspre <36520309+Kaspre@users.noreply.github.com> Co-authored-by: dataCenter430 <titan032000@gmail.com> Co-authored-by: Kaspre <kaspre@gmail.com> Co-authored-by: pandadev66 <nova.full.stack@outlook.com> Co-authored-by: Eva <admin@100yen.org> Co-authored-by: Eva (agent) <eva+agent-78055@100yen.org> Co-authored-by: Josh Lehman <josh@martian.engineering> Co-authored-by: jeffjhunter <support@aipersonamethod.com>
561 lines
16 KiB
TypeScript
561 lines
16 KiB
TypeScript
import fs from "node:fs/promises";
|
|
import net from "node:net";
|
|
import os from "node:os";
|
|
import path from "node:path";
|
|
import process from "node:process";
|
|
import { pathToFileURL } from "node:url";
|
|
import { startQaMockOpenAiServer } from "../extensions/qa-lab/src/providers/mock-openai/server.js";
|
|
import { stageQaMockAuthProfiles } from "../extensions/qa-lab/src/providers/shared/mock-auth.js";
|
|
import { buildQaGatewayConfig } from "../extensions/qa-lab/src/qa-gateway-config.js";
|
|
import { resetConfigRuntimeState } from "../src/config/config.js";
|
|
import {
|
|
listSqliteSessionTranscripts,
|
|
loadSqliteSessionTranscriptEvents,
|
|
} from "../src/config/sessions/transcript-store.sqlite.js";
|
|
import { startGatewayServer } from "../src/gateway/server.js";
|
|
|
|
// Names the two gateway configurations this script compares; writeConfig turns on
// `toolSearch` and the Tool Search bridge tools only when the lane is "code".
type Lane = "normal" | "code";
|
|
|
|
/** Measurements collected by runLane for a single lane. */
type LaneResult = {
  /** Lane that produced this result. */
  lane: Lane;
  /** `status` field of the gateway response when it is a string, otherwise "". */
  status: string;
  /** Number of provider debug requests recorded while the lane ran. */
  providerRequestCount: number;
  /** `length` of the last provider request's `raw` string (0 when absent). */
  providerRawBytes: number;
  /** Characters of instructions plus system/developer input in the last provider request body. */
  providerSystemPromptChars: number;
  /** Length of the `tools` array in the last provider request body (0 when absent). */
  providerDeclaredToolCount: number;
  /** `plannedToolName` values reported for the lane's provider requests. */
  providerPlannedTools: string[];
  /** Names of `function_call` items in the gateway response output. */
  gatewayOutputToolNames: string[];
  /** Text pieces of `message` items in the gateway response output, joined with newlines. */
  gatewayOutputText: string;
  /** Occurrence counts per tool name across the lane's SQLite transcript events. */
  transcriptToolMentions: Record<string, number>;
};
|
|
|
|
// Id of the generated fixture plugin. It is written into the plugin manifest and
// entry module by writeFakePlugin and added to the gateway allow-lists by writeConfig.
const FAKE_PLUGIN_ID = "tool-search-e2e-fixture";
|
|
|
|
/**
 * Minimal assertion helper for this script.
 *
 * @param condition - Value checked for truthiness.
 * @param message - Message of the Error thrown when `condition` is falsy.
 * @throws Error when `condition` is falsy.
 */
function assert(condition: unknown, message: string): asserts condition {
  if (condition) {
    return;
  }
  throw new Error(message);
}
|
|
|
|
/**
 * Asks the OS for an unused loopback TCP port by binding a throwaway server to
 * port 0 on 127.0.0.1, reading the assigned port, and closing the server again.
 *
 * @returns The port that was assigned, or 0 when the bound address is not an
 *   address object.
 * @throws Rejects with the server's "error" event or with the close error.
 */
async function freePort(): Promise<number> {
  return new Promise<number>((resolve, reject) => {
    const probe = net.createServer();
    probe.once("error", reject);
    probe.listen(0, "127.0.0.1", () => {
      const bound = probe.address();
      let port = 0;
      if (bound !== null && typeof bound === "object") {
        port = bound.port;
      }
      // Resolve only after the probe is closed so the port is released for the caller.
      probe.close((closeError) => {
        if (closeError) {
          reject(closeError);
          return;
        }
        resolve(port);
      });
    });
  });
}
|
|
|
|
/**
 * Builds the catalog of fake function-tool specs used by the fixture plugin.
 *
 * Each tool is named `fake_plugin_tool_NN` (1-based, zero-padded to two digits)
 * and takes a single required string parameter `marker`.
 *
 * @param count - Number of tool specs to generate (default 36).
 * @returns An array of `count` tool spec objects.
 */
function buildFakeTools(count = 36) {
  const makeTool = (ordinal: number) => {
    const name = `fake_plugin_tool_${String(ordinal).padStart(2, "0")}`;
    const sentences = [
      `Fake plugin tool ${ordinal}.`,
      "Used by the Tool Search gateway E2E to prove a large plugin-owned tool catalog can be hidden from the model prompt and still called through the compact bridge.",
      "The description is intentionally non-trivial so prompt-size regression is measurable.",
    ];
    return {
      type: "function",
      name,
      description: sentences.join(" "),
      parameters: {
        type: "object",
        properties: {
          marker: {
            type: "string",
            description: "Lane marker supplied by the scripted model.",
          },
        },
        required: ["marker"],
        additionalProperties: false,
      },
      strict: true,
    };
  };
  return Array.from({ length: count }, (_unused, index) => makeTool(index + 1));
}
|
|
|
|
/**
 * Counts non-overlapping occurrences of `needle` in `haystack`, scanning left to right.
 *
 * @param haystack - Text to search.
 * @param needle - Substring to look for; an empty needle counts as zero matches.
 * @returns Number of non-overlapping matches.
 */
function countOccurrences(haystack: string, needle: string): number {
  if (needle.length === 0) {
    return 0;
  }
  let total = 0;
  // Resume each search just past the previous hit so matches never overlap.
  for (
    let hit = haystack.indexOf(needle);
    hit !== -1;
    hit = haystack.indexOf(needle, hit + needle.length)
  ) {
    total += 1;
  }
  return total;
}
|
|
|
|
/**
 * Serializes a transcript event to JSON text for substring counting.
 *
 * @param event - Arbitrary event payload.
 * @returns The JSON text, or "" when the value cannot be serialized.
 */
function stringifyTranscriptEvent(event: unknown): string {
  try {
    // JSON.stringify yields `undefined` (not a string) for undefined, functions
    // and symbols; coalesce so callers can always treat the result as text.
    return JSON.stringify(event) ?? "";
  } catch {
    // Cyclic structures and BigInt values make JSON.stringify throw.
    return "";
  }
}
|
|
|
|
/**
 * Counts how often "tool_search_code" and the target tool name appear in the
 * serialized transcript events stored for agent "qa".
 *
 * Transcripts are listed and loaded with an environment that is a copy of
 * `process.env` with `OPENCLAW_STATE_DIR` pointed at `params.stateDir`.
 *
 * @param params.stateDir - State directory passed via `OPENCLAW_STATE_DIR`.
 * @param params.targetTool - Tool name to count in addition to "tool_search_code".
 * @returns Occurrence counts keyed by tool name.
 */
async function readSqliteTranscriptMentions(params: {
  stateDir: string;
  targetTool: string;
}): Promise<Record<string, number>> {
  const { stateDir, targetTool } = params;
  const trackedNames = ["tool_search_code", targetTool];
  const mentions: Record<string, number> = {
    tool_search_code: 0,
    [targetTool]: 0,
  };
  const env = { ...process.env, OPENCLAW_STATE_DIR: stateDir };
  for (const transcript of listSqliteSessionTranscripts({ env, agentId: "qa" })) {
    const events = loadSqliteSessionTranscriptEvents({
      env,
      agentId: transcript.agentId,
      sessionId: transcript.sessionId,
    });
    for (const entry of events) {
      const serialized = stringifyTranscriptEvent(entry.event);
      for (const name of trackedNames) {
        mentions[name] += countOccurrences(serialized, name);
      }
    }
  }
  return mentions;
}
|
|
|
|
/**
 * Fetches `url` and returns the response body parsed as JSON.
 *
 * @param url - Request URL.
 * @param init - Optional fetch options.
 * @returns The parsed JSON value; `{}` for an empty body; the raw text when the
 *   body is not valid JSON.
 * @throws Error with the status code, URL and body text when the response is not OK.
 */
async function fetchJson(url: string, init?: RequestInit): Promise<unknown> {
  const response = await fetch(url, init);
  const body = await response.text();
  if (!response.ok) {
    throw new Error(`HTTP ${response.status} from ${url}: ${body}`);
  }
  if (!body) {
    return {};
  }
  try {
    return JSON.parse(body);
  } catch {
    // Not JSON: hand the raw text back to the caller.
    return body;
  }
}
|
|
|
|
/**
 * Extracts the names of all `function_call` items from a Responses-style payload.
 *
 * @param response - Parsed gateway response; any shape is tolerated.
 * @returns Names of output items whose `type` is "function_call" and whose
 *   `name` is a string, in output order. Returns [] when `response` is not an
 *   object or has no `output` array.
 */
function outputToolNames(response: unknown): string[] {
  // A JSON body of `null` (or a non-object) would otherwise throw on `.output`.
  if (!response || typeof response !== "object") {
    return [];
  }
  const output = (response as { output?: unknown }).output;
  if (!Array.isArray(output)) {
    return [];
  }
  const names: string[] = [];
  for (const item of output as unknown[]) {
    // Skip null/primitive entries instead of crashing on property access.
    if (!item || typeof item !== "object") {
      continue;
    }
    const record = item as { type?: unknown; name?: unknown };
    if (record.type === "function_call" && typeof record.name === "string") {
      names.push(record.name);
    }
  }
  return names;
}
|
|
|
|
/**
 * Collects the text of all `message` items from a Responses-style payload.
 *
 * @param response - Parsed gateway response; any shape is tolerated.
 * @returns Every string `text` found in the `content` arrays of output items
 *   whose `type` is "message", joined with "\n". Returns "" when `response` is
 *   not an object or has no `output` array.
 */
function outputText(response: unknown): string {
  // A JSON body of `null` (or a non-object) would otherwise throw on `.output`.
  if (!response || typeof response !== "object") {
    return "";
  }
  const output = (response as { output?: unknown }).output;
  if (!Array.isArray(output)) {
    return "";
  }
  const texts: string[] = [];
  for (const item of output as unknown[]) {
    // Guard items the same way content pieces are guarded below.
    if (!item || typeof item !== "object") {
      continue;
    }
    const message = item as { type?: unknown; content?: unknown };
    if (message.type !== "message" || !Array.isArray(message.content)) {
      continue;
    }
    for (const piece of message.content as unknown[]) {
      if (!piece || typeof piece !== "object") {
        continue;
      }
      const text = (piece as { text?: unknown }).text;
      if (typeof text === "string") {
        texts.push(text);
      }
    }
  }
  return texts.join("\n");
}
|
|
|
|
/**
 * Flattens a message `content` value into plain text.
 *
 * @param content - Either a string or an array of content parts.
 * @returns The string itself; for an array, one line per element (the part's
 *   string `text`, or an empty line when it has none) joined with "\n"; ""
 *   for any other value.
 */
function readContentText(content: unknown): string {
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return "";
  }
  const lines: string[] = [];
  for (const part of content) {
    const text =
      part && typeof part === "object" ? (part as { text?: unknown }).text : undefined;
    // Every element contributes a line, even when it carries no text.
    lines.push(typeof text === "string" ? text : "");
  }
  return lines.join("\n");
}
|
|
|
|
/**
 * Measures how many characters of system-level prompt a provider request body carries.
 *
 * @param body - Parsed provider request body; any shape is tolerated.
 * @returns Length of the string `instructions` plus the text length of every
 *   `input` item whose role is "system" or "developer"; 0 for non-object bodies.
 */
function countSystemPromptChars(body: unknown): number {
  if (!body || typeof body !== "object") {
    return 0;
  }
  const { instructions, input } = body as { instructions?: unknown; input?: unknown };
  const instructionChars = typeof instructions === "string" ? instructions.length : 0;
  if (!Array.isArray(input)) {
    return instructionChars;
  }
  return input.reduce<number>((sum, item) => {
    if (!item || typeof item !== "object") {
      return sum;
    }
    const { role, content } = item as { role?: unknown; content?: unknown };
    const isSystemLevel = role === "system" || role === "developer";
    return isSystemLevel ? sum + readContentText(content).length : sum;
  }, instructionChars);
}
|
|
|
|
/**
 * Builds the gateway config for one lane and writes it to `params.configPath` as JSON.
 *
 * Starting from `buildQaGatewayConfig` (loopback bind, mock-openai provider), it:
 * - allows the fixture plugin's tools, and for the "code" lane also the Tool
 *   Search bridge tools with `toolSearch` switched on;
 * - allows, enables and adds the load path of the fixture plugin;
 * - stages mock auth profiles for agent "qa";
 * - enables the gateway's HTTP `responses` endpoint.
 *
 * @param params.lane - Lane being configured.
 * @param params.stateDir - State directory handed to `stageQaMockAuthProfiles`.
 * @param params.configPath - Destination file; parent directories are created.
 * @param params.workspaceDir - Workspace directory for the gateway config.
 * @param params.gatewayPort - Port for the gateway config.
 * @param params.providerBaseUrl - Mock provider base URL; "/v1" is appended.
 * @param params.fakePluginDir - Directory of the generated fixture plugin.
 */
async function writeConfig(params: {
  lane: Lane;
  stateDir: string;
  configPath: string;
  workspaceDir: string;
  gatewayPort: number;
  providerBaseUrl: string;
  fakePluginDir: string;
}) {
  const { lane, stateDir, configPath, workspaceDir, gatewayPort, providerBaseUrl, fakePluginDir } =
    params;

  let cfg = buildQaGatewayConfig({
    bind: "loopback",
    gatewayPort,
    gatewayToken: "tool-search-e2e",
    providerBaseUrl: `${providerBaseUrl}/v1`,
    workspaceDir,
    controlUiEnabled: false,
    providerMode: "mock-openai",
  });

  // Both lanes may call the fixture plugin's tools.
  cfg = {
    ...cfg,
    tools: {
      ...cfg.tools,
      alsoAllow: [...new Set([...(cfg.tools?.alsoAllow ?? []), FAKE_PLUGIN_ID])],
    },
  };

  // Only the "code" lane additionally gets the Tool Search bridge tools.
  if (lane === "code") {
    cfg = {
      ...cfg,
      tools: {
        ...cfg.tools,
        alsoAllow: [
          ...new Set([
            ...(cfg.tools?.alsoAllow ?? []),
            "tool_search_code",
            "tool_search",
            "tool_describe",
            "tool_call",
          ]),
        ],
        toolSearch: true,
      },
    };
  }

  // Plugin wiring is identical for both lanes: allow, enable and locate the fixture.
  cfg = {
    ...cfg,
    plugins: {
      ...cfg.plugins,
      allow: [...new Set([...(cfg.plugins?.allow ?? []), FAKE_PLUGIN_ID])],
      entries: {
        ...cfg.plugins?.entries,
        [FAKE_PLUGIN_ID]: {
          enabled: true,
        },
      },
      load: {
        ...cfg.plugins?.load,
        paths: [...new Set([...(cfg.plugins?.load?.paths ?? []), fakePluginDir])],
      },
    },
  };

  cfg = await stageQaMockAuthProfiles({
    cfg,
    stateDir,
    agentIds: ["qa"],
    providers: ["mock-openai", "openai", "anthropic"],
  });

  // Applied after auth staging, matching the original ordering.
  cfg = {
    ...cfg,
    gateway: {
      ...cfg.gateway,
      http: {
        endpoints: {
          responses: {
            enabled: true,
          },
        },
      },
    },
  };

  await fs.mkdir(path.dirname(configPath), { recursive: true });
  await fs.writeFile(configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8");
}
|
|
|
|
/**
 * Generates the fixture plugin on disk under `<rootDir>/fake-plugin`.
 *
 * Three files are written, in this order: `package.json`,
 * `openclaw.plugin.json` (manifest listing every fake tool name) and
 * `index.js` (entry module that registers each fake tool through
 * `definePluginEntry`, imported by file URL from `<repoRoot>/src/plugin-sdk/plugin-entry.ts`).
 *
 * @param params.rootDir - Directory in which `fake-plugin` is created.
 * @param params.repoRoot - Repository root used to locate the plugin SDK entry.
 * @param params.fakeTools - Tool specs embedded into the manifest and entry module.
 * @returns Path of the generated plugin directory.
 */
async function writeFakePlugin(params: {
  rootDir: string;
  repoRoot: string;
  fakeTools: ReturnType<typeof buildFakeTools>;
}): Promise<string> {
  const { rootDir, repoRoot, fakeTools } = params;
  const pluginDir = path.join(rootDir, "fake-plugin");
  const toPrettyJson = (value: unknown): string => `${JSON.stringify(value, null, 2)}\n`;

  const packageManifest = {
    name: "@openclaw/tool-search-e2e-fixture",
    version: "0.0.0",
    type: "module",
    openclaw: {
      extensions: ["./index.js"],
    },
  };
  const pluginManifest = {
    id: FAKE_PLUGIN_ID,
    activation: {
      onStartup: true,
    },
    name: "Tool Search E2E Fixture",
    description: "Fake plugin with a large tool catalog for Tool Search gateway validation.",
    contracts: {
      tools: fakeTools.map((tool) => tool.name),
    },
    configSchema: {
      type: "object",
      additionalProperties: false,
      properties: {},
    },
  };

  // The generated module imports the SDK entry by absolute file URL.
  const pluginEntryUrl = pathToFileURL(path.join(repoRoot, "src/plugin-sdk/plugin-entry.ts")).href;
  const entryLines = [
    `import { definePluginEntry } from ${JSON.stringify(pluginEntryUrl)};`,
    `const tools = ${JSON.stringify(fakeTools, null, 2)};`,
    "export default definePluginEntry({",
    ` id: ${JSON.stringify(FAKE_PLUGIN_ID)},`,
    " name: 'Tool Search E2E Fixture',",
    " register(api) {",
    " for (const spec of tools) {",
    " api.registerTool({",
    " name: spec.name,",
    " label: spec.name,",
    " description: spec.description,",
    " parameters: spec.parameters,",
    " execute: async (_toolCallId, input) => ({",
    " content: [{ type: 'text', text: `FAKE_PLUGIN_OK ${spec.name} ${JSON.stringify(input ?? {})}` }],",
    " details: { status: 'ok', tool: spec.name, input },",
    " }),",
    " }, { name: spec.name });",
    " }",
    " },",
    "});",
    "",
  ];

  await fs.mkdir(pluginDir, { recursive: true });
  await fs.writeFile(path.join(pluginDir, "package.json"), toPrettyJson(packageManifest), "utf8");
  await fs.writeFile(
    path.join(pluginDir, "openclaw.plugin.json"),
    toPrettyJson(pluginManifest),
    "utf8",
  );
  await fs.writeFile(path.join(pluginDir, "index.js"), entryLines.join("\n"), "utf8");
  return pluginDir;
}
|
|
|
|
/**
 * Runs one lane end to end and collects its measurements.
 *
 * Steps: write the lane's config under `<rootDir>/<lane>/state`, point the
 * process environment at it, start a gateway on a free loopback port, send one
 * `/v1/responses` request asking for the target tool, then read back what the
 * mock provider recorded and what the SQLite transcript contains. The gateway is
 * closed and the config runtime state reset whether or not the request succeeds.
 *
 * @param params.lane - Lane to run.
 * @param params.rootDir - Scratch directory holding per-lane state and workspace.
 * @param params.providerBaseUrl - Base URL of the mock provider.
 * @param params.targetTool - Tool name placed in the prompt and counted in transcripts.
 * @param params.fakeTools - Fake tool catalog (not read by this function).
 * @param params.fakePluginDir - Directory of the generated fixture plugin.
 * @returns Measurements for the lane.
 */
async function runLane(params: {
  lane: Lane;
  rootDir: string;
  providerBaseUrl: string;
  targetTool: string;
  fakeTools: ReturnType<typeof buildFakeTools>;
  fakePluginDir: string;
}): Promise<LaneResult> {
  const { lane, rootDir, providerBaseUrl, targetTool, fakePluginDir } = params;
  const laneDir = path.join(rootDir, lane);
  const stateDir = path.join(laneDir, "state");
  const configPath = path.join(stateDir, "openclaw.json");
  const workspaceDir = path.join(laneDir, "workspace");
  const gatewayPort = await freePort();
  await fs.mkdir(workspaceDir, { recursive: true });
  await writeConfig({
    lane,
    stateDir,
    configPath,
    workspaceDir,
    gatewayPort,
    providerBaseUrl,
    fakePluginDir,
  });

  // Point the in-process gateway at this lane's state/config before it starts.
  process.env.OPENCLAW_STATE_DIR = stateDir;
  process.env.OPENCLAW_CONFIG_PATH = configPath;
  process.env.OPENCLAW_TEST_FAST = "1";
  resetConfigRuntimeState();

  const server = await startGatewayServer(gatewayPort, {
    host: "127.0.0.1",
    auth: { mode: "none" },
    controlUiEnabled: false,
    openResponsesEnabled: true,
  });
  try {
    const debugRequestsUrl = `${providerBaseUrl}/debug/requests`;
    // The provider is shared across lanes; remember how many requests predate this one.
    const beforeRequests = (await fetchJson(debugRequestsUrl)) as unknown[];

    const requestBody = JSON.stringify({
      model: "openclaw/qa",
      input: [
        {
          type: "message",
          role: "user",
          content: [
            {
              type: "input_text",
              text: `tool search qa check target=${targetTool}`,
            },
          ],
        },
      ],
      max_output_tokens: 256,
      stream: false,
    });
    const response = await fetchJson(`http://127.0.0.1:${gatewayPort}/v1/responses`, {
      method: "POST",
      headers: {
        "content-type": "application/json",
        "x-openclaw-scopes": "operator.write",
        "x-openclaw-agent": "qa",
      },
      body: requestBody,
    });

    const requests = (await fetchJson(debugRequestsUrl)) as Array<{
      raw?: string;
      body?: { tools?: unknown[] };
      instructions?: string;
      plannedToolName?: string;
    }>;
    const laneRequests = requests.slice(beforeRequests.length);
    const lastRequest = laneRequests.at(-1) ?? {};
    const responseStatus = (response as { status?: unknown }).status;
    const declaredTools = lastRequest.body?.tools;
    const plannedTools = laneRequests
      .map((request) => request.plannedToolName)
      .filter((name): name is string => typeof name === "string");
    const transcriptToolMentions = await readSqliteTranscriptMentions({ stateDir, targetTool });

    return {
      lane,
      status: typeof responseStatus === "string" ? responseStatus : "",
      providerRequestCount: laneRequests.length,
      providerRawBytes: typeof lastRequest.raw === "string" ? lastRequest.raw.length : 0,
      providerSystemPromptChars: countSystemPromptChars(lastRequest.body),
      providerDeclaredToolCount: Array.isArray(declaredTools) ? declaredTools.length : 0,
      providerPlannedTools: plannedTools,
      gatewayOutputToolNames: outputToolNames(response),
      gatewayOutputText: outputText(response),
      transcriptToolMentions,
    };
  } finally {
    await server.close({ reason: `${lane} lane complete` });
    resetConfigRuntimeState();
  }
}
|
|
|
|
/**
 * Script body: runs the "normal" and "code" lanes against one shared mock
 * provider, asserts that Tool Search still reaches the target tool while
 * exposing fewer tools and a smaller request to the provider, and prints a JSON
 * summary to stdout.
 *
 * The scratch directory is not removed here; its path is reported as `rootDir`
 * in the summary.
 *
 * @throws Error when any lane assertion fails, or whatever a lane run rejects with.
 */
async function main() {
  const rootDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-tool-search-"));
  const provider = await startQaMockOpenAiServer();
  const targetTool = "fake_plugin_tool_17";
  try {
    // Fixture generation does file I/O and can fail; keep it inside the try so
    // the already-running mock provider is always stopped.
    const fakeTools = buildFakeTools();
    const fakePluginDir = await writeFakePlugin({
      rootDir,
      repoRoot: process.cwd(),
      fakeTools,
    });

    // Lanes run sequentially: each one rewrites process-wide environment state.
    const normal = await runLane({
      lane: "normal",
      rootDir,
      providerBaseUrl: provider.baseUrl,
      targetTool,
      fakeTools,
      fakePluginDir,
    });
    const code = await runLane({
      lane: "code",
      rootDir,
      providerBaseUrl: provider.baseUrl,
      targetTool,
      fakeTools,
      fakePluginDir,
    });

    assert(
      normal.providerPlannedTools.includes(targetTool) &&
        normal.gatewayOutputText.includes("FAKE_PLUGIN_OK") &&
        normal.gatewayOutputText.includes(targetTool),
      `normal lane did not call ${targetTool}`,
    );
    assert(
      code.providerPlannedTools.includes("tool_search_code") &&
        code.gatewayOutputText.includes(targetTool) &&
        code.transcriptToolMentions[targetTool] > 0,
      `code lane did not bridge-call ${targetTool}`,
    );
    assert(
      normal.providerDeclaredToolCount > code.providerDeclaredToolCount,
      `expected Tool Search to expose fewer tools to provider: normal=${normal.providerDeclaredToolCount} code=${code.providerDeclaredToolCount}`,
    );
    assert(
      normal.providerRawBytes > code.providerRawBytes,
      `expected Tool Search request to be smaller: normal=${normal.providerRawBytes} code=${code.providerRawBytes}`,
    );
    assert(
      code.transcriptToolMentions.tool_search_code > 0 &&
        code.transcriptToolMentions[targetTool] > 0,
      "code lane SQLite transcript did not record bridge and target tool mentions",
    );

    const summary = {
      ok: true,
      rootDir,
      targetTool,
      normal,
      code,
      reduction: {
        providerRawBytes: normal.providerRawBytes - code.providerRawBytes,
        providerDeclaredTools: normal.providerDeclaredToolCount - code.providerDeclaredToolCount,
        providerSystemPromptChars:
          normal.providerSystemPromptChars - code.providerSystemPromptChars,
      },
    };
    process.stdout.write(`${JSON.stringify(summary, null, 2)}\n`);
  } finally {
    await provider.stop();
  }
}
|
|
|
|
// Script entry point. There is no catch here, so any rejection from main()
// (failed lane assertion or runtime error) surfaces at module top level.
await main();
|