// openclaw/scripts/tool-search-gateway-e2e.ts

import fs from "node:fs/promises";
import net from "node:net";
import os from "node:os";
import path from "node:path";
import process from "node:process";
import { pathToFileURL } from "node:url";
import { startQaMockOpenAiServer } from "../extensions/qa-lab/src/providers/mock-openai/server.js";
import { stageQaMockAuthProfiles } from "../extensions/qa-lab/src/providers/shared/mock-auth.js";
import { buildQaGatewayConfig } from "../extensions/qa-lab/src/qa-gateway-config.js";
import { resetConfigRuntimeState } from "../src/config/config.js";
import {
  listSqliteSessionTranscripts,
  loadSqliteSessionTranscriptEvents,
} from "../src/config/sessions/transcript-store.sqlite.js";
import { startGatewayServer } from "../src/gateway/server.js";

/**
 * Which gateway configuration a run exercises. `writeConfig` turns on
 * `tools.toolSearch` and allows the bridge tools only for the "code" lane.
 */
type Lane = "normal" | "code";

/** Measurements collected by `runLane` for a single lane. */
type LaneResult = {
  /** Lane that produced this result. */
  lane: Lane;
  /** `status` field of the gateway response when it is a string, otherwise "". */
  status: string;
  /** Number of requests the mock provider recorded while this lane ran. */
  providerRequestCount: number;
  /** Length of the `raw` string of the lane's last provider request (0 when absent). */
  providerRawBytes: number;
  /** System/developer prompt characters counted in the last provider request body. */
  providerSystemPromptChars: number;
  /** Number of entries in the `tools` array of the last provider request body (0 when absent). */
  providerDeclaredToolCount: number;
  /** String `plannedToolName` values of this lane's provider requests, in request order. */
  providerPlannedTools: string[];
  /** Names of the `function_call` items in the gateway response output. */
  gatewayOutputToolNames: string[];
  /** Text pieces of the `message` items in the gateway response output, joined with newlines. */
  gatewayOutputText: string;
  /** Occurrence counts, keyed by tool name, found in the lane's SQLite transcript events. */
  transcriptToolMentions: Record<string, number>;
};

/**
 * Id of the generated fixture plugin. It is written into the plugin manifest and
 * entry module and referenced from the gateway config's allow lists and entries.
 */
const FAKE_PLUGIN_ID = "tool-search-e2e-fixture";

/**
 * Throws an `Error` carrying `message` when `condition` is falsy.
 *
 * The `asserts condition` return type lets TypeScript treat `condition` as
 * truthy in the code that follows a call.
 *
 * @param condition Value that must be truthy.
 * @param message Text of the error thrown when it is not.
 * @throws Error when `condition` is falsy.
 */
function assert(condition: unknown, message: string): asserts condition {
  if (!condition) {
    throw new Error(message);
  }
}

/**
 * Asks the operating system for a currently unused TCP port on 127.0.0.1.
 *
 * A throwaway server listens on port 0, the assigned port is read from its
 * address, and the promise settles only once that server has been closed.
 * Resolves with 0 if the address is not reported as an object.
 *
 * @returns The port number that was assigned to the probe server.
 */
async function freePort(): Promise<number> {
  return await new Promise<number>((resolve, reject) => {
    const probe = net.createServer();
    probe.once("error", reject);
    probe.listen(0, "127.0.0.1", () => {
      const bound = probe.address();
      const port = bound !== null && typeof bound === "object" ? bound.port : 0;
      probe.close((closeError) => {
        if (closeError) {
          reject(closeError);
          return;
        }
        resolve(port);
      });
    });
  });
}

/**
 * Builds the fixture plugin's tool catalog.
 *
 * Tools are named `fake_plugin_tool_NN`, numbered from 1 and zero-padded to two
 * digits. Every tool accepts exactly one required string argument, `marker`,
 * and carries a deliberately long description.
 *
 * @param count Number of tool specs to generate (defaults to 36).
 * @returns One function-tool spec per ordinal, each with its own parameters object.
 */
function buildFakeTools(count = 36) {
  const makeTool = (ordinal: number) => {
    const summary = `Fake plugin tool ${ordinal}.`;
    const purpose =
      "Used by the Tool Search gateway E2E to prove a large plugin-owned tool catalog can be hidden from the model prompt and still called through the compact bridge.";
    const sizing =
      "The description is intentionally non-trivial so prompt-size regression is measurable.";
    return {
      type: "function",
      name: `fake_plugin_tool_${ordinal.toString().padStart(2, "0")}`,
      description: `${summary} ${purpose} ${sizing}`,
      parameters: {
        type: "object",
        properties: {
          marker: {
            type: "string",
            description: "Lane marker supplied by the scripted model.",
          },
        },
        required: ["marker"],
        additionalProperties: false,
      },
      strict: true,
    };
  };
  return Array.from({ length: count }, (_unused, position) => makeTool(position + 1));
}

/**
 * Counts non-overlapping occurrences of `needle` in `haystack`, scanning left to right.
 *
 * @param haystack Text to search.
 * @param needle Substring to look for; an empty needle counts as zero matches.
 * @returns Number of matches found.
 */
function countOccurrences(haystack: string, needle: string): number {
  let total = 0;
  if (needle) {
    // After each hit, resume searching just past the matched text so matches never overlap.
    for (
      let hit = haystack.indexOf(needle);
      hit >= 0;
      hit = haystack.indexOf(needle, hit + needle.length)
    ) {
      total += 1;
    }
  }
  return total;
}

/**
 * Serializes a transcript event to JSON text for substring counting.
 *
 * Always returns a string: values that cannot be serialized (circular
 * structures, BigInt) and values for which `JSON.stringify` produces no output
 * (`undefined`, functions, symbols) both yield "".
 *
 * @param event Arbitrary transcript event payload.
 * @returns The JSON text, or "" when no JSON text can be produced.
 */
function stringifyTranscriptEvent(event: unknown): string {
  try {
    // Despite its declared return type, JSON.stringify returns undefined for
    // undefined/function/symbol inputs; coalesce so callers really get a string.
    return JSON.stringify(event) ?? "";
  } catch {
    return "";
  }
}

/**
 * Counts how often the bridge tool name ("tool_search_code") and the target
 * tool name appear in the serialized SQLite transcript events of agent "qa".
 *
 * The transcript store is queried with a copy of the process environment whose
 * `OPENCLAW_STATE_DIR` is set to `params.stateDir`.
 *
 * @param params.stateDir State directory to read transcripts from.
 * @param params.targetTool Tool name to count next to the bridge tool.
 * @returns Occurrence counts keyed by tool name.
 */
async function readSqliteTranscriptMentions(params: {
  stateDir: string;
  targetTool: string;
}): Promise<Record<string, number>> {
  const { stateDir, targetTool } = params;
  const bridgeTool = "tool_search_code";
  const counts: Record<string, number> = {
    [bridgeTool]: 0,
    [targetTool]: 0,
  };
  const scopedEnv = { ...process.env, OPENCLAW_STATE_DIR: stateDir };
  const transcripts = listSqliteSessionTranscripts({ env: scopedEnv, agentId: "qa" });
  for (const { agentId, sessionId } of transcripts) {
    const events = loadSqliteSessionTranscriptEvents({ env: scopedEnv, agentId, sessionId });
    for (const { event } of events) {
      const serialized = stringifyTranscriptEvent(event);
      counts[bridgeTool] += countOccurrences(serialized, bridgeTool);
      counts[targetTool] += countOccurrences(serialized, targetTool);
    }
  }
  return counts;
}

/**
 * Performs a fetch and returns the decoded response body.
 *
 * An empty body yields `{}`, a body that is valid JSON yields the parsed value,
 * and any other body is returned as the raw text.
 *
 * @param url Address to request.
 * @param init Optional fetch options.
 * @returns The decoded body.
 * @throws Error with the status, URL and body text when `response.ok` is false.
 */
async function fetchJson(url: string, init?: RequestInit): Promise<unknown> {
  const reply = await fetch(url, init);
  const payload = await reply.text();
  if (!reply.ok) {
    throw new Error(`HTTP ${reply.status} from ${url}: ${payload}`);
  }
  if (!payload) {
    return {};
  }
  try {
    return JSON.parse(payload);
  } catch {
    // Not JSON: hand back the text unchanged.
    return payload;
  }
}

/**
 * Extracts the names of all `function_call` items from a Responses-style payload.
 *
 * Tolerates malformed payloads: a non-object response, a missing or non-array
 * `output`, and non-object entries inside `output` are skipped instead of
 * raising a TypeError.
 *
 * @param response Parsed gateway response (any shape).
 * @returns Names of `function_call` items whose `name` is a string, in output order.
 */
function outputToolNames(response: unknown): string[] {
  if (!response || typeof response !== "object") {
    return [];
  }
  const output = (response as { output?: unknown }).output;
  if (!Array.isArray(output)) {
    return [];
  }
  return output.flatMap((item: unknown): string[] => {
    // Entries may be null or primitives in a malformed payload; ignore them.
    if (!item || typeof item !== "object") {
      return [];
    }
    const record = item as { type?: unknown; name?: unknown };
    return record.type === "function_call" && typeof record.name === "string"
      ? [record.name]
      : [];
  });
}

/**
 * Collects the text of all `message` items from a Responses-style payload.
 *
 * Only content pieces that are objects with a string `text` contribute.
 * Tolerates malformed payloads: a non-object response, a missing or non-array
 * `output`, and non-object entries inside `output` are skipped instead of
 * raising a TypeError.
 *
 * @param response Parsed gateway response (any shape).
 * @returns The collected text pieces joined with newlines ("" when there are none).
 */
function outputText(response: unknown): string {
  if (!response || typeof response !== "object") {
    return "";
  }
  const output = (response as { output?: unknown }).output;
  if (!Array.isArray(output)) {
    return "";
  }
  return output
    .flatMap((item: unknown): string[] => {
      // Same leniency for items as for content pieces below.
      if (!item || typeof item !== "object") {
        return [];
      }
      const message = item as { type?: unknown; content?: unknown };
      if (message.type !== "message" || !Array.isArray(message.content)) {
        return [];
      }
      return message.content.flatMap((piece: unknown): string[] => {
        if (!piece || typeof piece !== "object") {
          return [];
        }
        const record = piece as { text?: unknown };
        return typeof record.text === "string" ? [record.text] : [];
      });
    })
    .join("\n");
}

/**
 * Flattens a message `content` value into plain text.
 *
 * A string is returned as is. For an array, every element contributes one line:
 * its `text` when that is a string, otherwise an empty line. Anything else
 * yields "".
 *
 * @param content Message content in string or array-of-parts form.
 * @returns The flattened text, with array elements joined by newlines.
 */
function readContentText(content: unknown): string {
  if (Array.isArray(content)) {
    const lines: string[] = [];
    for (const part of content) {
      const text =
        part && typeof part === "object" ? (part as { text?: unknown }).text : undefined;
      // Elements without usable text still occupy a (blank) line.
      lines.push(typeof text === "string" ? text : "");
    }
    return lines.join("\n");
  }
  return typeof content === "string" ? content : "";
}

/**
 * Measures how many prompt characters a provider request body carries.
 *
 * The total is the length of a string `instructions` field plus the flattened
 * content length of every `input` entry whose role is "system" or "developer".
 *
 * @param body Provider request body (any shape).
 * @returns The character total, or 0 when `body` is not an object.
 */
function countSystemPromptChars(body: unknown): number {
  if (!body || typeof body !== "object") {
    return 0;
  }
  const { instructions, input } = body as { instructions?: unknown; input?: unknown };
  const instructionChars = typeof instructions === "string" ? instructions.length : 0;
  if (!Array.isArray(input)) {
    return instructionChars;
  }
  return input.reduce<number>((sum, entry: unknown) => {
    if (!entry || typeof entry !== "object") {
      return sum;
    }
    const { role, content } = entry as { role?: unknown; content?: unknown };
    const isPromptRole = role === "system" || role === "developer";
    return isPromptRole ? sum + readContentText(content).length : sum;
  }, instructionChars);
}

/**
 * Builds the gateway config for one lane and writes it to `params.configPath`
 * as pretty-printed JSON followed by a newline.
 *
 * Both lanes allow and enable the fixture plugin and add `params.fakePluginDir`
 * to the plugin load paths. The "code" lane additionally allows the Tool Search
 * bridge tools and sets `tools.toolSearch`. Mock auth profiles are staged for
 * agent "qa", and `gateway.http` is replaced by a section that only enables the
 * responses endpoint.
 *
 * @param params Lane, file locations, gateway port, provider base URL and fixture plugin directory.
 */
async function writeConfig(params: {
  lane: Lane;
  stateDir: string;
  configPath: string;
  workspaceDir: string;
  gatewayPort: number;
  providerBaseUrl: string;
  fakePluginDir: string;
}) {
  const codeLane = params.lane === "code";
  // Bridge tools are allowed only when Tool Search is under test.
  const bridgeToolNames = codeLane
    ? ["tool_search_code", "tool_search", "tool_describe", "tool_call"]
    : [];

  const baseCfg = buildQaGatewayConfig({
    bind: "loopback",
    gatewayPort: params.gatewayPort,
    gatewayToken: "tool-search-e2e",
    providerBaseUrl: `${params.providerBaseUrl}/v1`,
    workspaceDir: params.workspaceDir,
    controlUiEnabled: false,
    providerMode: "mock-openai",
  });

  const allowedTools = new Set([
    ...(baseCfg.tools?.alsoAllow ?? []),
    FAKE_PLUGIN_ID,
    ...bridgeToolNames,
  ]);
  const allowedPlugins = new Set([...(baseCfg.plugins?.allow ?? []), FAKE_PLUGIN_ID]);
  const pluginLoadPaths = new Set([
    ...(baseCfg.plugins?.load?.paths ?? []),
    params.fakePluginDir,
  ]);

  const fixtureCfg = {
    ...baseCfg,
    tools: {
      ...baseCfg.tools,
      alsoAllow: [...allowedTools],
      ...(codeLane ? { toolSearch: true } : {}),
    },
    plugins: {
      ...baseCfg.plugins,
      allow: [...allowedPlugins],
      entries: {
        ...baseCfg.plugins?.entries,
        [FAKE_PLUGIN_ID]: {
          enabled: true,
        },
      },
      load: {
        ...baseCfg.plugins?.load,
        paths: [...pluginLoadPaths],
      },
    },
  };

  const stagedCfg = await stageQaMockAuthProfiles({
    cfg: fixtureCfg,
    stateDir: params.stateDir,
    agentIds: ["qa"],
    providers: ["mock-openai", "openai", "anthropic"],
  });

  const finalCfg = {
    ...stagedCfg,
    gateway: {
      ...stagedCfg.gateway,
      http: {
        endpoints: {
          responses: {
            enabled: true,
          },
        },
      },
    },
  };

  const serialized = `${JSON.stringify(finalCfg, null, 2)}\n`;
  await fs.mkdir(path.dirname(params.configPath), { recursive: true });
  await fs.writeFile(params.configPath, serialized, "utf8");
}

/**
 * Generates the fixture plugin on disk under `<rootDir>/fake-plugin`.
 *
 * Three files are written: `package.json`, the `openclaw.plugin.json` manifest
 * listing every fake tool name, and an `index.js` entry module that registers
 * each tool with an `execute` handler echoing `FAKE_PLUGIN_OK`, the tool name
 * and the received input. The entry module imports `definePluginEntry` through
 * a file URL to `src/plugin-sdk/plugin-entry.ts` under `params.repoRoot`.
 *
 * @param params.rootDir Directory in which the plugin folder is created.
 * @param params.repoRoot Directory that contains `src/plugin-sdk/plugin-entry.ts`.
 * @param params.fakeTools Tool specs to embed in the plugin.
 * @returns Path of the generated plugin directory.
 */
async function writeFakePlugin(params: {
  rootDir: string;
  repoRoot: string;
  fakeTools: ReturnType<typeof buildFakeTools>;
}): Promise<string> {
  const { rootDir, repoRoot, fakeTools } = params;
  const pluginDir = path.join(rootDir, "fake-plugin");
  const writeText = (fileName: string, text: string) =>
    fs.writeFile(path.join(pluginDir, fileName), text, "utf8");
  const toJsonText = (value: unknown) => `${JSON.stringify(value, null, 2)}\n`;

  await fs.mkdir(pluginDir, { recursive: true });

  const packageManifest = {
    name: "@openclaw/tool-search-e2e-fixture",
    version: "0.0.0",
    type: "module",
    openclaw: {
      extensions: ["./index.js"],
    },
  };
  await writeText("package.json", toJsonText(packageManifest));

  const pluginManifest = {
    id: FAKE_PLUGIN_ID,
    activation: {
      onStartup: true,
    },
    name: "Tool Search E2E Fixture",
    description: "Fake plugin with a large tool catalog for Tool Search gateway validation.",
    contracts: {
      tools: fakeTools.map((tool) => tool.name),
    },
    configSchema: {
      type: "object",
      additionalProperties: false,
      properties: {},
    },
  };
  await writeText("openclaw.plugin.json", toJsonText(pluginManifest));

  const sdkEntryPath = path.join(repoRoot, "src/plugin-sdk/plugin-entry.ts");
  const pluginEntryUrl = pathToFileURL(sdkEntryPath).href;
  // Source text of the generated module; the tool specs are inlined as a JSON literal.
  const entrySourceLines = [
    `import { definePluginEntry } from ${JSON.stringify(pluginEntryUrl)};`,
    `const tools = ${JSON.stringify(fakeTools, null, 2)};`,
    "export default definePluginEntry({",
    `  id: ${JSON.stringify(FAKE_PLUGIN_ID)},`,
    "  name: 'Tool Search E2E Fixture',",
    "  register(api) {",
    "    for (const spec of tools) {",
    "      api.registerTool({",
    "        name: spec.name,",
    "        label: spec.name,",
    "        description: spec.description,",
    "        parameters: spec.parameters,",
    "        execute: async (_toolCallId, input) => ({",
    "          content: [{ type: 'text', text: `FAKE_PLUGIN_OK ${spec.name} ${JSON.stringify(input ?? {})}` }],",
    "          details: { status: 'ok', tool: spec.name, input },",
    "        }),",
    "      }, { name: spec.name });",
    "    }",
    "  },",
    "});",
    "",
  ];
  await writeText("index.js", entrySourceLines.join("\n"));

  return pluginDir;
}

/**
 * Runs one lane end to end and collects its measurements.
 *
 * Steps: write the lane config under `<rootDir>/<lane>`, point the process
 * environment at it and reset the config runtime state, start a gateway on a
 * free loopback port, POST a single `/v1/responses` request that names the
 * target tool, then inspect the mock provider's `/debug/requests` log and the
 * SQLite transcripts. Provider requests recorded before the POST are excluded
 * from the lane's figures. The gateway is closed and the config runtime state
 * reset again even when a step fails.
 *
 * Side effect: `OPENCLAW_STATE_DIR`, `OPENCLAW_CONFIG_PATH` and
 * `OPENCLAW_TEST_FAST` are set on `process.env` and are not restored.
 *
 * @param params Lane, temp root, provider base URL, target tool name and fixture locations.
 * @returns The measurements for this lane.
 */
async function runLane(params: {
  lane: Lane;
  rootDir: string;
  providerBaseUrl: string;
  targetTool: string;
  fakeTools: ReturnType<typeof buildFakeTools>;
  fakePluginDir: string;
}): Promise<LaneResult> {
  type ProviderRequest = {
    raw?: string;
    body?: { tools?: unknown[] };
    instructions?: string;
    plannedToolName?: string;
  };

  const { lane, rootDir, providerBaseUrl, targetTool, fakePluginDir } = params;
  const laneDir = path.join(rootDir, lane);
  const stateDir = path.join(laneDir, "state");
  const configPath = path.join(stateDir, "openclaw.json");
  const workspaceDir = path.join(laneDir, "workspace");
  const gatewayPort = await freePort();
  await fs.mkdir(workspaceDir, { recursive: true });
  await writeConfig({
    lane,
    stateDir,
    configPath,
    workspaceDir,
    gatewayPort,
    providerBaseUrl,
    fakePluginDir,
  });

  // The environment must point at this lane before the config runtime state is reset.
  Object.assign(process.env, {
    OPENCLAW_STATE_DIR: stateDir,
    OPENCLAW_CONFIG_PATH: configPath,
    OPENCLAW_TEST_FAST: "1",
  });
  resetConfigRuntimeState();

  const server = await startGatewayServer(gatewayPort, {
    host: "127.0.0.1",
    auth: { mode: "none" },
    controlUiEnabled: false,
    openResponsesEnabled: true,
  });
  try {
    const debugRequestsUrl = `${providerBaseUrl}/debug/requests`;
    // Snapshot the provider log first so earlier requests can be sliced away below.
    const requestsBefore = (await fetchJson(debugRequestsUrl)) as unknown[];

    const gatewayRequest = {
      model: "openclaw/qa",
      input: [
        {
          type: "message",
          role: "user",
          content: [
            {
              type: "input_text",
              text: `tool search qa check target=${targetTool}`,
            },
          ],
        },
      ],
      max_output_tokens: 256,
      stream: false,
    };
    const response = await fetchJson(`http://127.0.0.1:${gatewayPort}/v1/responses`, {
      method: "POST",
      headers: {
        "content-type": "application/json",
        "x-openclaw-scopes": "operator.write",
        "x-openclaw-agent": "qa",
      },
      body: JSON.stringify(gatewayRequest),
    });

    const requestsAfter = (await fetchJson(debugRequestsUrl)) as ProviderRequest[];
    const laneRequests = requestsAfter.slice(requestsBefore.length);
    const lastRequest = laneRequests.at(-1) ?? {};
    const responseStatus = (response as { status?: unknown }).status;

    const plannedTools: string[] = [];
    for (const request of laneRequests) {
      if (typeof request.plannedToolName === "string") {
        plannedTools.push(request.plannedToolName);
      }
    }
    const declaredTools = lastRequest.body?.tools;
    const rawLength = typeof lastRequest.raw === "string" ? lastRequest.raw.length : 0;
    const systemPromptChars = countSystemPromptChars(lastRequest.body);
    const toolNames = outputToolNames(response);
    const text = outputText(response);
    const transcriptToolMentions = await readSqliteTranscriptMentions({ stateDir, targetTool });

    return {
      lane,
      status: typeof responseStatus === "string" ? responseStatus : "",
      providerRequestCount: laneRequests.length,
      providerRawBytes: rawLength,
      providerSystemPromptChars: systemPromptChars,
      providerDeclaredToolCount: Array.isArray(declaredTools) ? declaredTools.length : 0,
      providerPlannedTools: plannedTools,
      gatewayOutputToolNames: toolNames,
      gatewayOutputText: text,
      transcriptToolMentions,
    };
  } finally {
    await server.close({ reason: `${params.lane} lane complete` });
    resetConfigRuntimeState();
  }
}

/**
 * Runs the Tool Search gateway E2E: one "normal" lane and one "code" lane
 * against a shared mock OpenAI provider, then asserts that
 *
 * - the normal lane called the target tool directly,
 * - the code lane reached it through `tool_search_code`,
 * - the code lane declared fewer tools and sent a smaller request to the
 *   provider than the normal lane, and
 * - the code lane's SQLite transcript mentions both the bridge and the target tool.
 *
 * On success a JSON summary is written to stdout. The temp directory is not
 * removed here; its path is included in the summary as `rootDir`.
 *
 * @throws Error when any expectation fails or a lane cannot be run.
 */
async function main() {
  const rootDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-tool-search-"));
  const provider = await startQaMockOpenAiServer();
  // Everything after the provider starts runs inside the try block, so the
  // provider is stopped even when generating the fixture plugin fails.
  try {
    const fakeTools = buildFakeTools();
    const fakePluginDir = await writeFakePlugin({
      rootDir,
      repoRoot: process.cwd(),
      fakeTools,
    });
    const targetTool = "fake_plugin_tool_17";

    // Lanes run one after the other: each one rewrites process.env and resets
    // the config runtime state.
    const normal = await runLane({
      lane: "normal",
      rootDir,
      providerBaseUrl: provider.baseUrl,
      targetTool,
      fakeTools,
      fakePluginDir,
    });
    const code = await runLane({
      lane: "code",
      rootDir,
      providerBaseUrl: provider.baseUrl,
      targetTool,
      fakeTools,
      fakePluginDir,
    });

    assert(
      normal.providerPlannedTools.includes(targetTool) &&
        normal.gatewayOutputText.includes("FAKE_PLUGIN_OK") &&
        normal.gatewayOutputText.includes(targetTool),
      `normal lane did not call ${targetTool}`,
    );
    assert(
      code.providerPlannedTools.includes("tool_search_code") &&
        code.gatewayOutputText.includes(targetTool) &&
        code.transcriptToolMentions[targetTool] > 0,
      `code lane did not bridge-call ${targetTool}`,
    );
    assert(
      normal.providerDeclaredToolCount > code.providerDeclaredToolCount,
      `expected Tool Search to expose fewer tools to provider: normal=${normal.providerDeclaredToolCount} code=${code.providerDeclaredToolCount}`,
    );
    assert(
      normal.providerRawBytes > code.providerRawBytes,
      `expected Tool Search request to be smaller: normal=${normal.providerRawBytes} code=${code.providerRawBytes}`,
    );
    assert(
      code.transcriptToolMentions.tool_search_code > 0 &&
        code.transcriptToolMentions[targetTool] > 0,
      "code lane SQLite transcript did not record bridge and target tool mentions",
    );

    const summary = {
      ok: true,
      rootDir,
      targetTool,
      normal,
      code,
      reduction: {
        providerRawBytes: normal.providerRawBytes - code.providerRawBytes,
        providerDeclaredTools: normal.providerDeclaredToolCount - code.providerDeclaredToolCount,
        providerSystemPromptChars:
          normal.providerSystemPromptChars - code.providerSystemPromptChars,
      },
    };
    process.stdout.write(`${JSON.stringify(summary, null, 2)}\n`);
  } finally {
    await provider.stop();
  }
}

// Script entry point: run the E2E as soon as the module is evaluated. A rejection
// from main() propagates out of this top-level await.
await main();