Revised patch. Hunk line counts are recomputed for the changed content; the blob ids on the
`index` lines are carried over from the original patch and no longer match modified files.

diff --git a/docs/channels/qa-channel.md b/docs/channels/qa-channel.md
index 107baba5389..c62ef42dabd 100644
--- a/docs/channels/qa-channel.md
+++ b/docs/channels/qa-channel.md
@@ -79,6 +79,15 @@ pnpm qa:lab:build
 pnpm openclaw qa ui
 ```
 
 That launches the private QA debugger at a local URL, separate from the shipped Control UI
 bundle.
+
+Full repo-backed QA suite:
+
+```bash
+pnpm openclaw qa suite
+```
+
+By default the suite writes `qa-suite-report.md` and `qa-suite-summary.json` to a new
+`.artifacts/qa-e2e/suite-*` directory; pass `--output-dir <path>` to choose another location.
 
diff --git a/extensions/qa-lab/api.ts b/extensions/qa-lab/api.ts
index 04b17e74e65..e807386e1c1 100644
--- a/extensions/qa-lab/api.ts
+++ b/extensions/qa-lab/api.ts
@@ -15,3 +15,5 @@ export * from "./src/scenario.js";
 export * from "./src/scenario-catalog.js";
 export * from "./src/self-check-scenario.js";
 export * from "./src/self-check.js";
+export * from "./src/gateway-child.js";
+export * from "./src/suite.js";
diff --git a/extensions/qa-lab/src/cli.runtime.ts b/extensions/qa-lab/src/cli.runtime.ts
index 95a1b44fe4a..8ef23858e03 100644
--- a/extensions/qa-lab/src/cli.runtime.ts
+++ b/extensions/qa-lab/src/cli.runtime.ts
@@ -2,6 +2,7 @@ import path from "node:path";
 import { buildQaDockerHarnessImage, writeQaDockerHarnessFiles } from "./docker-harness.js";
 import { startQaLabServer } from "./lab-server.js";
 import { startQaMockOpenAiServer } from "./mock-openai-server.js";
+import { runQaSuite } from "./suite.js";
 
 export async function runQaLabSelfCheckCommand(opts: { output?: string }) {
   const server = await startQaLabServer({
@@ -15,6 +16,22 @@ export async function runQaLabSelfCheckCommand(opts: { output?: string }) {
   }
 }
 
+/**
+ * CLI entry point for `qa suite`: runs every catalog scenario and prints the artifact paths.
+ *
+ * Sets a non-zero process exit code when any scenario failed so callers can gate on it.
+ */
+export async function runQaSuiteCommand(opts: { outputDir?: string }) {
+  const result = await runQaSuite({
+    outputDir: opts.outputDir ? path.resolve(opts.outputDir) : undefined,
+  });
+  process.stdout.write(`QA suite report: ${result.reportPath}\n`);
+  process.stdout.write(`QA suite summary: ${result.summaryPath}\n`);
+  if (result.scenarios.some((scenario) => scenario.status === "fail")) {
+    process.exitCode = 1;
+  }
+}
+
 export async function runQaLabUiCommand(opts: {
   host?: string;
   port?: number;
diff --git a/extensions/qa-lab/src/cli.ts b/extensions/qa-lab/src/cli.ts
index bbe8d7472ec..493e20e147c 100644
--- a/extensions/qa-lab/src/cli.ts
+++ b/extensions/qa-lab/src/cli.ts
@@ -14,6 +14,12 @@ async function runQaSelfCheck(opts: { output?: string }) {
   await runtime.runQaLabSelfCheckCommand(opts);
 }
 
+/** Loads the QA lab CLI runtime, then delegates to its suite command. */
+async function runQaSuite(opts: { outputDir?: string }) {
+  const runtime = await loadQaLabCliRuntime();
+  await runtime.runQaSuiteCommand(opts);
+}
+
 async function runQaUi(opts: {
   host?: string;
   port?: number;
@@ -63,6 +69,13 @@ export function registerQaLabCli(program: Command) {
       await runQaSelfCheck(opts);
     });
 
+  qa.command("suite")
+    .description("Run all repo-backed QA scenarios against the real QA gateway lane")
+    .option("--output-dir <path>", "Suite artifact directory")
+    .action(async (opts: { outputDir?: string }) => {
+      await runQaSuite(opts);
+    });
+
   qa.command("ui")
     .description("Start the private QA debugger UI and local QA bus")
     .option("--host <host>", "Bind host", "127.0.0.1")
diff --git a/extensions/qa-lab/src/gateway-child.ts b/extensions/qa-lab/src/gateway-child.ts
new file mode 100644
index 00000000000..ef133ce60bc
--- /dev/null
+++ b/extensions/qa-lab/src/gateway-child.ts
@@ -0,0 +1,283 @@
+import { spawn } from "node:child_process";
+import { randomUUID } from "node:crypto";
+import fs from "node:fs/promises";
+import net from "node:net";
+import os from "node:os";
+import path from "node:path";
+import { setTimeout as sleep } from "node:timers/promises";
+import type { OpenClawConfig } from "openclaw/plugin-sdk/core";
+import { seedQaAgentWorkspace } from "./qa-agent-workspace.js";
+import { buildQaGatewayConfig } from "./qa-gateway-config.js";
+
+/**
+ * Asks the OS for a free TCP port on the loopback interface.
+ *
+ * The probe listener is closed before the port number is returned, so the port is only
+ * very likely (not guaranteed) to still be free when the gateway binds it.
+ */
+async function getFreePort() {
+  return await new Promise<number>((resolve, reject) => {
+    const server = net.createServer();
+    server.once("error", reject);
+    server.listen(0, "127.0.0.1", () => {
+      const address = server.address();
+      if (!address || typeof address === "string") {
+        // Release the listener before failing so the probe socket is not leaked.
+        server.close(() => reject(new Error("failed to allocate port")));
+        return;
+      }
+      server.close((error) => (error ? reject(error) : resolve(address.port)));
+    });
+  });
+}
+
+/**
+ * Polls `${baseUrl}/readyz` every 250ms until it answers with a 2xx status.
+ *
+ * @param baseUrl - HTTP origin of the gateway under test.
+ * @param logs - Returns the child output collected so far; embedded in the failure message.
+ * @param timeoutMs - Overall budget before giving up.
+ * @param hasExited - Reports whether the gateway process is already gone, so a crashed child
+ *   fails immediately instead of burning the whole timeout.
+ * @throws Error when the gateway exits or never becomes ready within `timeoutMs`.
+ */
+async function waitForGatewayReady(
+  baseUrl: string,
+  logs: () => string,
+  timeoutMs = 30_000,
+  hasExited: () => boolean = () => false,
+) {
+  const startedAt = Date.now();
+  while (Date.now() - startedAt < timeoutMs) {
+    if (hasExited()) {
+      throw new Error(`gateway exited before becoming healthy:\n${logs()}`);
+    }
+    try {
+      const response = await fetch(`${baseUrl}/readyz`);
+      if (response.ok) {
+        return;
+      }
+    } catch {
+      // retry until timeout
+    }
+    await sleep(250);
+  }
+  throw new Error(`gateway failed to become healthy:\n${logs()}`);
+}
+
+/**
+ * Runs the CLI entry point in a child Node process and parses its stdout as JSON.
+ *
+ * @returns The parsed JSON value, or `{}` when the command printed nothing.
+ * @throws Error carrying the child's stderr when it exits with a non-zero code.
+ */
+async function runCliJson(params: { cwd: string; env: NodeJS.ProcessEnv; args: string[] }) {
+  const stdout: Buffer[] = [];
+  const stderr: Buffer[] = [];
+  await new Promise<void>((resolve, reject) => {
+    const child = spawn(process.execPath, params.args, {
+      cwd: params.cwd,
+      env: params.env,
+      stdio: ["ignore", "pipe", "pipe"],
+    });
+    child.stdout.on("data", (chunk) => stdout.push(Buffer.from(chunk)));
+    child.stderr.on("data", (chunk) => stderr.push(Buffer.from(chunk)));
+    child.once("error", reject);
+    // "close" fires only after the stdio pipes have ended; "exit" can fire while output is
+    // still buffered, which would hand truncated JSON to the parser below.
+    child.once("close", (code) => {
+      if (code === 0) {
+        resolve();
+        return;
+      }
+      reject(
+        new Error(
+          `gateway cli failed (${code ?? "unknown"}): ${Buffer.concat(stderr).toString("utf8")}`,
+        ),
+      );
+    });
+  });
+  const text = Buffer.concat(stdout).toString("utf8").trim();
+  return text ? (JSON.parse(text) as unknown) : {};
+}
+
+/**
+ * Boots a real gateway as a child process inside a throwaway temp directory.
+ *
+ * HOME, XDG and OpenClaw state paths all point into that directory. The returned handle
+ * exposes the resolved config and URLs, a `call` helper that issues gateway RPCs through the
+ * CLI, and `stop`, which terminates the child and removes the temp directory.
+ *
+ * @throws Error when the gateway does not report ready; the child and temp directory are
+ *   cleaned up before the error propagates.
+ */
+export async function startQaGatewayChild(params: {
+  repoRoot: string;
+  providerBaseUrl: string;
+  qaBusBaseUrl: string;
+}) {
+  // Allocate the port first so a failure here cannot leave a temp directory behind.
+  const gatewayPort = await getFreePort();
+  const gatewayToken = `qa-suite-${randomUUID()}`;
+  const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-qa-suite-"));
+  const workspaceDir = path.join(tempRoot, "workspace");
+  const stateDir = path.join(tempRoot, "state");
+  const homeDir = path.join(tempRoot, "home");
+  const xdgConfigHome = path.join(tempRoot, "xdg-config");
+  const xdgDataHome = path.join(tempRoot, "xdg-data");
+  const xdgCacheHome = path.join(tempRoot, "xdg-cache");
+  const configPath = path.join(tempRoot, "openclaw.json");
+  const removeTempRoot = async () => {
+    await fs.rm(tempRoot, { recursive: true, force: true });
+  };
+
+  let cfg: ReturnType<typeof buildQaGatewayConfig>;
+  try {
+    await seedQaAgentWorkspace({
+      workspaceDir,
+      repoRoot: params.repoRoot,
+    });
+    await Promise.all([
+      fs.mkdir(stateDir, { recursive: true }),
+      fs.mkdir(homeDir, { recursive: true }),
+      fs.mkdir(xdgConfigHome, { recursive: true }),
+      fs.mkdir(xdgDataHome, { recursive: true }),
+      fs.mkdir(xdgCacheHome, { recursive: true }),
+    ]);
+    cfg = buildQaGatewayConfig({
+      bind: "loopback",
+      gatewayPort,
+      gatewayToken,
+      providerBaseUrl: params.providerBaseUrl,
+      qaBusBaseUrl: params.qaBusBaseUrl,
+      workspaceDir,
+    });
+    await fs.writeFile(configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8");
+  } catch (error) {
+    await removeTempRoot();
+    throw error;
+  }
+
+  const stdout: Buffer[] = [];
+  const stderr: Buffer[] = [];
+  const env = {
+    ...process.env,
+    HOME: homeDir,
+    OPENCLAW_HOME: homeDir,
+    OPENCLAW_CONFIG_PATH: configPath,
+    OPENCLAW_STATE_DIR: stateDir,
+    OPENCLAW_OAUTH_DIR: path.join(stateDir, "credentials"),
+    OPENCLAW_GATEWAY_TOKEN: gatewayToken,
+    OPENCLAW_SKIP_BROWSER_CONTROL_SERVER: "1",
+    OPENCLAW_SKIP_GMAIL_WATCHER: "1",
+    OPENCLAW_SKIP_CANVAS_HOST: "1",
+    OPENCLAW_NO_RESPAWN: "1",
+    OPENCLAW_TEST_FAST: "1",
+    XDG_CONFIG_HOME: xdgConfigHome,
+    XDG_DATA_HOME: xdgDataHome,
+    XDG_CACHE_HOME: xdgCacheHome,
+  };
+
+  const child = spawn(
+    process.execPath,
+    [
+      "dist/index.js",
+      "gateway",
+      "run",
+      "--port",
+      String(gatewayPort),
+      "--bind",
+      "loopback",
+      "--allow-unconfigured",
+    ],
+    {
+      cwd: params.repoRoot,
+      env,
+      stdio: ["ignore", "pipe", "pipe"],
+    },
+  );
+  child.stdout.on("data", (chunk) => stdout.push(Buffer.from(chunk)));
+  child.stderr.on("data", (chunk) => stderr.push(Buffer.from(chunk)));
+  // Without an "error" listener a failed spawn would surface as an uncaught exception.
+  let spawnFailed = false;
+  child.once("error", (error) => {
+    spawnFailed = true;
+    stderr.push(Buffer.from(`${error.message}\n`));
+  });
+  // `child.killed` only records that a signal was sent, so liveness is derived from the
+  // exit status instead.
+  const hasExited = () => spawnFailed || child.exitCode !== null || child.signalCode !== null;
+
+  const baseUrl = `http://127.0.0.1:${gatewayPort}`;
+  const wsUrl = `ws://127.0.0.1:${gatewayPort}`;
+  const logs = () =>
+    `${Buffer.concat(stdout).toString("utf8")}\n${Buffer.concat(stderr).toString("utf8")}`.trim();
+
+  const stop = async () => {
+    if (!hasExited()) {
+      const exited = new Promise<void>((resolve) => child.once("exit", () => resolve()));
+      const escalation = new AbortController();
+      child.kill("SIGTERM");
+      await Promise.race([
+        exited,
+        sleep(5_000, undefined, { signal: escalation.signal }).then(
+          async () => {
+            if (!hasExited()) {
+              child.kill("SIGKILL");
+              await exited;
+            }
+          },
+          () => undefined, // timer cancelled below because the child exited in time
+        ),
+      ]);
+      // Cancel the pending timer so it does not keep the event loop alive for 5s.
+      escalation.abort();
+    }
+    await removeTempRoot();
+  };
+
+  try {
+    await waitForGatewayReady(baseUrl, logs, 30_000, hasExited);
+  } catch (error) {
+    await stop();
+    throw error;
+  }
+
+  return {
+    cfg,
+    baseUrl,
+    wsUrl,
+    token: gatewayToken,
+    workspaceDir,
+    tempRoot,
+    configPath,
+    logs,
+    async call(
+      method: string,
+      rpcParams?: unknown,
+      opts?: { expectFinal?: boolean; timeoutMs?: number },
+    ) {
+      return await runCliJson({
+        cwd: params.repoRoot,
+        env,
+        args: [
+          "dist/index.js",
+          "gateway",
+          "call",
+          method,
+          "--url",
+          wsUrl,
+          "--token",
+          gatewayToken,
+          "--json",
+          "--timeout",
+          String(opts?.timeoutMs ?? 20_000),
+          ...(opts?.expectFinal ? ["--expect-final"] : []),
+          "--params",
+          JSON.stringify(rpcParams ?? {}),
+        ],
+      });
+    },
+    stop,
+  };
+}
diff --git a/extensions/qa-lab/src/lab-server.test.ts b/extensions/qa-lab/src/lab-server.test.ts
index 3250696d2c1..35454bf4bc9 100644
--- a/extensions/qa-lab/src/lab-server.test.ts
+++ b/extensions/qa-lab/src/lab-server.test.ts
@@ -192,4 +192,35 @@ describe("qa-lab server", () => {
     expect(html).not.toContain("QA Lab UI not built");
     expect(html).toContain("");
   });
+
+  it("can disable the embedded echo gateway for real-suite runs", async () => {
+    const lab = await startQaLabServer({
+      host: "127.0.0.1",
+      port: 0,
+      embeddedGateway: "disabled",
+    });
+    cleanups.push(async () => {
+      await lab.stop();
+    });
+
+    const inbound = await fetch(`${lab.baseUrl}/api/inbound/message`, {
+      method: "POST",
+      headers: {
+        "content-type": "application/json",
+      },
+      body: JSON.stringify({
+        conversation: { id: "bob", kind: "direct" },
+        senderId: "bob",
+        senderName: "Bob",
+        text: "hello from suite",
+      }),
+    });
+    expect(inbound.ok).toBe(true);
+
+    await new Promise((resolve) => setTimeout(resolve, 800));
+    const snapshot = (await (await fetch(`${lab.baseUrl}/api/state`)).json()) as {
+      messages: Array<{ direction: string }>;
+    };
+    expect(snapshot.messages.filter((message) => message.direction === "outbound")).toHaveLength(0);
+  });
 });
diff --git a/extensions/qa-lab/src/lab-server.ts b/extensions/qa-lab/src/lab-server.ts
index 09553528bb6..1ec8fe602ee 100644
--- a/extensions/qa-lab/src/lab-server.ts
+++ b/extensions/qa-lab/src/lab-server.ts
@@ -372,6 +372,7 @@ export async function startQaLabServer(params?: {
         stop: () => Promise<void>;
       }
     | undefined;
+  const embeddedGatewayEnabled = params?.embeddedGateway !== "disabled";
   let publicBaseUrl = "";
 
   const server = createServer(async (req, res) => {
@@ -514,7 +515,9 @@ export async function startQaLabServer(params?: {
     advertiseHost: params?.advertiseHost,
     advertisePort: params?.advertisePort,
   });
-  gateway = await startQaGatewayLoop({ state, baseUrl: listenUrl });
+  if (embeddedGatewayEnabled) {
+    gateway = await startQaGatewayLoop({ state, baseUrl: listenUrl });
+  }
   if (params?.sendKickoffOnStart) {
     injectKickoffMessage({
       state,
@@ -544,7 +547,7 @@ export async function startQaLabServer(params?: {
     async runSelfCheck() {
       const result = await runQaSelfCheckAgainstState({
         state,
-        cfg: gateway!.cfg,
+        cfg: gateway?.cfg ?? createQaLabConfig(listenUrl),
         outputPath: params?.outputPath,
       });
       latestReport = {
diff --git a/extensions/qa-lab/src/suite.ts b/extensions/qa-lab/src/suite.ts
new file mode 100644
index 00000000000..698f317d9d1
--- /dev/null
+++ b/extensions/qa-lab/src/suite.ts
@@ -0,0 +1,800 @@
+import { randomUUID } from "node:crypto";
+import fs from "node:fs/promises";
+import path from "node:path";
+import { setTimeout as sleep } from "node:timers/promises";
+import type { OpenClawConfig } from "openclaw/plugin-sdk/core";
+import type { QaBusState } from "./bus-state.js";
+import { extractQaToolPayload } from "./extract-tool-payload.js";
+import { startQaGatewayChild } from "./gateway-child.js";
+import { startQaLabServer } from "./lab-server.js";
+import { startQaMockOpenAiServer } from "./mock-openai-server.js";
+import { renderQaMarkdownReport, type QaReportCheck, type QaReportScenario } from "./report.js";
+import { qaChannelPlugin, type QaBusMessage } from "./runtime-api.js";
+import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js";
+
+/** One named step of a scenario; a returned string is recorded as the step details. */
+type QaSuiteStep = {
+  name: string;
+  run: () => Promise<string | void>;
+};
+
+/** Outcome of one scenario together with the checks recorded for its steps. */
+type QaSuiteScenarioResult = {
+  name: string;
+  status: "pass" | "fail";
+  steps: QaReportCheck[];
+  details?: string;
+};
+
+/** Handles to the services a suite run is wired against. */
+type QaSuiteEnvironment = {
+  lab: Awaited<ReturnType<typeof startQaLabServer>>;
+  mock: Awaited<ReturnType<typeof startQaMockOpenAiServer>>;
+  gateway: Awaited<ReturnType<typeof startQaGatewayChild>>;
+  cfg: OpenClawConfig;
+};
+
+/** Artifact locations and per-scenario results returned by `runQaSuite`. */
+export type QaSuiteResult = {
+  outputDir: string;
+  reportPath: string;
+  summaryPath: string;
+  report: string;
+  scenarios: QaSuiteScenarioResult[];
+};
+
+/** Builds the qa-channel config passed to in-process channel actions for the bus at `baseUrl`. */
+function createQaActionConfig(baseUrl: string): OpenClawConfig {
+  return {
+    channels: {
+      "qa-channel": {
+        enabled: true,
+        baseUrl,
+        botUserId: "openclaw",
+        botDisplayName: "OpenClaw QA",
+        allowFrom: ["*"],
+      },
+    },
+  };
+}
+
+/**
+ * Polls `check` every `intervalMs` until it yields a value other than `null`/`undefined`.
+ *
+ * @returns The first non-nullish value produced by `check`.
+ * @throws Error when nothing is produced within `timeoutMs`; errors thrown by `check`
+ *   propagate unchanged.
+ */
+async function waitForCondition<T>(
+  check: () => T | Promise<T | null | undefined> | null | undefined,
+  timeoutMs = 15_000,
+  intervalMs = 100,
+): Promise<T> {
+  const startedAt = Date.now();
+  while (Date.now() - startedAt < timeoutMs) {
+    const value = await check();
+    if (value !== null && value !== undefined) {
+      return value;
+    }
+    await sleep(intervalMs);
+  }
+  throw new Error(`timed out after ${timeoutMs}ms`);
+}
+
+/** Returns the messages in the current bus snapshot whose direction is "outbound". */
+function listOutboundMessages(state: QaBusState) {
+  return state.getSnapshot().messages.filter((message) => message.direction === "outbound");
+}
+
+/** Waits for the first outbound bus message that satisfies `predicate`. */
+async function waitForOutboundMessage(
+  state: QaBusState,
+  predicate: (message: QaBusMessage) => boolean,
+  timeoutMs = 15_000,
+) {
+  return await waitForCondition(() => listOutboundMessages(state).find(predicate), timeoutMs);
+}
+
+/**
+ * Waits for the newest outbound message matching `predicate`, ignoring the first
+ * `opts.skip` outbound messages. Passing the outbound count captured before a prompt keeps
+ * a reply from an earlier step from satisfying the wait.
+ */
+async function waitForLatestOutbound(
+  state: QaBusState,
+  predicate: (message: QaBusMessage) => boolean,
+  opts: { skip?: number; timeoutMs?: number } = {},
+) {
+  return await waitForCondition(
+    () =>
+      listOutboundMessages(state)
+        .slice(opts.skip ?? 0)
+        .filter(predicate)
+        .at(-1),
+    opts.timeoutMs ?? 20_000,
+  );
+}
+
+/** Sleeps for `timeoutMs`, then fails if any outbound message has been recorded. */
+async function waitForNoOutbound(state: QaBusState, timeoutMs = 1_200) {
+  await sleep(timeoutMs);
+  const outbound = listOutboundMessages(state);
+  if (outbound.length > 0) {
+    throw new Error(`expected no outbound messages, saw ${outbound.length}`);
+  }
+}
+
+/**
+ * Runs `steps` in order and stops at the first one that throws.
+ *
+ * A thrown error is recorded as that step's failure details and as the scenario details;
+ * later steps are not executed.
+ */
+async function runScenario(name: string, steps: QaSuiteStep[]): Promise<QaSuiteScenarioResult> {
+  const stepResults: QaReportCheck[] = [];
+  for (const step of steps) {
+    try {
+      const details = await step.run();
+      stepResults.push({
+        name: step.name,
+        status: "pass",
+        ...(details ? { details } : {}),
+      });
+    } catch (error) {
+      const details = error instanceof Error ? error.message : String(error);
+      stepResults.push({
+        name: step.name,
+        status: "fail",
+        details,
+      });
+      return {
+        name,
+        status: "fail",
+        steps: stepResults,
+        details,
+      };
+    }
+  }
+  return {
+    name,
+    status: "pass",
+    steps: stepResults,
+  };
+}
+
+/** GETs `url` and parses the body as JSON; throws on any non-2xx response. */
+async function fetchJson<T>(url: string): Promise<T> {
+  const response = await fetch(url);
+  if (!response.ok) {
+    throw new Error(`request failed ${response.status}: ${url}`);
+  }
+  return (await response.json()) as T;
+}
+
+/**
+ * Starts an agent run through the gateway `agent` RPC and blocks on `agent.wait`.
+ *
+ * Replies are requested over qa-channel to `params.to` (default `dm:qa-operator`).
+ *
+ * @throws Error when no `runId` is returned or the run does not finish with status `ok`.
+ */
+async function runAgentPrompt(
+  env: QaSuiteEnvironment,
+  params: {
+    sessionKey: string;
+    message: string;
+    to?: string;
+    threadId?: string;
+    provider?: string;
+    model?: string;
+    timeoutMs?: number;
+  },
+) {
+  const target = params.to ?? "dm:qa-operator";
+  const timeoutMs = params.timeoutMs ?? 30_000;
+  const started = (await env.gateway.call(
+    "agent",
+    {
+      idempotencyKey: randomUUID(),
+      agentId: "qa",
+      sessionKey: params.sessionKey,
+      message: params.message,
+      deliver: true,
+      channel: "qa-channel",
+      to: target,
+      replyChannel: "qa-channel",
+      replyTo: target,
+      ...(params.threadId ? { threadId: params.threadId } : {}),
+      ...(params.provider ? { provider: params.provider } : {}),
+      ...(params.model ? { model: params.model } : {}),
+    },
+    {
+      timeoutMs,
+    },
+  )) as { runId?: string; status?: string };
+  if (!started.runId) {
+    throw new Error(`agent call did not return a runId: ${JSON.stringify(started)}`);
+  }
+  const waited = (await env.gateway.call(
+    "agent.wait",
+    {
+      runId: started.runId,
+      timeoutMs,
+    },
+    {
+      // The CLI call gets 5s of headroom over the timeout passed to agent.wait itself.
+      timeoutMs: timeoutMs + 5_000,
+    },
+  )) as { status?: string; error?: string };
+  if (waited.status !== "ok") {
+    throw new Error(
+      `agent.wait returned ${String(waited.status ?? "unknown")}: ${waited.error ?? "no error"}`,
+    );
+  }
+  return {
+    started,
+    waited,
+  };
+}
+
+type QaActionName = "delete" | "edit" | "react" | "thread-create";
+
+/** Invokes a qa-channel message action in-process and returns its extracted tool payload. */
+async function handleQaAction(params: {
+  env: QaSuiteEnvironment;
+  action: QaActionName;
+  args: Record<string, unknown>;
+}) {
+  const result = await qaChannelPlugin.actions?.handleAction?.({
+    channel: "qa-channel",
+    action: params.action,
+    cfg: params.env.cfg,
+    accountId: "default",
+    params: params.args,
+  });
+  return extractQaToolPayload(result);
+}
+
+/**
+ * Maps scenario ids from the bootstrap catalog to executable step lists.
+ *
+ * Each scenario resets the shared bus state at the start of its first step.
+ */
+function buildScenarioMap(env: QaSuiteEnvironment) {
+  const state = env.lab.state;
+  const reset = async () => {
+    state.reset();
+    await sleep(100);
+  };
+
+  return new Map<string, () => Promise<QaSuiteScenarioResult>>([
+    [
+      "channel-chat-baseline",
+      async () =>
+        await runScenario("Channel baseline conversation", [
+          {
+            name: "ignores unmentioned channel chatter",
+            run: async () => {
+              await reset();
+              state.addInboundMessage({
+                conversation: { id: "qa-room", kind: "channel", title: "QA Room" },
+                senderId: "alice",
+                senderName: "Alice",
+                text: "hello team, no bot ping here",
+              });
+              await waitForNoOutbound(state);
+            },
+          },
+          {
+            name: "replies when mentioned in channel",
+            run: async () => {
+              state.addInboundMessage({
+                conversation: { id: "qa-room", kind: "channel", title: "QA Room" },
+                senderId: "alice",
+                senderName: "Alice",
+                text: "@openclaw explain the QA lab",
+              });
+              const message = await waitForOutboundMessage(
+                state,
+                (candidate) => candidate.conversation.id === "qa-room" && !candidate.threadId,
+              );
+              return message.text;
+            },
+          },
+        ]),
+    ],
+    [
+      "cron-one-minute-ping",
+      async () => {
+        // Shared between the two steps below; scoped to this run instead of `globalThis`.
+        let cronJobId: string | undefined;
+        return await runScenario("Cron one-minute ping", [
+          {
+            name: "stores a reminder roughly one minute ahead",
+            run: async () => {
+              await reset();
+              const at = new Date(Date.now() + 60_000).toISOString();
+              const response = (await env.gateway.call("cron.add", {
+                name: `qa-suite-${randomUUID()}`,
+                enabled: true,
+                schedule: { kind: "at", at },
+                sessionTarget: "isolated",
+                wakeMode: "next-heartbeat",
+                payload: {
+                  kind: "agentTurn",
+                  message:
+                    "A QA cron just fired. Send a one-line ping back to the room so the operator can verify delivery.",
+                },
+                delivery: {
+                  mode: "announce",
+                  channel: "qa-channel",
+                  to: "channel:qa-room",
+                },
+              })) as { id?: string; schedule?: { at?: string } };
+              const scheduledAt = response.schedule?.at ?? at;
+              const delta = new Date(scheduledAt).getTime() - Date.now();
+              if (delta < 45_000 || delta > 75_000) {
+                throw new Error(`expected ~1 minute schedule, got ${delta}ms`);
+              }
+              cronJobId = response.id;
+              return scheduledAt;
+            },
+          },
+          {
+            name: "forces the reminder through QA channel delivery",
+            run: async () => {
+              const jobId = cronJobId;
+              if (!jobId) {
+                throw new Error("missing cron job id");
+              }
+              await env.gateway.call(
+                "cron.run",
+                { id: jobId, mode: "force" },
+                { timeoutMs: 30_000 },
+              );
+              const outbound = await waitForOutboundMessage(
+                state,
+                (candidate) => candidate.conversation.id === "qa-room",
+                30_000,
+              );
+              return outbound.text;
+            },
+          },
+        ]);
+      },
+    ],
+    [
+      "dm-chat-baseline",
+      async () =>
+        await runScenario("DM baseline conversation", [
+          {
+            name: "replies coherently in DM",
+            run: async () => {
+              await reset();
+              state.addInboundMessage({
+                conversation: { id: "alice", kind: "direct" },
+                senderId: "alice",
+                senderName: "Alice",
+                text: "Hello there, who are you?",
+              });
+              const outbound = await waitForOutboundMessage(
+                state,
+                (candidate) => candidate.conversation.id === "alice",
+              );
+              return outbound.text;
+            },
+          },
+        ]),
+    ],
+    [
+      "lobster-invaders-build",
+      async () =>
+        await runScenario("Build Lobster Invaders", [
+          {
+            name: "creates the artifact after reading context",
+            run: async () => {
+              await reset();
+              await runAgentPrompt(env, {
+                sessionKey: "agent:qa:lobster-invaders",
+                message:
+                  "Read the QA kickoff context first, then build a tiny Lobster Invaders HTML game in this workspace and tell me where it is.",
+              });
+              await waitForOutboundMessage(
+                state,
+                (candidate) => candidate.conversation.id === "qa-operator",
+              );
+              const artifactPath = path.join(env.gateway.workspaceDir, "lobster-invaders.html");
+              const artifact = await fs.readFile(artifactPath, "utf8");
+              if (!artifact.includes("Lobster Invaders")) {
+                throw new Error("missing Lobster Invaders artifact");
+              }
+              const requests = await fetchJson<Array<{ prompt?: string; toolOutput?: string }>>(
+                `${env.mock.baseUrl}/debug/requests`,
+              );
+              if (!requests.some((request) => (request.toolOutput ?? "").includes("QA mission"))) {
+                throw new Error("expected pre-write read evidence");
+              }
+              return "lobster-invaders.html";
+            },
+          },
+        ]),
+    ],
+    [
+      "memory-recall",
+      async () =>
+        await runScenario("Memory recall after context switch", [
+          {
+            name: "stores the canary fact",
+            run: async () => {
+              await reset();
+              await runAgentPrompt(env, {
+                sessionKey: "agent:qa:memory",
+                message: "Please remember this fact for later: the QA canary code is ALPHA-7.",
+              });
+              const outbound = await waitForOutboundMessage(
+                state,
+                (candidate) => candidate.conversation.id === "qa-operator",
+              );
+              return outbound.text;
+            },
+          },
+          {
+            name: "recalls the same fact later",
+            run: async () => {
+              // Ignore replies from the previous step, which may already echo the code.
+              const alreadySeen = listOutboundMessages(state).length;
+              await runAgentPrompt(env, {
+                sessionKey: "agent:qa:memory",
+                message: "What was the QA canary code I asked you to remember earlier?",
+              });
+              const outbound = await waitForLatestOutbound(
+                state,
+                (candidate) =>
+                  candidate.conversation.id === "qa-operator" &&
+                  candidate.text.includes("ALPHA-7"),
+                { skip: alreadySeen },
+              );
+              return outbound.text;
+            },
+          },
+        ]),
+    ],
+    [
+      "model-switch-follow-up",
+      async () =>
+        await runScenario("Model switch follow-up", [
+          {
+            name: "runs on the default configured model",
+            run: async () => {
+              await reset();
+              await runAgentPrompt(env, {
+                sessionKey: "agent:qa:model-switch",
+                message: "Say hello from the default configured model.",
+              });
+              await waitForOutboundMessage(
+                state,
+                (candidate) => candidate.conversation.id === "qa-operator",
+              );
+              const request = await fetchJson<{ body?: { model?: string } }>(
+                `${env.mock.baseUrl}/debug/last-request`,
+              );
+              return String(request.body?.model ?? "");
+            },
+          },
+          {
+            name: "switches to the alternate model and continues",
+            run: async () => {
+              // Only replies produced by this prompt count as evidence of the switch.
+              const alreadySeen = listOutboundMessages(state).length;
+              await runAgentPrompt(env, {
+                sessionKey: "agent:qa:model-switch",
+                message: "Continue the exchange after switching models and note the handoff.",
+                provider: "mock-openai",
+                model: "gpt-5.4-alt",
+              });
+              const outbound = await waitForLatestOutbound(
+                state,
+                (candidate) =>
+                  candidate.conversation.id === "qa-operator" &&
+                  candidate.text.toLowerCase().includes("switch"),
+                { skip: alreadySeen },
+              );
+              const request = await fetchJson<{ body?: { model?: string } }>(
+                `${env.mock.baseUrl}/debug/last-request`,
+              );
+              if (request.body?.model !== "gpt-5.4-alt") {
+                throw new Error(`expected gpt-5.4-alt, got ${String(request.body?.model ?? "")}`);
+              }
+              return outbound.text;
+            },
+          },
+        ]),
+    ],
+    [
+      "reaction-edit-delete",
+      async () =>
+        await runScenario("Reaction, edit, delete lifecycle", [
+          {
+            name: "records reaction, edit, and delete actions",
+            run: async () => {
+              await reset();
+              const seed = state.addOutboundMessage({
+                to: "channel:qa-room",
+                text: "seed message",
+              });
+              await handleQaAction({
+                env,
+                action: "react",
+                args: { messageId: seed.id, emoji: "white_check_mark" },
+              });
+              await handleQaAction({
+                env,
+                action: "edit",
+                args: { messageId: seed.id, text: "seed message (edited)" },
+              });
+              await handleQaAction({
+                env,
+                action: "delete",
+                args: { messageId: seed.id },
+              });
+              const message = state.readMessage({ messageId: seed.id });
+              if (
+                message.reactions.length === 0 ||
+                !message.deleted ||
+                !message.text.includes("(edited)")
+              ) {
+                throw new Error("message lifecycle did not persist");
+              }
+              return message.text;
+            },
+          },
+        ]),
+    ],
+    [
+      "source-docs-discovery-report",
+      async () =>
+        await runScenario("Source and docs discovery report", [
+          {
+            name: "reads seeded material and emits a protocol report",
+            run: async () => {
+              await reset();
+              await runAgentPrompt(env, {
+                sessionKey: "agent:qa:discovery",
+                message:
+                  "Read the seeded docs and source plan, then report grouped into Worked, Failed, Blocked, and Follow-up.",
+              });
+              const outbound = await waitForLatestOutbound(
+                state,
+                (candidate) =>
+                  candidate.conversation.id === "qa-operator" &&
+                  candidate.text.includes("Worked:"),
+              );
+              return outbound.text;
+            },
+          },
+        ]),
+    ],
+    [
+      "subagent-handoff",
+      async () =>
+        await runScenario("Subagent handoff", [
+          {
+            name: "delegates a bounded task and reports the result",
+            run: async () => {
+              await reset();
+              await runAgentPrompt(env, {
+                sessionKey: "agent:qa:subagent",
+                message:
+                  "Delegate a bounded QA task to a subagent, then summarize the delegated result clearly.",
+                timeoutMs: 45_000,
+              });
+              const outbound = await waitForLatestOutbound(
+                state,
+                (candidate) =>
+                  candidate.conversation.id === "qa-operator" &&
+                  candidate.text.toLowerCase().includes("delegated"),
+                { timeoutMs: 45_000 },
+              );
+              const sessions = await waitForCondition(
+                async () => {
+                  const listed = (await env.gateway.call("sessions.list", {
+                    spawnedBy: "agent:qa:subagent",
+                  })) as {
+                    sessions?: Array<{
+                      key?: string;
+                      parentSessionKey?: string;
+                      spawnedBy?: string;
+                    }>;
+                  };
+                  return (listed.sessions ?? []).length > 0 ? listed : null;
+                },
+                20_000,
+                250,
+              );
+              if ((sessions.sessions ?? []).length === 0) {
+                throw new Error("expected spawned child session");
+              }
+              return outbound.text;
+            },
+          },
+        ]),
+    ],
+    [
+      "thread-follow-up",
+      async () =>
+        await runScenario("Threaded follow-up", [
+          {
+            name: "keeps follow-up inside the thread",
+            run: async () => {
+              await reset();
+              const threadPayload = (await handleQaAction({
+                env,
+                action: "thread-create",
+                args: {
+                  channelId: "qa-room",
+                  title: "QA deep dive",
+                },
+              })) as { thread?: { id?: string } } | undefined;
+              const threadId = threadPayload?.thread?.id;
+              if (!threadId) {
+                throw new Error("missing thread id");
+              }
+              state.addInboundMessage({
+                conversation: { id: "qa-room", kind: "channel", title: "QA Room" },
+                senderId: "alice",
+                senderName: "Alice",
+                text: "@openclaw continue this work inside the thread",
+                threadId,
+                threadTitle: "QA deep dive",
+              });
+              const outbound = await waitForOutboundMessage(
+                state,
+                (candidate) =>
+                  candidate.conversation.id === "qa-room" && candidate.threadId === threadId,
+              );
+              const leaked = listOutboundMessages(state).some(
+                (candidate) => candidate.conversation.id === "qa-room" && !candidate.threadId,
+              );
+              if (leaked) {
+                throw new Error("thread reply leaked into root channel");
+              }
+              return outbound.text;
+            },
+          },
+        ]),
+    ],
+  ]);
+}
+
+/**
+ * Runs every scenario in the bootstrap catalog against a freshly started QA lab bus, mock
+ * OpenAI provider and gateway child, then writes a Markdown report and a JSON summary.
+ *
+ * Catalog entries without a registered implementation are reported as failures. All
+ * started services are stopped before returning, including when start-up or a scenario
+ * throws.
+ *
+ * @param params - `outputDir` overrides the default `.artifacts/qa-e2e/suite-<id>` directory
+ *   under the current working directory.
+ * @returns Artifact paths, the rendered report, and per-scenario results.
+ */
+export async function runQaSuite(params?: { outputDir?: string }) {
+  const startedAt = new Date();
+  const outputDir =
+    params?.outputDir ??
+    path.join(process.cwd(), ".artifacts", "qa-e2e", `suite-${Date.now().toString(36)}`);
+  await fs.mkdir(outputDir, { recursive: true });
+
+  // Stop callbacks are registered as each service comes up so a partial start-up is still
+  // torn down; they run in reverse start order.
+  const stoppers: Array<() => Promise<unknown>> = [];
+  const stopAll = async () => {
+    const failures: unknown[] = [];
+    for (const stopService of [...stoppers].reverse()) {
+      try {
+        await stopService();
+      } catch (error) {
+        failures.push(error);
+      }
+    }
+    if (failures.length > 0) {
+      throw failures[0];
+    }
+  };
+
+  let result: QaSuiteResult;
+  try {
+    const lab = await startQaLabServer({
+      host: "127.0.0.1",
+      port: 0,
+      embeddedGateway: "disabled",
+    });
+    stoppers.push(async () => await lab.stop());
+    const mock = await startQaMockOpenAiServer({
+      host: "127.0.0.1",
+      port: 0,
+    });
+    stoppers.push(async () => await mock.stop());
+    const gateway = await startQaGatewayChild({
+      repoRoot: process.cwd(),
+      providerBaseUrl: `${mock.baseUrl}/v1`,
+      qaBusBaseUrl: lab.listenUrl,
+    });
+    stoppers.push(async () => await gateway.stop());
+    const env: QaSuiteEnvironment = {
+      lab,
+      mock,
+      gateway,
+      cfg: createQaActionConfig(lab.listenUrl),
+    };
+
+    const catalog = readQaBootstrapScenarioCatalog();
+    const scenarioMap = buildScenarioMap(env);
+    const scenarios: QaSuiteScenarioResult[] = [];
+
+    for (const scenario of catalog.scenarios) {
+      const run = scenarioMap.get(scenario.id);
+      if (!run) {
+        scenarios.push({
+          name: scenario.title,
+          status: "fail",
+          details: `no executable scenario registered for ${scenario.id}`,
+          steps: [],
+        });
+        continue;
+      }
+      scenarios.push(await run());
+    }
+
+    const finishedAt = new Date();
+    const report = renderQaMarkdownReport({
+      title: "OpenClaw QA Scenario Suite",
+      startedAt,
+      finishedAt,
+      checks: [],
+      scenarios: scenarios.map((scenario) => ({
+        name: scenario.name,
+        status: scenario.status,
+        details: scenario.details,
+        steps: scenario.steps,
+      })) satisfies QaReportScenario[],
+      notes: [
+        "Runs against qa-channel + qa-lab bus + real gateway child + mock OpenAI provider.",
+        "Cron uses a one-minute schedule assertion plus forced execution for fast verification.",
+      ],
+    });
+    const reportPath = path.join(outputDir, "qa-suite-report.md");
+    const summaryPath = path.join(outputDir, "qa-suite-summary.json");
+    await fs.writeFile(reportPath, report, "utf8");
+    await fs.writeFile(
+      summaryPath,
+      `${JSON.stringify(
+        {
+          scenarios,
+          counts: {
+            total: scenarios.length,
+            passed: scenarios.filter((scenario) => scenario.status === "pass").length,
+            failed: scenarios.filter((scenario) => scenario.status === "fail").length,
+          },
+        },
+        null,
+        2,
+      )}\n`,
+      "utf8",
+    );
+
+    result = {
+      outputDir,
+      reportPath,
+      summaryPath,
+      report,
+      scenarios,
+    };
+  } catch (error) {
+    // Best-effort teardown; the original failure is the one worth reporting.
+    await stopAll().catch(() => undefined);
+    throw error;
+  }
+  await stopAll();
+  return result;
+}