From 4db162db7f3ef6754ad5433ee74cf6d8ba3da7b3 Mon Sep 17 00:00:00 2001 From: Gustavo Madeira Santana Date: Thu, 16 Apr 2026 03:08:39 -0400 Subject: [PATCH] QA: split lab runtime and extend Matrix coverage (#67430) Merged via squash. Prepared head SHA: 790418b93b81f1297a3407f08a756cd0118f01b4 Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com> Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com> Reviewed-by: @gumadeiras --- CHANGELOG.md | 1 + .../qa-lab/src/lab-server-capture.test.ts | 70 + extensions/qa-lab/src/lab-server-capture.ts | 127 ++ extensions/qa-lab/src/lab-server-ui.test.ts | 91 ++ extensions/qa-lab/src/lab-server-ui.ts | 288 ++++ extensions/qa-lab/src/lab-server.test.ts | 119 -- extensions/qa-lab/src/lab-server.ts | 422 +----- extensions/qa-lab/src/suite-planning.test.ts | 253 ++++ extensions/qa-lab/src/suite-planning.ts | 222 +++ .../qa-lab/src/suite-runtime-agent-common.ts | 14 + .../src/suite-runtime-agent-media.test.ts | 113 ++ .../qa-lab/src/suite-runtime-agent-media.ts | 135 ++ .../src/suite-runtime-agent-process.test.ts | 227 +++ .../qa-lab/src/suite-runtime-agent-process.ts | 235 ++++ .../src/suite-runtime-agent-session.test.ts | 100 ++ .../qa-lab/src/suite-runtime-agent-session.ts | 96 ++ .../src/suite-runtime-agent-tools.test.ts | 151 ++ .../qa-lab/src/suite-runtime-agent-tools.ts | 77 ++ extensions/qa-lab/src/suite-runtime-agent.ts | 26 + .../qa-lab/src/suite-runtime-flow.test.ts | 264 ++++ extensions/qa-lab/src/suite-runtime-flow.ts | 221 +++ .../qa-lab/src/suite-runtime-gateway.test.ts | 22 + .../qa-lab/src/suite-runtime-gateway.ts | 247 ++++ .../src/suite-runtime-transport.test.ts | 211 +++ .../qa-lab/src/suite-runtime-transport.ts | 175 +++ extensions/qa-lab/src/suite-runtime-types.ts | 70 + extensions/qa-lab/src/suite-test-helpers.ts | 31 + extensions/qa-lab/src/suite.test.ts | 463 +------ extensions/qa-lab/src/suite.ts | 1230 +---------------- .../src/runners/contract/scenario-catalog.ts | 21 + .../runners/contract/scenario-runtime-room.ts | 162 +++ .../src/runners/contract/scenario-runtime.ts | 6 + .../src/runners/contract/scenario-types.ts | 5 + .../src/runners/contract/scenarios.test.ts | 167 +++ .../src/runners/contract/scenarios.ts | 2 + .../qa-matrix/src/substrate/artifacts.test.ts | 13 +- .../qa-matrix/src/substrate/artifacts.ts | 6 + .../qa-matrix/src/substrate/client.test.ts | 73 + extensions/qa-matrix/src/substrate/client.ts | 146 ++ .../qa-matrix/src/substrate/events.test.ts | 47 + extensions/qa-matrix/src/substrate/events.ts | 62 + qa/scenarios/memory-dreaming-sweep.md | 10 +- 42 files changed, 4230 insertions(+), 2191 deletions(-) create mode 100644 extensions/qa-lab/src/lab-server-capture.test.ts create mode 100644 extensions/qa-lab/src/lab-server-capture.ts create mode 100644 extensions/qa-lab/src/lab-server-ui.test.ts create mode 100644 extensions/qa-lab/src/lab-server-ui.ts create mode 100644 extensions/qa-lab/src/suite-planning.test.ts create mode 100644 extensions/qa-lab/src/suite-planning.ts create mode 100644 extensions/qa-lab/src/suite-runtime-agent-common.ts create mode 100644 extensions/qa-lab/src/suite-runtime-agent-media.test.ts create mode 100644 extensions/qa-lab/src/suite-runtime-agent-media.ts create mode 100644 extensions/qa-lab/src/suite-runtime-agent-process.test.ts create mode 100644 extensions/qa-lab/src/suite-runtime-agent-process.ts create mode 100644 extensions/qa-lab/src/suite-runtime-agent-session.test.ts create mode 100644 extensions/qa-lab/src/suite-runtime-agent-session.ts create mode 100644 extensions/qa-lab/src/suite-runtime-agent-tools.test.ts create mode 100644 extensions/qa-lab/src/suite-runtime-agent-tools.ts create mode 100644 extensions/qa-lab/src/suite-runtime-agent.ts create mode 100644 extensions/qa-lab/src/suite-runtime-flow.test.ts create mode 100644 extensions/qa-lab/src/suite-runtime-flow.ts create mode 100644 extensions/qa-lab/src/suite-runtime-gateway.test.ts create mode 100644 extensions/qa-lab/src/suite-runtime-gateway.ts create mode 100644 extensions/qa-lab/src/suite-runtime-transport.test.ts create mode 100644 extensions/qa-lab/src/suite-runtime-transport.ts create mode 100644 extensions/qa-lab/src/suite-runtime-types.ts create mode 100644 extensions/qa-lab/src/suite-test-helpers.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e4a3ba5318..f7c2f10a3ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ Docs: https://docs.openclaw.ai - WhatsApp/web-session: drain the pending per-auth creds save queue before reopening sockets so reconnect-time auth bootstrap no longer races in-flight `creds.json` writes and falsely restores from backup. (#67464) Thanks @neeravmakwana. - BlueBubbles/catchup: add a per-message retry ceiling (`catchup.maxFailureRetries`, default 10) so a persistently-failing message with a malformed payload no longer wedges the catchup cursor forever. After N consecutive `processMessage` failures against the same GUID, catchup logs a WARN, skips that message on subsequent sweeps, and lets the cursor advance past it. Transient failures still retry from the same point as before. Also fixes a lost-update race in the persistent dedupe file lock that silently dropped inbound GUIDs on concurrent writes, a dedupe file naming migration gap on version upgrade, and a balloon-event bypass that let catchup replay debouncer-coalesced events as standalone messages. (#67426, #66870) Thanks @omarshahine. - Ollama/chat: strip the `ollama/` provider prefix from Ollama chat request model ids so configured refs like `ollama/qwen3:14b-q8_0` stop 404ing against the Ollama API. (#67457) Thanks @suboss87. +- QA/Matrix: split the private QA lab runtime into smaller tested modules, add Matrix media contract coverage for image understanding and generated-image delivery, and update the memory-dreaming QA sweep to assert the separate phase-report layout. (#67430) Thanks @gumadeiras. ## 2026.4.15-beta.1 diff --git a/extensions/qa-lab/src/lab-server-capture.test.ts b/extensions/qa-lab/src/lab-server-capture.test.ts new file mode 100644 index 00000000000..eb75418cac9 --- /dev/null +++ b/extensions/qa-lab/src/lab-server-capture.test.ts @@ -0,0 +1,70 @@ +import { createServer } from "node:http"; +import { afterEach, describe, expect, it } from "vitest"; +import { mapCaptureEventForQa, probeTcpReachability } from "./lab-server-capture.js"; + +const cleanups: Array<() => Promise> = []; + +afterEach(async () => { + while (cleanups.length > 0) { + await cleanups.pop()?.(); + } +}); + +describe("qa-lab server capture helpers", () => { + it("maps capture rows into QA-friendly fields", () => { + expect( + mapCaptureEventForQa({ + flowId: "flow-1", + dataText: '{"hello":"world"}', + metaJson: JSON.stringify({ + provider: "openai", + api: "responses", + model: "gpt-5.4", + captureOrigin: "shared-fetch", + }), + }), + ).toEqual( + expect.objectContaining({ + flowId: "flow-1", + payloadPreview: '{"hello":"world"}', + provider: "openai", + api: "responses", + model: "gpt-5.4", + captureOrigin: "shared-fetch", + }), + ); + }); + + it("probes tcp reachability for reachable and unreachable targets", async () => { + const server = createServer((_req, res) => { + res.writeHead(200); + res.end("ok"); + }); + await new Promise((resolve, reject) => { + server.once("error", reject); + server.listen(0, "127.0.0.1", () => resolve()); + }); + cleanups.push( + async () => + await new Promise((resolve, reject) => + server.close((error) => (error ? reject(error) : resolve())), + ), + ); + + const address = server.address(); + if (!address || typeof address === "string") { + throw new Error("expected tcp probe address"); + } + + await expect(probeTcpReachability(`http://127.0.0.1:${address.port}`)).resolves.toEqual( + expect.objectContaining({ + ok: true, + }), + ); + await expect(probeTcpReachability("http://127.0.0.1:9", 50)).resolves.toEqual( + expect.objectContaining({ + ok: false, + }), + ); + }); +}); diff --git a/extensions/qa-lab/src/lab-server-capture.ts b/extensions/qa-lab/src/lab-server-capture.ts new file mode 100644 index 00000000000..34859be24b3 --- /dev/null +++ b/extensions/qa-lab/src/lab-server-capture.ts @@ -0,0 +1,127 @@ +import net from "node:net"; +import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime"; + +const CAPTURE_QUERY_PRESETS = new Set([ + "double-sends", + "retry-storms", + "cache-busting", + "ws-duplicate-frames", + "missing-ack", + "error-bursts", +]); + +export type QaStartupProbeStatus = { + label: string; + url: string; + ok: boolean; + error?: string; +}; + +export function isCaptureQueryPreset( + value: string, +): value is Parameters< + ReturnType< + typeof import("openclaw/plugin-sdk/proxy-capture").getDebugProxyCaptureStore + >["queryPreset"] +>[0] { + return CAPTURE_QUERY_PRESETS.has(value); +} + +function parseCaptureMeta(metaJson: unknown): Record | null { + if (typeof metaJson !== "string" || metaJson.trim().length === 0) { + return null; + } + try { + const parsed = JSON.parse(metaJson) as unknown; + return parsed && typeof parsed === "object" ? (parsed as Record) : null; + } catch { + return null; + } +} + +function readCaptureMetaString( + meta: Record | null, + key: string, +): string | undefined { + const value = meta?.[key]; + return typeof value === "string" && value.trim().length > 0 ? value.trim() : undefined; +} + +export function mapCaptureEventForQa(row: Record) { + const meta = parseCaptureMeta(row.metaJson); + return { + ...row, + payloadPreview: typeof row.dataText === "string" ? row.dataText : undefined, + provider: readCaptureMetaString(meta, "provider"), + api: readCaptureMetaString(meta, "api"), + model: readCaptureMetaString(meta, "model"), + captureOrigin: readCaptureMetaString(meta, "captureOrigin"), + }; +} + +function defaultPortForProtocol(protocol: string): number { + if (protocol === "https:") { + return 443; + } + if (protocol === "http:") { + return 80; + } + return 0; +} + +export async function probeTcpReachability( + rawUrl: string, + timeoutMs = 700, +): Promise { + let parsed: URL; + try { + parsed = new URL(rawUrl); + } catch { + return { + label: rawUrl, + url: rawUrl, + ok: false, + error: "invalid url", + }; + } + const host = parsed.hostname; + const port = parsed.port ? Number(parsed.port) : defaultPortForProtocol(parsed.protocol); + if (!host || !Number.isFinite(port) || port <= 0) { + return { + label: parsed.origin, + url: parsed.toString(), + ok: false, + error: "missing host or port", + }; + } + try { + await new Promise((resolve, reject) => { + const socket = net.createConnection({ host, port }); + const onError = (error: Error) => { + socket.destroy(); + reject(error); + }; + socket.setTimeout(timeoutMs, () => { + socket.destroy(new Error("timeout")); + }); + socket.once("connect", () => { + socket.end(); + resolve(); + }); + socket.once("error", onError); + socket.once("timeout", () => onError(new Error("timeout"))); + }); + return { + label: parsed.host, + url: parsed.toString(), + ok: true, + }; + } catch (error) { + return { + label: parsed.host, + url: parsed.toString(), + ok: false, + error: formatErrorMessage(error), + }; + } +} diff --git a/extensions/qa-lab/src/lab-server-ui.test.ts b/extensions/qa-lab/src/lab-server-ui.test.ts new file mode 100644 index 00000000000..47748e6d261 --- /dev/null +++ b/extensions/qa-lab/src/lab-server-ui.test.ts @@ -0,0 +1,91 @@ +import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, describe, expect, it } from "vitest"; +import { + detectContentType, + missingUiHtml, + resolveUiAssetVersion, + tryResolveUiAsset, +} from "./lab-server-ui.js"; + +const cleanups: Array<() => Promise> = []; + +afterEach(async () => { + while (cleanups.length > 0) { + await cleanups.pop()?.(); + } +}); + +describe("qa-lab server ui helpers", () => { + it("detects basic UI asset content types", () => { + expect(detectContentType("index.html")).toBe("text/html; charset=utf-8"); + expect(detectContentType("styles.css")).toBe("text/css; charset=utf-8"); + expect(detectContentType("main.js")).toBe("text/javascript; charset=utf-8"); + expect(detectContentType("icon.svg")).toBe("image/svg+xml"); + }); + + it("renders the missing-ui placeholder html", () => { + expect(missingUiHtml()).toContain("QA Lab UI not built"); + expect(missingUiHtml()).toContain("pnpm qa:lab:build"); + }); + + it("hashes built UI assets and changes when bundle contents change", async () => { + const uiDistDir = await mkdtemp(path.join(os.tmpdir(), "qa-lab-ui-dist-")); + cleanups.push(async () => { + await rm(uiDistDir, { recursive: true, force: true }); + }); + await writeFile( + path.join(uiDistDir, "index.html"), + "QA Lab
", + "utf8", + ); + + const version1 = resolveUiAssetVersion(uiDistDir); + expect(version1).toMatch(/^[0-9a-f]{12}$/); + + await writeFile( + path.join(uiDistDir, "index.html"), + "QA Lab Updated
", + "utf8", + ); + + const version2 = resolveUiAssetVersion(uiDistDir); + expect(version2).toMatch(/^[0-9a-f]{12}$/); + expect(version2).not.toBe(version1); + }); + + it("never resolves sibling files outside the UI dist root", async () => { + const rootDir = await mkdtemp(path.join(os.tmpdir(), "qa-lab-ui-boundary-")); + cleanups.push(async () => { + await rm(rootDir, { recursive: true, force: true }); + }); + const uiDistDir = path.join(rootDir, "dist"); + const siblingDir = path.join(rootDir, "dist-other"); + await mkdir(uiDistDir, { recursive: true }); + await mkdir(siblingDir, { recursive: true }); + await writeFile( + path.join(uiDistDir, "index.html"), + "bundle-root", + "utf8", + ); + await writeFile(path.join(siblingDir, "secret.txt"), "sibling-secret", "utf8"); + + expect(tryResolveUiAsset("/", uiDistDir, rootDir)).toBe(path.join(uiDistDir, "index.html")); + expect(tryResolveUiAsset("/../dist-other/secret.txt", uiDistDir, rootDir)).toBeNull(); + }); + + it("rejects malformed percent-encoded UI asset paths", async () => { + const uiDistDir = await mkdtemp(path.join(os.tmpdir(), "qa-lab-ui-malformed-")); + cleanups.push(async () => { + await rm(uiDistDir, { recursive: true, force: true }); + }); + await writeFile( + path.join(uiDistDir, "index.html"), + "bundle-root", + "utf8", + ); + + expect(tryResolveUiAsset("/%E0%A4", uiDistDir, uiDistDir)).toBeNull(); + }); +}); diff --git a/extensions/qa-lab/src/lab-server-ui.ts b/extensions/qa-lab/src/lab-server-ui.ts new file mode 100644 index 00000000000..53c9d110597 --- /dev/null +++ b/extensions/qa-lab/src/lab-server-ui.ts @@ -0,0 +1,288 @@ +import { createHash } from "node:crypto"; +import fs from "node:fs"; +import { request as httpRequest, type IncomingMessage, type ServerResponse } from "node:http"; +import { request as httpsRequest } from "node:https"; +import net from "node:net"; +import path from "node:path"; +import type { Duplex } from "node:stream"; +import tls from "node:tls"; +import { fileURLToPath } from "node:url"; +import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime"; +import { writeError } from "./bus-server.js"; + +export function detectContentType(filePath: string): string { + if (filePath.endsWith(".css")) { + return "text/css; charset=utf-8"; + } + if (filePath.endsWith(".js")) { + return "text/javascript; charset=utf-8"; + } + if (filePath.endsWith(".json")) { + return "application/json; charset=utf-8"; + } + if (filePath.endsWith(".svg")) { + return "image/svg+xml"; + } + return "text/html; charset=utf-8"; +} + +export function missingUiHtml() { + return ` + + + + + QA Lab UI Missing + + + +
+

QA Lab UI not built

+

Build the private debugger bundle, then reload this page.

+

pnpm qa:lab:build

+
+ +`; +} + +export function resolveUiDistDir(overrideDir?: string | null, repoRoot = process.cwd()) { + if (overrideDir?.trim()) { + return overrideDir; + } + const candidates = [ + path.resolve(repoRoot, "extensions/qa-lab/web/dist"), + path.resolve(repoRoot, "dist/extensions/qa-lab/web/dist"), + fileURLToPath(new URL("../web/dist", import.meta.url)), + ]; + return ( + candidates.find((candidate) => { + if (!fs.existsSync(candidate)) { + return false; + } + const indexPath = path.join(candidate, "index.html"); + return fs.existsSync(indexPath) && fs.statSync(indexPath).isFile(); + }) ?? candidates[0] + ); +} + +function listUiAssetFiles(rootDir: string, currentDir = rootDir): string[] { + const entries = fs + .readdirSync(currentDir, { withFileTypes: true }) + .toSorted((left, right) => left.name.localeCompare(right.name)); + const files: string[] = []; + for (const entry of entries) { + const resolved = path.join(currentDir, entry.name); + if (entry.isDirectory()) { + files.push(...listUiAssetFiles(rootDir, resolved)); + continue; + } + if (!entry.isFile()) { + continue; + } + files.push(path.relative(rootDir, resolved)); + } + return files; +} + +export function resolveUiAssetVersion(overrideDir?: string | null): string | null { + try { + const distDir = resolveUiDistDir(overrideDir); + const indexPath = path.join(distDir, "index.html"); + if (!fs.existsSync(indexPath) || !fs.statSync(indexPath).isFile()) { + return null; + } + const hash = createHash("sha1"); + for (const relativeFile of listUiAssetFiles(distDir)) { + hash.update(relativeFile); + hash.update("\0"); + hash.update(fs.readFileSync(path.join(distDir, relativeFile))); + hash.update("\0"); + } + return hash.digest("hex").slice(0, 12); + } catch { + return null; + } +} + +export function resolveAdvertisedBaseUrl(params: { + bindHost?: string; + bindPort: number; + advertiseHost?: string; + advertisePort?: number; +}) { + const advertisedHost = + params.advertiseHost?.trim() || + (params.bindHost && params.bindHost !== "0.0.0.0" ? params.bindHost : "127.0.0.1"); + const advertisedPort = + typeof params.advertisePort === "number" && Number.isFinite(params.advertisePort) + ? params.advertisePort + : params.bindPort; + return `http://${advertisedHost}:${advertisedPort}`; +} + +export function isControlUiProxyPath(pathname: string) { + return pathname === "/control-ui" || pathname.startsWith("/control-ui/"); +} + +function rewriteControlUiProxyPath(pathname: string, search: string) { + const stripped = pathname === "/control-ui" ? "/" : pathname.slice("/control-ui".length) || "/"; + return `${stripped}${search}`; +} + +function rewriteEmbeddedControlUiHeaders( + headers: IncomingMessage["headers"], +): Record { + const rewritten: Record = { ...headers }; + delete rewritten["x-frame-options"]; + + const csp = headers["content-security-policy"]; + if (typeof csp === "string") { + rewritten["content-security-policy"] = csp.includes("frame-ancestors") + ? csp.replace(/frame-ancestors\s+[^;]+/i, "frame-ancestors 'self'") + : `${csp}; frame-ancestors 'self'`; + } + + return rewritten; +} + +export async function proxyHttpRequest(params: { + req: IncomingMessage; + res: ServerResponse; + target: URL; + pathname: string; + search: string; +}) { + const client = params.target.protocol === "https:" ? httpsRequest : httpRequest; + const upstreamReq = client( + { + protocol: params.target.protocol, + hostname: params.target.hostname, + port: params.target.port || (params.target.protocol === "https:" ? 443 : 80), + method: params.req.method, + path: rewriteControlUiProxyPath(params.pathname, params.search), + headers: { + ...params.req.headers, + host: params.target.host, + }, + }, + (upstreamRes) => { + params.res.writeHead( + upstreamRes.statusCode ?? 502, + rewriteEmbeddedControlUiHeaders(upstreamRes.headers), + ); + upstreamRes.pipe(params.res); + }, + ); + + upstreamReq.on("error", (error) => { + if (!params.res.headersSent) { + writeError(params.res, 502, error); + return; + } + params.res.destroy(error); + }); + + if (params.req.method === "GET" || params.req.method === "HEAD") { + upstreamReq.end(); + return; + } + params.req.pipe(upstreamReq); +} + +export function proxyUpgradeRequest(params: { + req: IncomingMessage; + socket: Duplex; + head: Buffer; + target: URL; +}) { + const requestUrl = new URL(params.req.url ?? "/", "http://127.0.0.1"); + const port = Number(params.target.port || (params.target.protocol === "https:" ? 443 : 80)); + const upstream = + params.target.protocol === "https:" + ? tls.connect({ + host: params.target.hostname, + port, + servername: params.target.hostname, + }) + : net.connect({ + host: params.target.hostname, + port, + }); + + const headerLines: string[] = []; + for (let index = 0; index < params.req.rawHeaders.length; index += 2) { + const name = params.req.rawHeaders[index]; + const value = params.req.rawHeaders[index + 1] ?? ""; + if (normalizeLowercaseStringOrEmpty(name) === "host") { + continue; + } + headerLines.push(`${name}: ${value}`); + } + + upstream.once("connect", () => { + const requestText = [ + `${params.req.method ?? "GET"} ${rewriteControlUiProxyPath(requestUrl.pathname, requestUrl.search)} HTTP/${params.req.httpVersion}`, + `Host: ${params.target.host}`, + ...headerLines, + "", + "", + ].join("\r\n"); + upstream.write(requestText); + if (params.head.length > 0) { + upstream.write(params.head); + } + upstream.pipe(params.socket); + params.socket.pipe(upstream); + }); + + const closeBoth = () => { + if (!params.socket.destroyed) { + params.socket.destroy(); + } + if (!upstream.destroyed) { + upstream.destroy(); + } + }; + + upstream.on("error", () => { + if (!params.socket.destroyed) { + params.socket.write("HTTP/1.1 502 Bad Gateway\r\nConnection: close\r\n\r\n"); + } + closeBoth(); + }); + params.socket.on("error", closeBoth); + params.socket.on("close", closeBoth); +} + +export function tryResolveUiAsset( + pathname: string, + overrideDir?: string | null, + repoRoot = process.cwd(), +): string | null { + const distDir = resolveUiDistDir(overrideDir, repoRoot); + if (!fs.existsSync(distDir)) { + return null; + } + const safePath = pathname === "/" ? "/index.html" : pathname; + let decoded: string; + try { + decoded = decodeURIComponent(safePath); + } catch { + return null; + } + const candidate = path.resolve(distDir, `.${decoded.startsWith("/") ? decoded : `/${decoded}`}`); + const relative = path.relative(distDir, candidate); + if (relative.startsWith("..") || path.isAbsolute(relative)) { + return null; + } + if (fs.existsSync(candidate) && fs.statSync(candidate).isFile()) { + return candidate; + } + const fallback = path.join(distDir, "index.html"); + return fs.existsSync(fallback) ? fallback : null; +} diff --git a/extensions/qa-lab/src/lab-server.test.ts b/extensions/qa-lab/src/lab-server.test.ts index 04b626a69ca..7dae0fdee02 100644 --- a/extensions/qa-lab/src/lab-server.test.ts +++ b/extensions/qa-lab/src/lab-server.test.ts @@ -271,76 +271,6 @@ describe("qa-lab server", () => { expect(await rootResponse.text()).toContain("Control UI"); }); - it("reports startup reachability for proxy and gateway", async () => { - const proxy = createServer((_req, res) => { - res.writeHead(200, { "content-type": "text/plain; charset=utf-8" }); - res.end("proxy"); - }); - await new Promise((resolve, reject) => { - proxy.once("error", reject); - proxy.listen(0, "127.0.0.1", () => resolve()); - }); - cleanups.push( - async () => - await new Promise((resolve, reject) => - proxy.close((error) => (error ? reject(error) : resolve())), - ), - ); - - const gateway = createServer((_req, res) => { - res.writeHead(200, { "content-type": "text/plain; charset=utf-8" }); - res.end("gateway"); - }); - await new Promise((resolve, reject) => { - gateway.once("error", reject); - gateway.listen(0, "127.0.0.1", () => resolve()); - }); - cleanups.push( - async () => - await new Promise((resolve, reject) => - gateway.close((error) => (error ? reject(error) : resolve())), - ), - ); - - const proxyAddress = proxy.address(); - const gatewayAddress = gateway.address(); - if ( - !proxyAddress || - typeof proxyAddress === "string" || - !gatewayAddress || - typeof gatewayAddress === "string" - ) { - throw new Error("expected startup probe addresses"); - } - - process.env.OPENCLAW_DEBUG_PROXY_URL = `http://127.0.0.1:${proxyAddress.port}`; - const lab = await startQaLabServer({ - host: "127.0.0.1", - port: 0, - controlUiUrl: `http://127.0.0.1:${gatewayAddress.port}/`, - }); - cleanups.push(async () => { - delete process.env.OPENCLAW_DEBUG_PROXY_URL; - await lab.stop(); - }); - - const response = await fetchWithRetry(`${lab.baseUrl}/api/capture/startup-status`); - expect(response.status).toBe(200); - const payload = (await response.json()) as { - status: { - proxy: { ok: boolean; url: string }; - gateway: { ok: boolean; url: string }; - qaLab: { ok: boolean; url: string }; - }; - }; - expect(payload.status.proxy.ok).toBe(true); - expect(payload.status.proxy.url).toBe(`http://127.0.0.1:${proxyAddress.port}/`); - expect(payload.status.gateway.ok).toBe(true); - expect(payload.status.gateway.url).toBe(`http://127.0.0.1:${gatewayAddress.port}/`); - expect(payload.status.qaLab.ok).toBe(true); - expect(payload.status.qaLab.url).toBe(lab.baseUrl); - }); - it("serves the built QA UI bundle when available", async () => { const uiDistDir = await mkdtemp(path.join(os.tmpdir(), "qa-lab-ui-dist-")); cleanups.push(async () => { @@ -366,55 +296,6 @@ describe("qa-lab server", () => { const html = await rootResponse.text(); expect(html).not.toContain("QA Lab UI not built"); expect(html).toContain(""); - - const version1 = (await (await fetch(`${lab.baseUrl}/api/ui-version`)).json()) as { - version: string | null; - }; - expect(version1.version).toMatch(/^[0-9a-f]{12}$/); - - await writeFile( - path.join(uiDistDir, "index.html"), - "<!doctype html><html><head><title>QA Lab Updated
", - "utf8", - ); - - const version2 = (await (await fetch(`${lab.baseUrl}/api/ui-version`)).json()) as { - version: string | null; - }; - expect(version2.version).toMatch(/^[0-9a-f]{12}$/); - expect(version2.version).not.toBe(version1.version); - }); - - it("does not serve sibling files outside the UI dist root", async () => { - const rootDir = await mkdtemp(path.join(os.tmpdir(), "qa-lab-ui-boundary-")); - cleanups.push(async () => { - await rm(rootDir, { recursive: true, force: true }); - }); - const uiDistDir = path.join(rootDir, "dist"); - const siblingDir = path.join(rootDir, "dist-other"); - await mkdir(uiDistDir, { recursive: true }); - await mkdir(siblingDir, { recursive: true }); - await writeFile( - path.join(uiDistDir, "index.html"), - "bundle-root", - "utf8", - ); - await writeFile(path.join(siblingDir, "secret.txt"), "sibling-secret", "utf8"); - - const lab = await startQaLabServer({ - host: "127.0.0.1", - port: 0, - uiDistDir, - }); - cleanups.push(async () => { - await lab.stop(); - }); - - const response = await fetchWithRetry(`${lab.baseUrl}/../dist-other/secret.txt`); - expect(response.status).toBe(200); - const body = await response.text(); - expect(body).toContain("bundle-root"); - expect(body).not.toContain("sibling-secret"); }); it("uses the explicit repo root for ui assets and runner model discovery", async () => { diff --git a/extensions/qa-lab/src/lab-server.ts b/extensions/qa-lab/src/lab-server.ts index 5ee07a417c0..26fbbaa2bab 100644 --- a/extensions/qa-lab/src/lab-server.ts +++ b/extensions/qa-lab/src/lab-server.ts @@ -1,26 +1,29 @@ -import { createHash } from "node:crypto"; import fs from "node:fs"; -import { - createServer, - request as httpRequest, - type IncomingMessage, - type ServerResponse, -} from "node:http"; -import { request as httpsRequest } from "node:https"; -import net from "node:net"; +import { createServer, type IncomingMessage } from "node:http"; import path from "node:path"; -import type { Duplex } from "node:stream"; -import tls from "node:tls"; -import { fileURLToPath } from "node:url"; import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime"; import { getDebugProxyCaptureStore, resolveDebugProxySettings, } from "openclaw/plugin-sdk/proxy-capture"; -import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime"; import { closeQaHttpServer, handleQaBusRequest, writeError, writeJson } from "./bus-server.js"; import { createQaBusState, type QaBusState } from "./bus-state.js"; import { createQaRunnerRuntime } from "./harness-runtime.js"; +import { + isCaptureQueryPreset, + mapCaptureEventForQa, + probeTcpReachability, +} from "./lab-server-capture.js"; +import { + detectContentType, + isControlUiProxyPath, + missingUiHtml, + proxyHttpRequest, + proxyUpgradeRequest, + resolveAdvertisedBaseUrl, + resolveUiAssetVersion, + tryResolveUiAsset, +} from "./lab-server-ui.js"; import type { QaLabLatestReport, QaLabScenarioOutcome, @@ -39,21 +42,6 @@ import { qaChannelPlugin, setQaChannelRuntime, type OpenClawConfig } from "./run import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js"; import { runQaSelfCheckAgainstState, type QaSelfCheckResult } from "./self-check.js"; -const CAPTURE_QUERY_PRESETS = new Set([ - "double-sends", - "retry-storms", - "cache-busting", - "ws-duplicate-frames", - "missing-ack", - "error-bursts", -]); - -function isCaptureQueryPreset( - value: string, -): value is Parameters["queryPreset"]>[0] { - return CAPTURE_QUERY_PRESETS.has(value); -} - type QaLabBootstrapDefaults = { conversationKind: "direct" | "channel"; conversationId: string; @@ -69,112 +57,6 @@ export type { QaLabServerStartParams, } from "./lab-server.types.js"; -function parseCaptureMeta(metaJson: unknown): Record | null { - if (typeof metaJson !== "string" || metaJson.trim().length === 0) { - return null; - } - try { - const parsed = JSON.parse(metaJson) as unknown; - return parsed && typeof parsed === "object" ? (parsed as Record) : null; - } catch { - return null; - } -} - -function readCaptureMetaString( - meta: Record | null, - key: string, -): string | undefined { - const value = meta?.[key]; - return typeof value === "string" && value.trim().length > 0 ? value.trim() : undefined; -} - -function mapCaptureEventForQa(row: Record) { - const meta = parseCaptureMeta(row.metaJson); - return { - ...row, - payloadPreview: typeof row.dataText === "string" ? row.dataText : undefined, - provider: readCaptureMetaString(meta, "provider"), - api: readCaptureMetaString(meta, "api"), - model: readCaptureMetaString(meta, "model"), - captureOrigin: readCaptureMetaString(meta, "captureOrigin"), - }; -} - -type QaStartupProbeStatus = { - label: string; - url: string; - ok: boolean; - error?: string; -}; - -function defaultPortForProtocol(protocol: string): number { - if (protocol === "https:") { - return 443; - } - if (protocol === "http:") { - return 80; - } - return 0; -} - -async function probeTcpReachability( - rawUrl: string, - timeoutMs = 700, -): Promise { - let parsed: URL; - try { - parsed = new URL(rawUrl); - } catch { - return { - label: rawUrl, - url: rawUrl, - ok: false, - error: "invalid url", - }; - } - const host = parsed.hostname; - const port = parsed.port ? Number(parsed.port) : defaultPortForProtocol(parsed.protocol); - if (!host || !Number.isFinite(port) || port <= 0) { - return { - label: parsed.origin, - url: parsed.toString(), - ok: false, - error: "missing host or port", - }; - } - try { - await new Promise((resolve, reject) => { - const socket = net.createConnection({ host, port }); - const onError = (error: Error) => { - socket.destroy(); - reject(error); - }; - socket.setTimeout(timeoutMs, () => { - socket.destroy(new Error("timeout")); - }); - socket.once("connect", () => { - socket.end(); - resolve(); - }); - socket.once("error", onError); - socket.once("timeout", () => onError(new Error("timeout"))); - }); - return { - label: parsed.host, - url: parsed.toString(), - ok: true, - }; - } catch (error) { - return { - label: parsed.host, - url: parsed.toString(), - ok: false, - error: formatErrorMessage(error), - }; - } -} - function countQaLabScenarioRun(scenarios: QaLabScenarioOutcome[]) { return { total: scenarios.length, @@ -221,121 +103,6 @@ async function readJson(req: IncomingMessage): Promise { return text ? (JSON.parse(text) as unknown) : {}; } -function detectContentType(filePath: string): string { - if (filePath.endsWith(".css")) { - return "text/css; charset=utf-8"; - } - if (filePath.endsWith(".js")) { - return "text/javascript; charset=utf-8"; - } - if (filePath.endsWith(".json")) { - return "application/json; charset=utf-8"; - } - if (filePath.endsWith(".svg")) { - return "image/svg+xml"; - } - return "text/html; charset=utf-8"; -} - -function missingUiHtml() { - return ` - - - - - QA Lab UI Missing - - - -
-

QA Lab UI not built

-

Build the private debugger bundle, then reload this page.

-

pnpm qa:lab:build

-
- -`; -} - -function resolveUiDistDir(overrideDir?: string | null, repoRoot = process.cwd()) { - if (overrideDir?.trim()) { - return overrideDir; - } - const candidates = [ - path.resolve(repoRoot, "extensions/qa-lab/web/dist"), - path.resolve(repoRoot, "dist/extensions/qa-lab/web/dist"), - fileURLToPath(new URL("../web/dist", import.meta.url)), - ]; - return ( - candidates.find((candidate) => { - if (!fs.existsSync(candidate)) { - return false; - } - const indexPath = path.join(candidate, "index.html"); - return fs.existsSync(indexPath) && fs.statSync(indexPath).isFile(); - }) ?? candidates[0] - ); -} - -function listUiAssetFiles(rootDir: string, currentDir = rootDir): string[] { - const entries = fs - .readdirSync(currentDir, { withFileTypes: true }) - .toSorted((left, right) => left.name.localeCompare(right.name)); - const files: string[] = []; - for (const entry of entries) { - const resolved = path.join(currentDir, entry.name); - if (entry.isDirectory()) { - files.push(...listUiAssetFiles(rootDir, resolved)); - continue; - } - if (!entry.isFile()) { - continue; - } - files.push(path.relative(rootDir, resolved)); - } - return files; -} - -function resolveUiAssetVersion(overrideDir?: string | null): string | null { - try { - const distDir = resolveUiDistDir(overrideDir); - const indexPath = path.join(distDir, "index.html"); - if (!fs.existsSync(indexPath) || !fs.statSync(indexPath).isFile()) { - return null; - } - const hash = createHash("sha1"); - for (const relativeFile of listUiAssetFiles(distDir)) { - hash.update(relativeFile); - hash.update("\0"); - hash.update(fs.readFileSync(path.join(distDir, relativeFile))); - hash.update("\0"); - } - return hash.digest("hex").slice(0, 12); - } catch { - return null; - } -} - -function resolveAdvertisedBaseUrl(params: { - bindHost?: string; - bindPort: number; - advertiseHost?: string; - advertisePort?: number; -}) { - const advertisedHost = - params.advertiseHost?.trim() || - (params.bindHost && params.bindHost !== "0.0.0.0" ? params.bindHost : "127.0.0.1"); - const advertisedPort = - typeof params.advertisePort === "number" && Number.isFinite(params.advertisePort) - ? params.advertisePort - : params.bindPort; - return `http://${advertisedHost}:${advertisedPort}`; -} - function createBootstrapDefaults(autoKickoffTarget?: string): QaLabBootstrapDefaults { if (autoKickoffTarget === "channel") { return { @@ -353,163 +120,6 @@ function createBootstrapDefaults(autoKickoffTarget?: string): QaLabBootstrapDefa }; } -function isControlUiProxyPath(pathname: string) { - return pathname === "/control-ui" || pathname.startsWith("/control-ui/"); -} - -function rewriteControlUiProxyPath(pathname: string, search: string) { - const stripped = pathname === "/control-ui" ? "/" : pathname.slice("/control-ui".length) || "/"; - return `${stripped}${search}`; -} - -function rewriteEmbeddedControlUiHeaders( - headers: IncomingMessage["headers"], -): Record { - const rewritten: Record = { ...headers }; - delete rewritten["x-frame-options"]; - - const csp = headers["content-security-policy"]; - if (typeof csp === "string") { - rewritten["content-security-policy"] = csp.includes("frame-ancestors") - ? csp.replace(/frame-ancestors\s+[^;]+/i, "frame-ancestors 'self'") - : `${csp}; frame-ancestors 'self'`; - } - - return rewritten; -} - -async function proxyHttpRequest(params: { - req: IncomingMessage; - res: ServerResponse; - target: URL; - pathname: string; - search: string; -}) { - const client = params.target.protocol === "https:" ? httpsRequest : httpRequest; - const upstreamReq = client( - { - protocol: params.target.protocol, - hostname: params.target.hostname, - port: params.target.port || (params.target.protocol === "https:" ? 443 : 80), - method: params.req.method, - path: rewriteControlUiProxyPath(params.pathname, params.search), - headers: { - ...params.req.headers, - host: params.target.host, - }, - }, - (upstreamRes) => { - params.res.writeHead( - upstreamRes.statusCode ?? 502, - rewriteEmbeddedControlUiHeaders(upstreamRes.headers), - ); - upstreamRes.pipe(params.res); - }, - ); - - upstreamReq.on("error", (error) => { - if (!params.res.headersSent) { - writeError(params.res, 502, error); - return; - } - params.res.destroy(error); - }); - - if (params.req.method === "GET" || params.req.method === "HEAD") { - upstreamReq.end(); - return; - } - params.req.pipe(upstreamReq); -} - -function proxyUpgradeRequest(params: { - req: IncomingMessage; - socket: Duplex; - head: Buffer; - target: URL; -}) { - const requestUrl = new URL(params.req.url ?? "/", "http://127.0.0.1"); - const port = Number(params.target.port || (params.target.protocol === "https:" ? 443 : 80)); - const upstream = - params.target.protocol === "https:" - ? tls.connect({ - host: params.target.hostname, - port, - servername: params.target.hostname, - }) - : net.connect({ - host: params.target.hostname, - port, - }); - - const headerLines: string[] = []; - for (let index = 0; index < params.req.rawHeaders.length; index += 2) { - const name = params.req.rawHeaders[index]; - const value = params.req.rawHeaders[index + 1] ?? ""; - if (normalizeLowercaseStringOrEmpty(name) === "host") { - continue; - } - headerLines.push(`${name}: ${value}`); - } - - upstream.once("connect", () => { - const requestText = [ - `${params.req.method ?? "GET"} ${rewriteControlUiProxyPath(requestUrl.pathname, requestUrl.search)} HTTP/${params.req.httpVersion}`, - `Host: ${params.target.host}`, - ...headerLines, - "", - "", - ].join("\r\n"); - upstream.write(requestText); - if (params.head.length > 0) { - upstream.write(params.head); - } - upstream.pipe(params.socket); - params.socket.pipe(upstream); - }); - - const closeBoth = () => { - if (!params.socket.destroyed) { - params.socket.destroy(); - } - if (!upstream.destroyed) { - upstream.destroy(); - } - }; - - upstream.on("error", () => { - if (!params.socket.destroyed) { - params.socket.write("HTTP/1.1 502 Bad Gateway\r\nConnection: close\r\n\r\n"); - } - closeBoth(); - }); - params.socket.on("error", closeBoth); - params.socket.on("close", closeBoth); -} - -function tryResolveUiAsset( - pathname: string, - overrideDir?: string | null, - repoRoot = process.cwd(), -): string | null { - const distDir = resolveUiDistDir(overrideDir, repoRoot); - if (!fs.existsSync(distDir)) { - return null; - } - const safePath = pathname === "/" ? "/index.html" : pathname; - const decoded = decodeURIComponent(safePath); - const candidate = path.resolve(distDir, `.${decoded.startsWith("/") ? decoded : `/${decoded}`}`); - const relative = path.relative(distDir, candidate); - if (relative.startsWith("..") || path.isAbsolute(relative)) { - return null; - } - if (fs.existsSync(candidate) && fs.statSync(candidate).isFile()) { - return candidate; - } - const fallback = path.join(distDir, "index.html"); - return fs.existsSync(fallback) ? fallback : null; -} - function createQaLabConfig(baseUrl: string): OpenClawConfig { return createQaChannelGatewayConfig({ baseUrl }); } diff --git a/extensions/qa-lab/src/suite-planning.test.ts b/extensions/qa-lab/src/suite-planning.test.ts new file mode 100644 index 00000000000..15ca4a1e7a6 --- /dev/null +++ b/extensions/qa-lab/src/suite-planning.test.ts @@ -0,0 +1,253 @@ +import { lstat, mkdir, mkdtemp, rm, symlink } from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { describe, expect, it } from "vitest"; +import { defaultQaSuiteConcurrencyForTransport } from "./qa-transport-registry.js"; +import { + collectQaSuiteGatewayConfigPatch, + collectQaSuiteGatewayRuntimeOptions, + collectQaSuitePluginIds, + mapQaSuiteWithConcurrency, + normalizeQaSuiteConcurrency, + resolveQaSuiteOutputDir, + scenarioRequiresControlUi, + selectQaSuiteScenarios, +} from "./suite-planning.js"; +import { makeQaSuiteTestScenario } from "./suite-test-helpers.js"; + +describe("qa suite planning helpers", () => { + it("normalizes suite concurrency to a bounded integer", () => { + const previous = process.env.OPENCLAW_QA_SUITE_CONCURRENCY; + delete process.env.OPENCLAW_QA_SUITE_CONCURRENCY; + try { + expect(normalizeQaSuiteConcurrency(undefined, 10)).toBe(10); + expect(normalizeQaSuiteConcurrency(undefined, 80)).toBe(64); + expect( + normalizeQaSuiteConcurrency( + undefined, + 80, + defaultQaSuiteConcurrencyForTransport("qa-channel"), + ), + ).toBe(4); + expect(normalizeQaSuiteConcurrency(2.8, 10)).toBe(2); + expect(normalizeQaSuiteConcurrency(20, 3)).toBe(3); + expect(normalizeQaSuiteConcurrency(0, 3)).toBe(1); + } finally { + if (previous === undefined) { + delete process.env.OPENCLAW_QA_SUITE_CONCURRENCY; + } else { + process.env.OPENCLAW_QA_SUITE_CONCURRENCY = previous; + } + } + }); + + it("keeps programmatic suite output dirs within the repo root", async () => { + const repoRoot = await mkdtemp(path.join(os.tmpdir(), "qa-suite-existing-root-")); + try { + await expect( + resolveQaSuiteOutputDir(repoRoot, path.join(repoRoot, ".artifacts", "qa-e2e", "custom")), + ).resolves.toBe(path.join(repoRoot, ".artifacts", "qa-e2e", "custom")); + await expect( + lstat(path.join(repoRoot, ".artifacts", "qa-e2e", "custom")).then((stats) => + stats.isDirectory(), + ), + ).resolves.toBe(true); + await expect(resolveQaSuiteOutputDir(repoRoot, "/tmp/outside")).rejects.toThrow( + "QA suite outputDir must stay within the repo root.", + ); + } finally { + await rm(repoRoot, { recursive: true, force: true }); + } + }); + + it("rejects symlinked suite output dirs that escape the repo root", async () => { + const repoRoot = await mkdtemp(path.join(os.tmpdir(), "qa-suite-root-")); + const outsideRoot = await mkdtemp(path.join(os.tmpdir(), "qa-suite-outside-")); + try { + await mkdir(path.join(repoRoot, ".artifacts"), { recursive: true }); + await symlink(outsideRoot, path.join(repoRoot, ".artifacts", "qa-e2e"), "dir"); + + await expect(resolveQaSuiteOutputDir(repoRoot, ".artifacts/qa-e2e/custom")).rejects.toThrow( + "QA suite outputDir must not traverse symlinks.", + ); + } finally { + await rm(repoRoot, { recursive: true, force: true }); + await rm(outsideRoot, { recursive: true, force: true }); + } + }); + + it("maps suite work with bounded concurrency while preserving order", async () => { + let active = 0; + let maxActive = 0; + const result = await mapQaSuiteWithConcurrency([1, 2, 3, 4], 2, async (item) => { + active += 1; + maxActive = Math.max(maxActive, active); + await new Promise((resolve) => setTimeout(resolve, 10)); + active -= 1; + return item * 10; + }); + + expect(maxActive).toBe(2); + expect(result).toEqual([10, 20, 30, 40]); + }); + + it("keeps explicitly requested provider-specific scenarios", () => { + const scenarios = [ + makeQaSuiteTestScenario("generic"), + makeQaSuiteTestScenario("anthropic-only", { + config: { + requiredProvider: "anthropic", + requiredModel: "claude-opus-4-6", + }, + }), + ]; + + expect( + selectQaSuiteScenarios({ + scenarios, + scenarioIds: ["anthropic-only"], + providerMode: "live-frontier", + primaryModel: "openai/gpt-5.4", + }).map((scenario) => scenario.id), + ).toEqual(["anthropic-only"]); + }); + + it("collects unique scenario-declared bundled plugins in encounter order", () => { + const scenarios = [ + makeQaSuiteTestScenario("generic", { plugins: ["active-memory", "memory-wiki"] }), + makeQaSuiteTestScenario("other", { plugins: ["memory-wiki", "openai"] }), + makeQaSuiteTestScenario("plain"), + ]; + + expect(collectQaSuitePluginIds(scenarios)).toEqual(["active-memory", "memory-wiki", "openai"]); + }); + + it("merge-patches scenario startup config in encounter order", () => { + const scenarios = [ + makeQaSuiteTestScenario("active-memory", { + plugins: ["active-memory"], + gatewayConfigPatch: { + plugins: { + entries: { + "active-memory": { + config: { + enabled: true, + agents: ["qa"], + }, + }, + }, + }, + }, + }), + makeQaSuiteTestScenario("live-defaults", { + gatewayConfigPatch: { + agents: { + defaults: { + thinkingDefault: "minimal", + }, + }, + plugins: { + entries: { + "active-memory": { + config: { + transcriptDir: "qa-memory-e2e", + }, + }, + }, + }, + }, + }), + ]; + + expect(collectQaSuiteGatewayConfigPatch(scenarios)).toEqual({ + agents: { + defaults: { + thinkingDefault: "minimal", + }, + }, + plugins: { + entries: { + "active-memory": { + config: { + enabled: true, + agents: ["qa"], + transcriptDir: "qa-memory-e2e", + }, + }, + }, + }, + }); + }); + + it("ignores prototype-mutating keys in scenario startup config patches", () => { + const scenarios = [ + makeQaSuiteTestScenario("polluted", { + gatewayConfigPatch: JSON.parse( + `{"plugins":{"entries":{}},"__proto__":{"polluted":true},"constructor":{"prototype":{"polluted":true}}}`, + ) as Record, + }), + ]; + + const patch = collectQaSuiteGatewayConfigPatch(scenarios); + + expect(patch).toEqual({ plugins: { entries: {} } }); + expect(({} as { polluted?: boolean }).polluted).toBeUndefined(); + }); + + it("collects gateway runtime options across selected scenarios", () => { + const scenarios = [ + makeQaSuiteTestScenario("plain"), + makeQaSuiteTestScenario("browser-ui", { + plugins: ["browser"], + gatewayRuntime: { forwardHostHome: true }, + }), + ]; + + expect(collectQaSuiteGatewayRuntimeOptions(scenarios)).toEqual({ + forwardHostHome: true, + }); + }); + + it("enables Control UI only for Control UI scenario workers", () => { + expect( + scenarioRequiresControlUi( + makeQaSuiteTestScenario("control-ui", { + surface: "control-ui", + }), + ), + ).toBe(true); + expect(scenarioRequiresControlUi(makeQaSuiteTestScenario("plain"))).toBe(false); + }); + + it("filters provider-specific scenarios from an implicit live lane", () => { + const scenarios = [ + makeQaSuiteTestScenario("generic"), + makeQaSuiteTestScenario("openai-only", { + config: { requiredProvider: "openai", requiredModel: "gpt-5.4" }, + }), + makeQaSuiteTestScenario("anthropic-only", { + config: { requiredProvider: "anthropic", requiredModel: "claude-opus-4-6" }, + }), + makeQaSuiteTestScenario("claude-subscription", { + config: { requiredProvider: "claude-cli", authMode: "subscription" }, + }), + ]; + + expect( + selectQaSuiteScenarios({ + scenarios, + providerMode: "live-frontier", + primaryModel: "openai/gpt-5.4", + }).map((scenario) => scenario.id), + ).toEqual(["generic", "openai-only"]); + + expect( + selectQaSuiteScenarios({ + scenarios, + providerMode: "live-frontier", + primaryModel: "claude-cli/claude-sonnet-4-6", + claudeCliAuthMode: "subscription", + }).map((scenario) => scenario.id), + ).toEqual(["generic", "claude-subscription"]); + }); +}); diff --git a/extensions/qa-lab/src/suite-planning.ts b/extensions/qa-lab/src/suite-planning.ts new file mode 100644 index 00000000000..1872a96c492 --- /dev/null +++ b/extensions/qa-lab/src/suite-planning.ts @@ -0,0 +1,222 @@ +import path from "node:path"; +import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime"; +import { ensureRepoBoundDirectory, resolveRepoRelativeOutputDir } from "./cli-paths.js"; +import type { QaCliBackendAuthMode } from "./gateway-child.js"; +import type { QaTransportId } from "./qa-transport-registry.js"; +import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js"; + +const DEFAULT_QA_SUITE_CONCURRENCY = 64; +const QA_MERGE_PATCH_BLOCKED_KEYS = new Set(["__proto__", "constructor", "prototype"]); + +type QaSeedScenario = ReturnType["scenarios"][number]; + +function splitModelRef(ref: string) { + const slash = ref.indexOf("/"); + if (slash <= 0 || slash === ref.length - 1) { + return null; + } + return { + provider: ref.slice(0, slash), + model: ref.slice(slash + 1), + }; +} + +function normalizeQaConfigString(value: unknown): string | undefined { + return typeof value === "string" && value.trim() ? value.trim() : undefined; +} + +function scenarioMatchesLiveLane(params: { + scenario: QaSeedScenario; + primaryModel: string; + providerMode: "mock-openai" | "live-frontier"; + claudeCliAuthMode?: QaCliBackendAuthMode; +}) { + if (params.providerMode !== "live-frontier") { + return true; + } + const selected = splitModelRef(params.primaryModel); + const config = params.scenario.execution.config ?? {}; + const requiredProvider = normalizeQaConfigString(config.requiredProvider); + if (requiredProvider && selected?.provider !== requiredProvider) { + return false; + } + const requiredModel = normalizeQaConfigString(config.requiredModel); + if (requiredModel && selected?.model !== requiredModel) { + return false; + } + const requiredAuthMode = normalizeQaConfigString(config.authMode); + if (requiredAuthMode && params.claudeCliAuthMode !== requiredAuthMode) { + return false; + } + return true; +} + +function selectQaSuiteScenarios(params: { + scenarios: ReturnType["scenarios"]; + scenarioIds?: string[]; + providerMode: "mock-openai" | "live-frontier"; + primaryModel: string; + claudeCliAuthMode?: QaCliBackendAuthMode; +}) { + const requestedScenarioIds = + params.scenarioIds && params.scenarioIds.length > 0 ? new Set(params.scenarioIds) : null; + const requestedScenarios = requestedScenarioIds + ? params.scenarios.filter((scenario) => requestedScenarioIds.has(scenario.id)) + : params.scenarios; + if (requestedScenarioIds) { + const foundScenarioIds = new Set(requestedScenarios.map((scenario) => scenario.id)); + const missingScenarioIds = [...requestedScenarioIds].filter( + (scenarioId) => !foundScenarioIds.has(scenarioId), + ); + if (missingScenarioIds.length > 0) { + throw new Error(`unknown QA scenario id(s): ${missingScenarioIds.join(", ")}`); + } + return requestedScenarios; + } + return requestedScenarios.filter((scenario) => + scenarioMatchesLiveLane({ + scenario, + providerMode: params.providerMode, + primaryModel: params.primaryModel, + claudeCliAuthMode: params.claudeCliAuthMode, + }), + ); +} + +function collectQaSuitePluginIds( + scenarios: ReturnType["scenarios"], +) { + return [ + ...new Set( + scenarios.flatMap((scenario) => + Array.isArray(scenario.plugins) + ? scenario.plugins + .map((pluginId) => pluginId.trim()) + .filter((pluginId) => pluginId.length > 0) + : [], + ), + ), + ]; +} + +function isQaPlainObject(value: unknown): value is Record { + return value !== null && typeof value === "object" && !Array.isArray(value); +} + +function applyQaMergePatch(base: unknown, patch: unknown): unknown { + if (!isQaPlainObject(patch)) { + return patch; + } + const result = isQaPlainObject(base) ? { ...base } : {}; + for (const [key, value] of Object.entries(patch)) { + if (QA_MERGE_PATCH_BLOCKED_KEYS.has(key)) { + continue; + } + if (value === null) { + delete result[key]; + continue; + } + result[key] = isQaPlainObject(value) ? applyQaMergePatch(result[key], value) : value; + } + return result; +} + +function collectQaSuiteGatewayConfigPatch( + scenarios: ReturnType["scenarios"], +): Record | undefined { + let merged: Record | undefined; + for (const scenario of scenarios) { + if (!isQaPlainObject(scenario.gatewayConfigPatch)) { + continue; + } + merged = applyQaMergePatch(merged ?? {}, scenario.gatewayConfigPatch) as Record< + string, + unknown + >; + } + return merged; +} + +function collectQaSuiteGatewayRuntimeOptions( + scenarios: ReturnType["scenarios"], +) { + let forwardHostHome = false; + for (const scenario of scenarios) { + if (scenario.gatewayRuntime?.forwardHostHome === true) { + forwardHostHome = true; + } + } + return forwardHostHome ? { forwardHostHome: true } : undefined; +} + +function scenarioRequiresControlUi(scenario: QaSeedScenario) { + return normalizeLowercaseStringOrEmpty(scenario.surface) === "control-ui"; +} + +function normalizeQaSuiteConcurrency( + value: number | undefined, + scenarioCount: number, + defaultConcurrency = DEFAULT_QA_SUITE_CONCURRENCY, +) { + const envValue = Number(process.env.OPENCLAW_QA_SUITE_CONCURRENCY); + const raw = + typeof value === "number" && Number.isFinite(value) + ? value + : Number.isFinite(envValue) + ? envValue + : defaultConcurrency; + return Math.max(1, Math.min(Math.floor(raw), Math.max(1, scenarioCount))); +} + +async function mapQaSuiteWithConcurrency( + items: readonly T[], + concurrency: number, + mapper: (item: T, index: number) => Promise, +) { + const results = Array.from({ length: items.length }); + let nextIndex = 0; + const workerCount = Math.min(Math.max(1, Math.floor(concurrency)), items.length); + const workers = Array.from({ length: workerCount }, async () => { + while (nextIndex < items.length) { + const index = nextIndex; + nextIndex += 1; + results[index] = await mapper(items[index], index); + } + }); + await Promise.all(workers); + return results; +} + +async function resolveQaSuiteOutputDir(repoRoot: string, outputDir?: string) { + const targetDir = !outputDir + ? path.join(repoRoot, ".artifacts", "qa-e2e", `suite-${Date.now().toString(36)}`) + : outputDir; + if (!path.isAbsolute(targetDir)) { + const resolved = resolveRepoRelativeOutputDir(repoRoot, targetDir); + if (!resolved) { + throw new Error("QA suite outputDir must be set."); + } + return await ensureRepoBoundDirectory(repoRoot, resolved, "QA suite outputDir", { + mode: 0o700, + }); + } + return await ensureRepoBoundDirectory(repoRoot, targetDir, "QA suite outputDir", { + mode: 0o700, + }); +} + +export { + applyQaMergePatch, + collectQaSuiteGatewayConfigPatch, + collectQaSuiteGatewayRuntimeOptions, + collectQaSuitePluginIds, + mapQaSuiteWithConcurrency, + normalizeQaSuiteConcurrency, + resolveQaSuiteOutputDir, + scenarioMatchesLiveLane, + scenarioRequiresControlUi, + selectQaSuiteScenarios, + splitModelRef, +}; + +export type { QaTransportId }; diff --git a/extensions/qa-lab/src/suite-runtime-agent-common.ts b/extensions/qa-lab/src/suite-runtime-agent-common.ts new file mode 100644 index 00000000000..a4494200823 --- /dev/null +++ b/extensions/qa-lab/src/suite-runtime-agent-common.ts @@ -0,0 +1,14 @@ +import { resolveQaLiveTurnTimeoutMs } from "./live-timeout.js"; + +type QaLiveTimeoutEnv = { + providerMode: "mock-openai" | "live-frontier"; + primaryModel: string; + alternateModel: string; +}; + +function liveTurnTimeoutMs(env: QaLiveTimeoutEnv, fallbackMs: number) { + return resolveQaLiveTurnTimeoutMs(env, fallbackMs); +} + +export { liveTurnTimeoutMs }; +export type { QaLiveTimeoutEnv }; diff --git a/extensions/qa-lab/src/suite-runtime-agent-media.test.ts b/extensions/qa-lab/src/suite-runtime-agent-media.test.ts new file mode 100644 index 00000000000..8cd4c2ae5d4 --- /dev/null +++ b/extensions/qa-lab/src/suite-runtime-agent-media.test.ts @@ -0,0 +1,113 @@ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +const fetchJsonMock = vi.hoisted(() => vi.fn()); +const patchConfigMock = vi.hoisted(() => vi.fn(async () => undefined)); +const waitForGatewayHealthyMock = vi.hoisted(() => vi.fn(async () => undefined)); +const waitForTransportReadyMock = vi.hoisted(() => vi.fn(async () => undefined)); + +vi.mock("./suite-runtime-gateway.js", () => ({ + fetchJson: fetchJsonMock, + patchConfig: patchConfigMock, + waitForGatewayHealthy: waitForGatewayHealthyMock, + waitForTransportReady: waitForTransportReadyMock, +})); + +import { + ensureImageGenerationConfigured, + extractMediaPathFromText, + resolveGeneratedImagePath, +} from "./suite-runtime-agent-media.js"; + +const tempDirs: string[] = []; + +async function makeTempDir(prefix: string) { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), prefix)); + tempDirs.push(dir); + return dir; +} + +afterEach(async () => { + await Promise.all(tempDirs.splice(0).map((dir) => fs.rm(dir, { recursive: true, force: true }))); +}); + +describe("qa suite runtime agent media helpers", () => { + beforeEach(() => { + fetchJsonMock.mockReset(); + patchConfigMock.mockClear(); + waitForGatewayHealthyMock.mockClear(); + waitForTransportReadyMock.mockClear(); + }); + + it("extracts media paths from tool output text", () => { + expect(extractMediaPathFromText("done\nMEDIA:/tmp/image.png")).toBe("/tmp/image.png"); + expect(extractMediaPathFromText("done")).toBeUndefined(); + }); + + it("resolves generated image paths from mock request logs first", async () => { + fetchJsonMock.mockResolvedValue([ + { + allInputText: "irrelevant", + toolOutput: "MEDIA:/tmp/other.png", + }, + { + allInputText: "prompt snippet", + toolOutput: "done\nMEDIA:/tmp/generated.png", + }, + ]); + + await expect( + resolveGeneratedImagePath({ + env: { + mock: { baseUrl: "http://127.0.0.1:9999" }, + gateway: { tempRoot: "/tmp/runtime" }, + } as never, + promptSnippet: "prompt snippet", + startedAtMs: Date.now(), + timeoutMs: 2_000, + }), + ).resolves.toBe("/tmp/generated.png"); + }); + + it("falls back to generated image files under the gateway temp root", async () => { + const tempRoot = await makeTempDir("qa-generated-image-"); + const mediaDir = path.join(tempRoot, "state", "media", "tool-image-generation"); + await fs.mkdir(mediaDir, { recursive: true }); + const mediaPath = path.join(mediaDir, "generated.png"); + await fs.writeFile(mediaPath, "png", "utf8"); + + await expect( + resolveGeneratedImagePath({ + env: { + mock: null, + gateway: { tempRoot }, + } as never, + promptSnippet: "unused", + startedAtMs: Date.now(), + timeoutMs: 2_000, + }), + ).resolves.toBe(mediaPath); + }); + + it("applies mock image generation config with transport-required plugins", async () => { + await ensureImageGenerationConfigured({ + providerMode: "mock-openai", + mock: { baseUrl: "http://127.0.0.1:9999" }, + transport: { requiredPluginIds: ["qa-channel", "browser"] }, + } as never); + + expect(patchConfigMock).toHaveBeenCalledWith( + expect.objectContaining({ + patch: expect.objectContaining({ + plugins: expect.objectContaining({ + allow: expect.arrayContaining(["memory-core", "openai", "qa-channel", "browser"]), + }), + }), + }), + ); + expect(waitForGatewayHealthyMock).toHaveBeenCalled(); + expect(waitForTransportReadyMock).toHaveBeenCalledWith(expect.anything(), 60_000); + }); +}); diff --git a/extensions/qa-lab/src/suite-runtime-agent-media.ts b/extensions/qa-lab/src/suite-runtime-agent-media.ts new file mode 100644 index 00000000000..5f5fbe1124b --- /dev/null +++ b/extensions/qa-lab/src/suite-runtime-agent-media.ts @@ -0,0 +1,135 @@ +import fs from "node:fs/promises"; +import path from "node:path"; +import { + fetchJson, + patchConfig, + waitForGatewayHealthy, + waitForTransportReady, +} from "./suite-runtime-gateway.js"; +import type { QaSuiteRuntimeEnv } from "./suite-runtime-types.js"; + +function extractMediaPathFromText(text: string | undefined): string | undefined { + return /MEDIA:([^\n]+)/.exec(text ?? "")?.[1]?.trim(); +} + +async function resolveGeneratedImagePath(params: { + env: Pick; + promptSnippet: string; + startedAtMs: number; + timeoutMs: number; +}) { + const startedAt = Date.now(); + while (Date.now() - startedAt < params.timeoutMs) { + if (params.env.mock) { + const requests = await fetchJson>( + `${params.env.mock.baseUrl}/debug/requests`, + ); + for (let index = requests.length - 1; index >= 0; index -= 1) { + const request = requests[index]; + if (!(request.allInputText ?? "").includes(params.promptSnippet)) { + continue; + } + const mediaPath = extractMediaPathFromText(request.toolOutput); + if (mediaPath) { + return mediaPath; + } + } + } + + const mediaDir = path.join( + params.env.gateway.tempRoot, + "state", + "media", + "tool-image-generation", + ); + const entries = await fs.readdir(mediaDir).catch(() => []); + const candidates = await Promise.all( + entries.map(async (entry) => { + const fullPath = path.join(mediaDir, entry); + const stat = await fs.stat(fullPath).catch(() => null); + if (!stat?.isFile()) { + return null; + } + return { + fullPath, + mtimeMs: stat.mtimeMs, + }; + }), + ); + const match = candidates + .filter((entry): entry is NonNullable => Boolean(entry)) + .filter((entry) => entry.mtimeMs >= params.startedAtMs - 1_000) + .toSorted((left, right) => right.mtimeMs - left.mtimeMs) + .at(0)?.fullPath; + if (match) { + return match; + } + await new Promise((resolve) => setTimeout(resolve, 250)); + } + throw new Error(`timed out after ${params.timeoutMs}ms`); +} + +async function ensureImageGenerationConfigured(env: QaSuiteRuntimeEnv) { + const imageModelRef = "openai/gpt-image-1"; + await patchConfig({ + env, + patch: + env.providerMode === "mock-openai" + ? { + plugins: { + allow: [...new Set(["memory-core", "openai", ...env.transport.requiredPluginIds])], + entries: { + openai: { + enabled: true, + }, + }, + }, + models: { + providers: { + openai: { + baseUrl: `${env.mock?.baseUrl}/v1`, + apiKey: "test", + api: "openai-responses", + models: [ + { + id: "gpt-image-1", + name: "gpt-image-1", + api: "openai-responses", + reasoning: false, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128_000, + maxTokens: 4096, + }, + ], + }, + }, + }, + agents: { + defaults: { + imageGenerationModel: { + primary: imageModelRef, + }, + }, + }, + } + : { + agents: { + defaults: { + imageGenerationModel: { + primary: imageModelRef, + }, + }, + }, + }, + }); + await waitForGatewayHealthy(env); + await waitForTransportReady(env, 60_000); +} + +export { ensureImageGenerationConfigured, extractMediaPathFromText, resolveGeneratedImagePath }; diff --git a/extensions/qa-lab/src/suite-runtime-agent-process.test.ts b/extensions/qa-lab/src/suite-runtime-agent-process.test.ts new file mode 100644 index 00000000000..2eec215ab13 --- /dev/null +++ b/extensions/qa-lab/src/suite-runtime-agent-process.test.ts @@ -0,0 +1,227 @@ +import { EventEmitter } from "node:events"; +import { beforeEach, describe, expect, it, vi } from "vitest"; + +const spawnMock = vi.hoisted(() => vi.fn()); +const resolveQaNodeExecPathMock = vi.hoisted(() => vi.fn(async () => "/usr/bin/node")); +const waitForGatewayHealthyMock = vi.hoisted(() => vi.fn(async () => undefined)); +const waitForTransportReadyMock = vi.hoisted(() => vi.fn(async () => undefined)); + +vi.mock("node:child_process", () => ({ + spawn: spawnMock, +})); + +vi.mock("./node-exec.js", () => ({ + resolveQaNodeExecPath: resolveQaNodeExecPathMock, +})); + +vi.mock("./suite-runtime-gateway.js", () => ({ + waitForGatewayHealthy: waitForGatewayHealthyMock, + waitForTransportReady: waitForTransportReadyMock, +})); + +import { + listCronJobs, + readDoctorMemoryStatus, + runAgentPrompt, + runQaCli, + startAgentRun, + waitForAgentRun, + waitForMemorySearchMatch, +} from "./suite-runtime-agent-process.js"; + +function createSpawnedProcess() { + const child = new EventEmitter() as EventEmitter & { + stdout: EventEmitter; + stderr: EventEmitter; + kill: ReturnType; + once: (event: string, listener: (...args: unknown[]) => void) => unknown; + on: (event: string, listener: (...args: unknown[]) => void) => unknown; + }; + child.stdout = new EventEmitter(); + child.stderr = new EventEmitter(); + child.kill = vi.fn(); + return child; +} + +async function waitForSpawnCount(count: number) { + while (spawnMock.mock.calls.length < count) { + await new Promise((resolve) => setTimeout(resolve, 0)); + } + await new Promise((resolve) => setTimeout(resolve, 0)); +} + +describe("qa suite runtime agent process helpers", () => { + beforeEach(() => { + spawnMock.mockReset(); + resolveQaNodeExecPathMock.mockClear(); + waitForGatewayHealthyMock.mockClear(); + waitForTransportReadyMock.mockClear(); + }); + + it("runs the qa cli through the resolved node executable", async () => { + const child = createSpawnedProcess(); + spawnMock.mockReturnValue(child); + + const pending = runQaCli( + { + repoRoot: "/repo", + gateway: { + tempRoot: "/tmp/runtime", + runtimeEnv: { PATH: "/usr/bin" }, + }, + primaryModel: "openai/gpt-5.4", + alternateModel: "openai/gpt-5.4-mini", + providerMode: "mock-openai", + } as never, + ["qa", "suite"], + ); + + await waitForSpawnCount(1); + child.stdout.emit("data", Buffer.from("ok\n")); + child.emit("exit", 0); + + await expect(pending).resolves.toBe("ok"); + expect(spawnMock).toHaveBeenCalledWith( + "/usr/bin/node", + ["/repo/dist/index.js", "qa", "suite"], + expect.objectContaining({ + cwd: "/tmp/runtime", + env: { PATH: "/usr/bin" }, + }), + ); + }); + + it("parses json qa cli output when requested", async () => { + const child = createSpawnedProcess(); + spawnMock.mockReturnValue(child); + + const pending = runQaCli( + { + repoRoot: "/repo", + gateway: { + tempRoot: "/tmp/runtime", + runtimeEnv: {}, + }, + primaryModel: "openai/gpt-5.4", + alternateModel: "openai/gpt-5.4-mini", + providerMode: "mock-openai", + } as never, + ["memory", "search"], + { json: true }, + ); + + await waitForSpawnCount(1); + child.stdout.emit("data", Buffer.from('{"ok":true}\n')); + child.emit("exit", 0); + + await expect(pending).resolves.toEqual({ ok: true }); + }); + + it("starts an agent run with transport-derived delivery metadata", async () => { + const gatewayCall = vi.fn(async () => ({ runId: "run-1" })); + const env = { + gateway: { call: gatewayCall }, + transport: { + buildAgentDelivery: vi.fn(() => ({ + channel: "qa-channel", + replyChannel: "reply-channel", + replyTo: "reply-target", + })), + }, + } as never; + + await expect( + startAgentRun(env, { + sessionKey: "session-1", + message: "hello", + }), + ).resolves.toEqual({ runId: "run-1" }); + expect(gatewayCall).toHaveBeenCalledWith( + "agent", + expect.objectContaining({ + sessionKey: "session-1", + message: "hello", + channel: "qa-channel", + replyChannel: "reply-channel", + replyTo: "reply-target", + }), + expect.any(Object), + ); + }); + + it("waits for an agent run and fails when the run does not finish ok", async () => { + const gatewayCall = vi + .fn() + .mockResolvedValueOnce({ runId: "run-2" }) + .mockResolvedValueOnce({ status: "error", error: "boom" }); + const env = { + gateway: { call: gatewayCall }, + transport: { + buildAgentDelivery: vi.fn(() => ({ + channel: "qa-channel", + replyChannel: "reply-channel", + replyTo: "reply-target", + })), + }, + } as never; + + await expect( + runAgentPrompt(env, { + sessionKey: "session-2", + message: "hello", + }), + ).rejects.toThrow("agent.wait returned error: boom"); + }); + + it("waits for a specific agent run id", async () => { + const gatewayCall = vi.fn(async () => ({ status: "ok" })); + + await expect( + waitForAgentRun({ gateway: { call: gatewayCall } } as never, "run-3"), + ).resolves.toEqual({ status: "ok" }); + expect(gatewayCall).toHaveBeenCalledWith( + "agent.wait", + { runId: "run-3", timeoutMs: 30_000 }, + { timeoutMs: 35_000 }, + ); + }); + + it("lists cron jobs and doctor memory status through the gateway", async () => { + const gatewayCall = vi + .fn() + .mockResolvedValueOnce({ + jobs: [{ id: "job-1", name: "dreaming" }], + }) + .mockResolvedValueOnce({ + dreaming: { enabled: true, shortTermCount: 3 }, + }); + const env = { gateway: { call: gatewayCall } } as never; + + await expect(listCronJobs(env)).resolves.toEqual([{ id: "job-1", name: "dreaming" }]); + await expect(readDoctorMemoryStatus(env)).resolves.toEqual({ + dreaming: { enabled: true, shortTermCount: 3 }, + }); + }); + + it("polls memory search results until the expected needle appears", async () => { + const search = vi + .fn() + .mockResolvedValueOnce({ + results: [{ path: "memory/2020-01-01.md", text: "ORBIT-9" }], + }) + .mockResolvedValueOnce({ + results: [{ path: "memory/2020-01-01.md", text: "ORBIT-10" }], + }); + + await expect( + waitForMemorySearchMatch({ + search, + expectedNeedle: "ORBIT-10", + timeoutMs: 2_000, + }), + ).resolves.toEqual({ + results: [{ path: "memory/2020-01-01.md", text: "ORBIT-10" }], + }); + expect(search).toHaveBeenCalledTimes(2); + }); +}); diff --git a/extensions/qa-lab/src/suite-runtime-agent-process.ts b/extensions/qa-lab/src/suite-runtime-agent-process.ts new file mode 100644 index 00000000000..4f513cc24f1 --- /dev/null +++ b/extensions/qa-lab/src/suite-runtime-agent-process.ts @@ -0,0 +1,235 @@ +import { spawn } from "node:child_process"; +import { randomUUID } from "node:crypto"; +import path from "node:path"; +import { resolveQaNodeExecPath } from "./node-exec.js"; +import { liveTurnTimeoutMs } from "./suite-runtime-agent-common.js"; +import { waitForGatewayHealthy, waitForTransportReady } from "./suite-runtime-gateway.js"; +import type { QaDreamingStatus, QaSuiteRuntimeEnv } from "./suite-runtime-types.js"; + +type QaMemorySearchResult = { + results?: Array<{ snippet?: string; text?: string; path?: string }>; +}; + +async function runQaCli( + env: Pick< + QaSuiteRuntimeEnv, + "gateway" | "repoRoot" | "primaryModel" | "alternateModel" | "providerMode" + >, + args: string[], + opts?: { timeoutMs?: number; json?: boolean }, +) { + const stdout: Buffer[] = []; + const stderr: Buffer[] = []; + const distEntryPath = path.join(env.repoRoot, "dist", "index.js"); + const nodeExecPath = await resolveQaNodeExecPath(); + await new Promise((resolve, reject) => { + const child = spawn(nodeExecPath, [distEntryPath, ...args], { + cwd: env.gateway.tempRoot, + env: env.gateway.runtimeEnv, + stdio: ["ignore", "pipe", "pipe"], + }); + const timeout = setTimeout(() => { + child.kill("SIGKILL"); + reject(new Error(`qa cli timed out: openclaw ${args.join(" ")}`)); + }, opts?.timeoutMs ?? 60_000); + child.stdout.on("data", (chunk) => stdout.push(Buffer.from(chunk))); + child.stderr.on("data", (chunk) => stderr.push(Buffer.from(chunk))); + child.once("error", (error) => { + clearTimeout(timeout); + reject(error); + }); + child.once("exit", (code) => { + clearTimeout(timeout); + if (code === 0) { + resolve(); + return; + } + reject( + new Error( + `qa cli failed (${code ?? "unknown"}): ${Buffer.concat(stderr).toString("utf8").trim()}`, + ), + ); + }); + }); + const text = Buffer.concat(stdout).toString("utf8").trim(); + if (!opts?.json) { + return text; + } + return text ? (JSON.parse(text) as unknown) : {}; +} + +async function startAgentRun( + env: Pick, + params: { + sessionKey: string; + message: string; + to?: string; + threadId?: string; + provider?: string; + model?: string; + timeoutMs?: number; + attachments?: Array<{ + mimeType: string; + fileName: string; + content: string; + }>; + }, +) { + const target = params.to ?? "dm:qa-operator"; + const delivery = env.transport.buildAgentDelivery({ target }); + const started = (await env.gateway.call( + "agent", + { + idempotencyKey: randomUUID(), + agentId: "qa", + sessionKey: params.sessionKey, + message: params.message, + deliver: true, + channel: delivery.channel, + to: target, + replyChannel: delivery.replyChannel, + replyTo: delivery.replyTo, + ...(params.threadId ? { threadId: params.threadId } : {}), + ...(params.provider ? { provider: params.provider } : {}), + ...(params.model ? { model: params.model } : {}), + ...(params.attachments ? { attachments: params.attachments } : {}), + }, + { + timeoutMs: params.timeoutMs ?? 30_000, + }, + )) as { runId?: string; status?: string }; + if (!started.runId) { + throw new Error(`agent call did not return a runId: ${JSON.stringify(started)}`); + } + return started; +} + +async function waitForAgentRun( + env: Pick, + runId: string, + timeoutMs = 30_000, +) { + return (await env.gateway.call( + "agent.wait", + { + runId, + timeoutMs, + }, + { + timeoutMs: timeoutMs + 5_000, + }, + )) as { status?: string; error?: string }; +} + +async function listCronJobs(env: Pick) { + const payload = (await env.gateway.call( + "cron.list", + { + includeDisabled: true, + limit: 200, + sortBy: "name", + sortDir: "asc", + }, + { timeoutMs: 30_000 }, + )) as { + jobs?: Array<{ + id?: string; + name?: string; + payload?: { kind?: string; text?: string }; + state?: { nextRunAtMs?: number }; + }>; + }; + return payload.jobs ?? []; +} + +async function readDoctorMemoryStatus(env: Pick) { + return (await env.gateway.call("doctor.memory.status", {}, { timeoutMs: 30_000 })) as { + dreaming?: QaDreamingStatus; + }; +} + +async function waitForMemorySearchMatch(params: { + search: () => Promise; + expectedNeedle: string; + timeoutMs: number; +}) { + const startedAt = Date.now(); + while (Date.now() - startedAt < params.timeoutMs) { + const result = await params.search(); + const haystack = JSON.stringify(result.results ?? []); + if (haystack.includes(params.expectedNeedle)) { + return result; + } + await new Promise((resolve) => setTimeout(resolve, 500)); + } + throw new Error(`memory index missing expected fact after reindex: ${params.expectedNeedle}`); +} + +async function forceMemoryIndex(params: { + env: Pick< + QaSuiteRuntimeEnv, + "gateway" | "transport" | "primaryModel" | "alternateModel" | "providerMode" | "repoRoot" + >; + query: string; + expectedNeedle: string; +}) { + await waitForGatewayHealthy(params.env, 60_000); + await waitForTransportReady(params.env, 60_000); + await runQaCli(params.env, ["memory", "index", "--agent", "qa", "--force"], { + timeoutMs: liveTurnTimeoutMs(params.env, 60_000), + }); + return await waitForMemorySearchMatch({ + expectedNeedle: params.expectedNeedle, + timeoutMs: liveTurnTimeoutMs(params.env, 20_000), + search: async () => + (await runQaCli( + params.env, + ["memory", "search", "--agent", "qa", "--json", "--query", params.query], + { + timeoutMs: liveTurnTimeoutMs(params.env, 60_000), + json: true, + }, + )) as QaMemorySearchResult, + }); +} + +async function runAgentPrompt( + env: Pick, + params: { + sessionKey: string; + message: string; + to?: string; + threadId?: string; + provider?: string; + model?: string; + timeoutMs?: number; + attachments?: Array<{ + mimeType: string; + fileName: string; + content: string; + }>; + }, +) { + const started = await startAgentRun(env, params); + const waited = await waitForAgentRun(env, started.runId!, params.timeoutMs ?? 30_000); + if (waited.status !== "ok") { + throw new Error( + `agent.wait returned ${waited.status ?? "unknown"}: ${waited.error ?? "no error"}`, + ); + } + return { + started, + waited, + }; +} + +export { + forceMemoryIndex, + listCronJobs, + readDoctorMemoryStatus, + runAgentPrompt, + runQaCli, + startAgentRun, + waitForMemorySearchMatch, + waitForAgentRun, +}; diff --git a/extensions/qa-lab/src/suite-runtime-agent-session.test.ts b/extensions/qa-lab/src/suite-runtime-agent-session.test.ts new file mode 100644 index 00000000000..b63cf8718bd --- /dev/null +++ b/extensions/qa-lab/src/suite-runtime-agent-session.test.ts @@ -0,0 +1,100 @@ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + createSession, + readEffectiveTools, + readRawQaSessionStore, + readSkillStatus, +} from "./suite-runtime-agent-session.js"; + +const tempDirs: string[] = []; + +async function makeTempDir(prefix: string) { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), prefix)); + tempDirs.push(dir); + return dir; +} + +afterEach(async () => { + await Promise.all(tempDirs.splice(0).map((dir) => fs.rm(dir, { recursive: true, force: true }))); +}); + +describe("qa suite runtime agent session helpers", () => { + const gatewayCall = vi.fn(); + const env = { + gateway: { call: gatewayCall }, + primaryModel: "openai/gpt-5.4", + alternateModel: "openai/gpt-5.4-mini", + providerMode: "mock-openai", + } as never; + + beforeEach(() => { + gatewayCall.mockReset(); + }); + + it("creates sessions and trims the returned key", async () => { + gatewayCall.mockResolvedValueOnce({ key: " session-1 " }); + + await expect(createSession(env, "Test Session")).resolves.toBe("session-1"); + expect(gatewayCall).toHaveBeenCalledWith( + "sessions.create", + { label: "Test Session" }, + expect.objectContaining({ timeoutMs: expect.any(Number) }), + ); + }); + + it("reads effective tool ids once and drops blanks", async () => { + gatewayCall.mockResolvedValueOnce({ + groups: [ + { tools: [{ id: "alpha" }, { id: " beta " }] }, + { tools: [{ id: "alpha" }, { id: "" }, {}] }, + ], + }); + + await expect(readEffectiveTools(env, "session-1")).resolves.toEqual(new Set(["alpha", "beta"])); + }); + + it("reads skill status for the default qa agent", async () => { + gatewayCall.mockResolvedValueOnce({ + skills: [{ name: "alpha", eligible: true }], + }); + + await expect(readSkillStatus(env)).resolves.toEqual([{ name: "alpha", eligible: true }]); + expect(gatewayCall).toHaveBeenCalledWith( + "skills.status", + { agentId: "qa" }, + expect.objectContaining({ timeoutMs: expect.any(Number) }), + ); + }); + + it("reads the raw qa session store from disk", async () => { + const tempRoot = await makeTempDir("qa-session-store-"); + const storeDir = path.join(tempRoot, "state", "agents", "qa", "sessions"); + await fs.mkdir(storeDir, { recursive: true }); + await fs.writeFile( + path.join(storeDir, "sessions.json"), + JSON.stringify({ "session-1": { sessionId: "session-1", status: "ready" } }), + "utf8", + ); + + await expect( + readRawQaSessionStore({ + gateway: { tempRoot }, + } as never), + ).resolves.toEqual({ + "session-1": { sessionId: "session-1", status: "ready" }, + }); + }); + + it("returns an empty session store when the file does not exist", async () => { + const tempRoot = await makeTempDir("qa-session-store-missing-"); + + await expect( + readRawQaSessionStore({ + gateway: { tempRoot }, + } as never), + ).resolves.toEqual({}); + }); +}); diff --git a/extensions/qa-lab/src/suite-runtime-agent-session.ts b/extensions/qa-lab/src/suite-runtime-agent-session.ts new file mode 100644 index 00000000000..7346d93c53a --- /dev/null +++ b/extensions/qa-lab/src/suite-runtime-agent-session.ts @@ -0,0 +1,96 @@ +import fs from "node:fs/promises"; +import path from "node:path"; +import { liveTurnTimeoutMs } from "./suite-runtime-agent-common.js"; +import type { + QaRawSessionStoreEntry, + QaSkillStatusEntry, + QaSuiteRuntimeEnv, +} from "./suite-runtime-types.js"; + +async function createSession( + env: Pick, + label: string, + key?: string, +) { + const created = (await env.gateway.call( + "sessions.create", + { + label, + ...(key ? { key } : {}), + }, + { + timeoutMs: liveTurnTimeoutMs(env, 60_000), + }, + )) as { key?: string }; + const sessionKey = created.key?.trim(); + if (!sessionKey) { + throw new Error("sessions.create returned no key"); + } + return sessionKey; +} + +async function readEffectiveTools( + env: Pick, + sessionKey: string, +) { + const payload = (await env.gateway.call( + "tools.effective", + { + sessionKey, + }, + { + timeoutMs: liveTurnTimeoutMs(env, 90_000), + }, + )) as { + groups?: Array<{ tools?: Array<{ id?: string }> }>; + }; + const ids = new Set(); + for (const group of payload.groups ?? []) { + for (const tool of group.tools ?? []) { + if (tool.id?.trim()) { + ids.add(tool.id.trim()); + } + } + } + return ids; +} + +async function readSkillStatus( + env: Pick, + agentId = "qa", +) { + const payload = (await env.gateway.call( + "skills.status", + { + agentId, + }, + { + timeoutMs: liveTurnTimeoutMs(env, 45_000), + }, + )) as { + skills?: QaSkillStatusEntry[]; + }; + return payload.skills ?? []; +} + +async function readRawQaSessionStore(env: Pick) { + const storePath = path.join( + env.gateway.tempRoot, + "state", + "agents", + "qa", + "sessions", + "sessions.json", + ); + try { + const raw = await fs.readFile(storePath, "utf8"); + return JSON.parse(raw) as Record; + } catch (error) { + if ((error as NodeJS.ErrnoException).code === "ENOENT") { + return {}; + } + throw error; + } +} + +export { createSession, readEffectiveTools, readRawQaSessionStore, readSkillStatus }; diff --git a/extensions/qa-lab/src/suite-runtime-agent-tools.test.ts b/extensions/qa-lab/src/suite-runtime-agent-tools.test.ts new file mode 100644 index 00000000000..db0e8799771 --- /dev/null +++ b/extensions/qa-lab/src/suite-runtime-agent-tools.test.ts @@ -0,0 +1,151 @@ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +const connectMock = vi.hoisted(() => vi.fn(async () => undefined)); +const listToolsMock = vi.hoisted(() => vi.fn(async () => ({ tools: [] }))); +const callToolMock = vi.hoisted(() => vi.fn(async () => ({ content: [] }))); +const closeMock = vi.hoisted(() => vi.fn(async () => undefined)); +const resolveQaNodeExecPathMock = vi.hoisted(() => vi.fn(async () => "/usr/bin/node")); +const stdioTransportMock = vi.hoisted(() => + vi.fn().mockImplementation(function StdioClientTransport( + this: { params?: unknown }, + params: unknown, + ) { + this.params = params; + }), +); + +vi.mock("@modelcontextprotocol/sdk/client/index.js", () => ({ + Client: vi + .fn() + .mockImplementation( + function Client(this: { + connect?: typeof connectMock; + listTools?: typeof listToolsMock; + callTool?: typeof callToolMock; + close?: typeof closeMock; + }) { + this.connect = connectMock; + this.listTools = listToolsMock; + this.callTool = callToolMock; + this.close = closeMock; + }, + ), +})); + +vi.mock("@modelcontextprotocol/sdk/client/stdio.js", () => ({ + StdioClientTransport: stdioTransportMock, +})); + +vi.mock("./node-exec.js", () => ({ + resolveQaNodeExecPath: resolveQaNodeExecPathMock, +})); + +import { + callPluginToolsMcp, + findSkill, + handleQaAction, + writeWorkspaceSkill, +} from "./suite-runtime-agent-tools.js"; + +const tempDirs: string[] = []; + +async function makeTempDir(prefix: string) { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), prefix)); + tempDirs.push(dir); + return dir; +} + +afterEach(async () => { + await Promise.all(tempDirs.splice(0).map((dir) => fs.rm(dir, { recursive: true, force: true }))); +}); + +describe("qa suite runtime agent tools helpers", () => { + beforeEach(() => { + connectMock.mockClear(); + listToolsMock.mockReset(); + callToolMock.mockReset(); + closeMock.mockClear(); + resolveQaNodeExecPathMock.mockClear(); + stdioTransportMock.mockClear(); + }); + + it("finds a skill by exact name", () => { + expect(findSkill([{ name: "alpha" }, { name: "beta" }], "beta")).toEqual({ name: "beta" }); + expect(findSkill([{ name: "alpha" }], "beta")).toBeUndefined(); + }); + + it("writes a workspace skill under the gateway workspace", async () => { + const workspaceDir = await makeTempDir("qa-workspace-"); + + const skillPath = await writeWorkspaceSkill({ + env: { gateway: { workspaceDir } } as never, + name: "my-skill", + body: "hello world", + }); + + await expect(fs.readFile(skillPath, "utf8")).resolves.toBe("hello world\n"); + expect(skillPath).toBe(path.join(workspaceDir, "skills", "my-skill", "SKILL.md")); + }); + + it("routes generic transport actions through the payload extractor", async () => { + const handleAction = vi.fn(async () => ({ + content: [{ type: "text", text: "done" }], + })); + + await expect( + handleQaAction({ + env: { + cfg: {} as never, + transport: { handleAction }, + } as never, + action: "react", + args: { messageId: "1", emoji: ":+1:" }, + }), + ).resolves.toEqual("done"); + }); + + it("calls plugin-tools MCP through the resolved node executable", async () => { + listToolsMock.mockResolvedValueOnce({ + tools: [{ name: "plugin.echo" }] as never[], + }); + callToolMock.mockResolvedValueOnce({ + content: [{ type: "text", text: "echoed" }] as never[], + }); + + await expect( + callPluginToolsMcp({ + env: { + gateway: { + runtimeEnv: { + PATH: "/usr/bin", + OPENCLAW_KEY: "1", + EMPTY: undefined, + }, + }, + } as never, + toolName: "plugin.echo", + args: { text: "hello" }, + }), + ).resolves.toEqual({ + content: [{ type: "text", text: "echoed" }], + }); + + expect(stdioTransportMock).toHaveBeenCalledWith({ + command: "/usr/bin/node", + args: ["--import", "tsx", "src/mcp/plugin-tools-serve.ts"], + stderr: "pipe", + env: { + PATH: "/usr/bin", + OPENCLAW_KEY: "1", + }, + }); + expect(callToolMock).toHaveBeenCalledWith({ + name: "plugin.echo", + arguments: { text: "hello" }, + }); + expect(closeMock).toHaveBeenCalled(); + }); +}); diff --git a/extensions/qa-lab/src/suite-runtime-agent-tools.ts b/extensions/qa-lab/src/suite-runtime-agent-tools.ts new file mode 100644 index 00000000000..c57fdf1a193 --- /dev/null +++ b/extensions/qa-lab/src/suite-runtime-agent-tools.ts @@ -0,0 +1,77 @@ +import fs from "node:fs/promises"; +import path from "node:path"; +import { Client } from "@modelcontextprotocol/sdk/client/index.js"; +import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js"; +import { extractQaToolPayload } from "./extract-tool-payload.js"; +import { resolveQaNodeExecPath } from "./node-exec.js"; +import type { + QaRuntimeActionHandlerEnv, + QaSkillStatusEntry, + QaSuiteRuntimeEnv, + QaTransportActionName, +} from "./suite-runtime-types.js"; + +function findSkill(skills: QaSkillStatusEntry[], name: string) { + return skills.find((skill) => skill.name === name); +} + +async function writeWorkspaceSkill(params: { + env: Pick; + name: string; + body: string; +}) { + const skillDir = path.join(params.env.gateway.workspaceDir, "skills", params.name); + await fs.mkdir(skillDir, { recursive: true }); + const skillPath = path.join(skillDir, "SKILL.md"); + await fs.writeFile(skillPath, `${params.body.trim()}\n`, "utf8"); + return skillPath; +} + +async function callPluginToolsMcp(params: { + env: Pick; + toolName: string; + args: Record; +}) { + const transportEnv = Object.fromEntries( + Object.entries(params.env.gateway.runtimeEnv).filter( + (entry): entry is [string, string] => typeof entry[1] === "string", + ), + ); + const nodeExecPath = await resolveQaNodeExecPath(); + const transport = new StdioClientTransport({ + command: nodeExecPath, + args: ["--import", "tsx", "src/mcp/plugin-tools-serve.ts"], + stderr: "pipe", + env: transportEnv, + }); + const client = new Client({ name: "openclaw-qa-suite", version: "0.0.0" }, {}); + try { + await client.connect(transport); + const listed = await client.listTools(); + const tool = listed.tools.find((entry) => entry.name === params.toolName); + if (!tool) { + throw new Error(`MCP tool missing: ${params.toolName}`); + } + return await client.callTool({ + name: params.toolName, + arguments: params.args, + }); + } finally { + await client.close().catch(() => {}); + } +} + +async function handleQaAction(params: { + env: QaRuntimeActionHandlerEnv; + action: QaTransportActionName; + args: Record; +}) { + const result = await params.env.transport.handleAction({ + action: params.action, + args: params.args, + cfg: params.env.cfg, + }); + return extractQaToolPayload(result as Parameters[0]); +} + +export { callPluginToolsMcp, findSkill, handleQaAction, writeWorkspaceSkill }; diff --git a/extensions/qa-lab/src/suite-runtime-agent.ts b/extensions/qa-lab/src/suite-runtime-agent.ts new file mode 100644 index 00000000000..ea6ed9f050c --- /dev/null +++ b/extensions/qa-lab/src/suite-runtime-agent.ts @@ -0,0 +1,26 @@ +export { + createSession, + readEffectiveTools, + readRawQaSessionStore, + readSkillStatus, +} from "./suite-runtime-agent-session.js"; +export { + forceMemoryIndex, + listCronJobs, + readDoctorMemoryStatus, + runAgentPrompt, + runQaCli, + startAgentRun, + waitForAgentRun, +} from "./suite-runtime-agent-process.js"; +export { + ensureImageGenerationConfigured, + extractMediaPathFromText, + resolveGeneratedImagePath, +} from "./suite-runtime-agent-media.js"; +export { + callPluginToolsMcp, + findSkill, + handleQaAction, + writeWorkspaceSkill, +} from "./suite-runtime-agent-tools.js"; diff --git a/extensions/qa-lab/src/suite-runtime-flow.test.ts b/extensions/qa-lab/src/suite-runtime-flow.test.ts new file mode 100644 index 00000000000..f71e2d47ba5 --- /dev/null +++ b/extensions/qa-lab/src/suite-runtime-flow.test.ts @@ -0,0 +1,264 @@ +import { describe, expect, it, vi } from "vitest"; + +const createQaScenarioRuntimeApi = vi.hoisted(() => vi.fn()); +const waitForOutboundMessage = vi.hoisted(() => vi.fn()); +const waitForTransportOutboundMessage = vi.hoisted(() => vi.fn()); +const waitForChannelOutboundMessage = vi.hoisted(() => vi.fn()); +const waitForNoOutbound = vi.hoisted(() => vi.fn()); +const waitForNoTransportOutbound = vi.hoisted(() => vi.fn()); +const recentOutboundSummary = vi.hoisted(() => vi.fn()); +const formatConversationTranscript = vi.hoisted(() => vi.fn()); +const readTransportTranscript = vi.hoisted(() => vi.fn()); +const formatTransportTranscript = vi.hoisted(() => vi.fn()); +const fetchJson = vi.hoisted(() => vi.fn()); +const waitForGatewayHealthy = vi.hoisted(() => vi.fn()); +const waitForTransportReady = vi.hoisted(() => vi.fn()); +const waitForQaChannelReady = vi.hoisted(() => vi.fn()); +const patchConfig = vi.hoisted(() => vi.fn()); +const applyConfig = vi.hoisted(() => vi.fn()); +const readConfigSnapshot = vi.hoisted(() => vi.fn()); +const waitForConfigRestartSettle = vi.hoisted(() => vi.fn()); +const createSession = vi.hoisted(() => vi.fn()); +const readEffectiveTools = vi.hoisted(() => vi.fn()); +const readSkillStatus = vi.hoisted(() => vi.fn()); +const readRawQaSessionStore = vi.hoisted(() => vi.fn()); +const runQaCli = vi.hoisted(() => vi.fn()); +const extractMediaPathFromText = vi.hoisted(() => vi.fn()); +const resolveGeneratedImagePath = vi.hoisted(() => vi.fn()); +const startAgentRun = vi.hoisted(() => vi.fn()); +const waitForAgentRun = vi.hoisted(() => vi.fn()); +const listCronJobs = vi.hoisted(() => vi.fn()); +const waitForCronRunCompletion = vi.hoisted(() => vi.fn()); +const readDoctorMemoryStatus = vi.hoisted(() => vi.fn()); +const forceMemoryIndex = vi.hoisted(() => vi.fn()); +const findSkill = vi.hoisted(() => vi.fn()); +const writeWorkspaceSkill = vi.hoisted(() => vi.fn()); +const callPluginToolsMcp = vi.hoisted(() => vi.fn()); +const runAgentPrompt = vi.hoisted(() => vi.fn()); +const ensureImageGenerationConfigured = vi.hoisted(() => vi.fn()); +const handleQaAction = vi.hoisted(() => vi.fn()); +const extractQaToolPayload = vi.hoisted(() => vi.fn()); +const browserRequest = vi.hoisted(() => vi.fn()); +const waitForBrowserReady = vi.hoisted(() => vi.fn()); +const browserOpenTab = vi.hoisted(() => vi.fn()); +const browserSnapshot = vi.hoisted(() => vi.fn()); +const browserAct = vi.hoisted(() => vi.fn()); +const webOpenPage = vi.hoisted(() => vi.fn(async () => ({ pageId: "page-1" }))); +const webWait = vi.hoisted(() => vi.fn()); +const webType = vi.hoisted(() => vi.fn()); +const webSnapshot = vi.hoisted(() => vi.fn()); +const webEvaluate = vi.hoisted(() => vi.fn()); +const hasDiscoveryLabels = vi.hoisted(() => vi.fn()); +const reportsDiscoveryScopeLeak = vi.hoisted(() => vi.fn()); +const reportsMissingDiscoveryFiles = vi.hoisted(() => vi.fn()); +const hasModelSwitchContinuityEvidence = vi.hoisted(() => vi.fn()); +const qaChannelPlugin = vi.hoisted(() => ({ id: "qa-channel" })); + +vi.mock("./scenario-runtime-api.js", () => ({ + createQaScenarioRuntimeApi, +})); + +vi.mock("./suite-runtime-transport.js", () => ({ + waitForOutboundMessage, + waitForTransportOutboundMessage, + waitForChannelOutboundMessage, + waitForNoOutbound, + waitForNoTransportOutbound, + recentOutboundSummary, + formatConversationTranscript, + readTransportTranscript, + formatTransportTranscript, +})); + +vi.mock("./suite-runtime-gateway.js", () => ({ + fetchJson, + waitForGatewayHealthy, + waitForTransportReady, + waitForQaChannelReady, + waitForConfigRestartSettle, + patchConfig, + applyConfig, + readConfigSnapshot, +})); + +vi.mock("./suite-runtime-agent.js", () => ({ + createSession, + readEffectiveTools, + readSkillStatus, + readRawQaSessionStore, + runQaCli, + extractMediaPathFromText, + resolveGeneratedImagePath, + startAgentRun, + waitForAgentRun, + listCronJobs, + readDoctorMemoryStatus, + forceMemoryIndex, + findSkill, + writeWorkspaceSkill, + callPluginToolsMcp, + runAgentPrompt, + ensureImageGenerationConfigured, + handleQaAction, +})); + +vi.mock("./browser-runtime.js", () => ({ + callQaBrowserRequest: browserRequest, + waitForQaBrowserReady: waitForBrowserReady, + qaBrowserOpenTab: browserOpenTab, + qaBrowserSnapshot: browserSnapshot, + qaBrowserAct: browserAct, +})); + +vi.mock("./web-runtime.js", () => ({ + qaWebOpenPage: webOpenPage, + qaWebWait: webWait, + qaWebType: webType, + qaWebSnapshot: webSnapshot, + qaWebEvaluate: webEvaluate, +})); + +vi.mock("./cron-run-wait.js", () => ({ + waitForCronRunCompletion, +})); + +vi.mock("./discovery-eval.js", () => ({ + hasDiscoveryLabels, + reportsDiscoveryScopeLeak, + reportsMissingDiscoveryFiles, +})); + +vi.mock("./extract-tool-payload.js", () => ({ + extractQaToolPayload, +})); + +vi.mock("./model-switch-eval.js", () => ({ + hasModelSwitchContinuityEvidence, +})); + +vi.mock("./runtime-api.js", () => ({ + qaChannelPlugin, +})); + +import { createQaSuiteScenarioFlowApi } from "./suite-runtime-flow.js"; +import type { QaSuiteRuntimeEnv } from "./suite-runtime-types.js"; + +describe("qa suite runtime flow", () => { + it("wires the split suite runtime deps into the scenario runtime api", async () => { + const env = { + lab: { baseUrl: "http://127.0.0.1:4444" }, + webSessionIds: new Set(), + gateway: {} as QaSuiteRuntimeEnv["gateway"], + transport: { + id: "qa-channel", + label: "QA Channel", + accountId: "qa-channel", + waitReady: vi.fn(), + createGatewayConfig: vi.fn(), + buildAgentDelivery: vi.fn(), + requiredPluginIds: [], + handleAction: vi.fn(), + createReportNotes: vi.fn(), + state: { + reset: vi.fn(), + getSnapshot: vi.fn(), + addInboundMessage: vi.fn(), + addOutboundMessage: vi.fn(), + readMessage: vi.fn(), + searchMessages: vi.fn(), + waitFor: vi.fn(), + }, + capabilities: { + waitForOutboundMessage: vi.fn(), + waitForCondition: vi.fn(), + getNormalizedMessageState: vi.fn(), + resetNormalizedMessageState: vi.fn(), + sendInboundMessage: vi.fn(), + injectOutboundMessage: vi.fn(), + readNormalizedMessage: vi.fn(), + executeGenericAction: vi.fn(), + waitForReady: vi.fn(), + assertNoFailureReplies: vi.fn(), + }, + }, + repoRoot: "/repo", + providerMode: "mock-openai", + primaryModel: "openai/gpt-5.4", + alternateModel: "openai/gpt-5.4-mini", + mock: null, + cfg: {} as QaSuiteRuntimeEnv["cfg"], + } satisfies Parameters[0]["env"]; + const scenario = { + id: "session-memory-ranking", + title: "Session memory ranking", + sourcePath: "qa/scenarios/session-memory-ranking.md", + surface: "qa-channel", + objective: "test", + successCriteria: ["test"], + execution: { + kind: "flow" as const, + config: { expected: "value" }, + flow: { steps: [] }, + }, + }; + const runScenario = vi.fn(); + const splitModelRef = vi.fn(); + const formatErrorMessage = vi.fn(); + const liveTurnTimeoutMs = vi.fn(); + const resolveQaLiveTurnTimeoutMs = vi.fn(); + createQaScenarioRuntimeApi.mockReturnValue({ api: "ok" }); + + const result = createQaSuiteScenarioFlowApi({ + env, + scenario, + runScenario, + splitModelRef, + formatErrorMessage, + liveTurnTimeoutMs, + resolveQaLiveTurnTimeoutMs, + constants: { + imageUnderstandingPngBase64: "small", + imageUnderstandingLargePngBase64: "large", + imageUnderstandingValidPngBase64: "valid", + }, + }); + + expect(result).toEqual({ api: "ok" }); + expect(createQaScenarioRuntimeApi).toHaveBeenCalledTimes(1); + const call = createQaScenarioRuntimeApi.mock.calls[0]?.[0] as { + env: typeof env; + scenario: typeof scenario; + deps: { + runScenario: typeof runScenario; + waitForQaChannelReady: typeof waitForQaChannelReady; + waitForOutboundMessage: typeof waitForOutboundMessage; + forceMemoryIndex: typeof forceMemoryIndex; + runAgentPrompt: typeof runAgentPrompt; + qaChannelPlugin: typeof qaChannelPlugin; + webOpenPage: (params: { url: string }) => Promise; + }; + constants: { + imageUnderstandingPngBase64: string; + imageUnderstandingLargePngBase64: string; + imageUnderstandingValidPngBase64: string; + }; + }; + expect(call.env).toBe(env); + expect(call.scenario).toBe(scenario); + expect(call.deps.runScenario).toBe(runScenario); + expect(call.deps.waitForQaChannelReady).toBe(waitForQaChannelReady); + expect(call.deps.waitForOutboundMessage).toBe(waitForOutboundMessage); + expect(call.deps.forceMemoryIndex).toBe(forceMemoryIndex); + expect(call.deps.runAgentPrompt).toBe(runAgentPrompt); + expect(call.deps.qaChannelPlugin).toBe(qaChannelPlugin); + expect(call.constants).toEqual({ + imageUnderstandingPngBase64: "small", + imageUnderstandingLargePngBase64: "large", + imageUnderstandingValidPngBase64: "valid", + }); + + await call.deps.webOpenPage({ url: "https://openclaw.ai" }); + expect(webOpenPage).toHaveBeenCalledWith({ url: "https://openclaw.ai" }); + expect(env.webSessionIds.has("page-1")).toBe(true); + }); +}); diff --git a/extensions/qa-lab/src/suite-runtime-flow.ts b/extensions/qa-lab/src/suite-runtime-flow.ts new file mode 100644 index 00000000000..c87af1617d9 --- /dev/null +++ b/extensions/qa-lab/src/suite-runtime-flow.ts @@ -0,0 +1,221 @@ +import { randomUUID } from "node:crypto"; +import fs from "node:fs/promises"; +import path from "node:path"; +import { setTimeout as sleep } from "node:timers/promises"; +import { + formatMemoryDreamingDay, + resolveSessionTranscriptsDirForAgent, +} from "openclaw/plugin-sdk/memory-core"; +import { buildAgentSessionKey } from "openclaw/plugin-sdk/routing"; +import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime"; +import { + callQaBrowserRequest, + qaBrowserAct, + qaBrowserOpenTab, + qaBrowserSnapshot, + waitForQaBrowserReady, +} from "./browser-runtime.js"; +import { waitForCronRunCompletion } from "./cron-run-wait.js"; +import { + hasDiscoveryLabels, + reportsDiscoveryScopeLeak, + reportsMissingDiscoveryFiles, +} from "./discovery-eval.js"; +import { extractQaToolPayload } from "./extract-tool-payload.js"; +import { hasModelSwitchContinuityEvidence } from "./model-switch-eval.js"; +import { qaChannelPlugin } from "./runtime-api.js"; +import type { QaSeedScenarioWithSource } from "./scenario-catalog.js"; +import { createQaScenarioRuntimeApi, type QaScenarioRuntimeEnv } from "./scenario-runtime-api.js"; +import { + callPluginToolsMcp, + createSession, + ensureImageGenerationConfigured, + extractMediaPathFromText, + findSkill, + forceMemoryIndex, + handleQaAction, + listCronJobs, + readDoctorMemoryStatus, + readEffectiveTools, + readRawQaSessionStore, + readSkillStatus, + resolveGeneratedImagePath, + runAgentPrompt, + runQaCli, + startAgentRun, + waitForAgentRun, + writeWorkspaceSkill, +} from "./suite-runtime-agent.js"; +import { + applyConfig, + fetchJson, + patchConfig, + readConfigSnapshot, + waitForConfigRestartSettle, + waitForGatewayHealthy, + waitForQaChannelReady, + waitForTransportReady, +} from "./suite-runtime-gateway.js"; +import { + formatConversationTranscript, + formatTransportTranscript, + readTransportTranscript, + recentOutboundSummary, + waitForChannelOutboundMessage, + waitForNoOutbound, + waitForNoTransportOutbound, + waitForOutboundMessage, + waitForTransportOutboundMessage, +} from "./suite-runtime-transport.js"; +import type { QaSuiteRuntimeEnv } from "./suite-runtime-types.js"; +import { + qaWebEvaluate, + qaWebOpenPage, + qaWebSnapshot, + qaWebType, + qaWebWait, +} from "./web-runtime.js"; + +type QaSuiteScenarioFlowEnv = { + lab: unknown; + webSessionIds: Set; + transport: QaSuiteRuntimeEnv["transport"] & QaScenarioRuntimeEnv["transport"]; +} & Omit; + +type QaSuiteStep = { + name: string; + run: () => Promise; +}; + +type QaSuiteScenarioResult = { + name: string; + status: "pass" | "fail"; + steps: Array<{ + name: string; + status: "pass" | "fail" | "skip"; + details?: string; + }>; + details?: string; +}; + +function createQaSuiteScenarioDeps(params: { + env: QaSuiteScenarioFlowEnv; + runScenario: (name: string, steps: QaSuiteStep[]) => Promise; + splitModelRef: (ref: string) => { provider: string; model: string } | null; + formatErrorMessage: (error: unknown) => string; + liveTurnTimeoutMs: ( + env: Pick, + fallbackMs: number, + ) => number; + resolveQaLiveTurnTimeoutMs: ( + env: Pick, + fallbackMs: number, + ) => number; +}) { + return { + fs, + path, + sleep, + randomUUID, + runScenario: params.runScenario, + waitForOutboundMessage, + waitForTransportOutboundMessage, + waitForChannelOutboundMessage, + waitForNoOutbound, + waitForNoTransportOutbound, + recentOutboundSummary, + formatConversationTranscript, + readTransportTranscript, + formatTransportTranscript, + fetchJson, + waitForGatewayHealthy, + waitForTransportReady, + waitForQaChannelReady, + browserRequest: callQaBrowserRequest, + waitForBrowserReady: waitForQaBrowserReady, + browserOpenTab: qaBrowserOpenTab, + browserSnapshot: qaBrowserSnapshot, + browserAct: qaBrowserAct, + webOpenPage: async (webParams: Parameters[0]) => { + const opened = await qaWebOpenPage(webParams); + params.env.webSessionIds.add(opened.pageId); + return opened; + }, + webWait: qaWebWait, + webType: qaWebType, + webSnapshot: qaWebSnapshot, + webEvaluate: qaWebEvaluate, + waitForConfigRestartSettle, + patchConfig, + applyConfig, + readConfigSnapshot, + createSession, + readEffectiveTools, + readSkillStatus, + readRawQaSessionStore, + runQaCli, + extractMediaPathFromText, + resolveGeneratedImagePath, + startAgentRun, + waitForAgentRun, + listCronJobs, + waitForCronRunCompletion, + readDoctorMemoryStatus, + forceMemoryIndex, + findSkill, + writeWorkspaceSkill, + callPluginToolsMcp, + runAgentPrompt, + ensureImageGenerationConfigured, + handleQaAction, + extractQaToolPayload, + formatMemoryDreamingDay, + resolveSessionTranscriptsDirForAgent, + buildAgentSessionKey, + normalizeLowercaseStringOrEmpty, + formatErrorMessage: params.formatErrorMessage, + liveTurnTimeoutMs: params.liveTurnTimeoutMs, + resolveQaLiveTurnTimeoutMs: params.resolveQaLiveTurnTimeoutMs, + splitModelRef: params.splitModelRef, + qaChannelPlugin, + hasDiscoveryLabels, + reportsDiscoveryScopeLeak, + reportsMissingDiscoveryFiles, + hasModelSwitchContinuityEvidence, + }; +} + +export function createQaSuiteScenarioFlowApi(params: { + env: QaSuiteScenarioFlowEnv; + scenario: QaSeedScenarioWithSource; + runScenario: (name: string, steps: QaSuiteStep[]) => Promise; + splitModelRef: (ref: string) => { provider: string; model: string } | null; + formatErrorMessage: (error: unknown) => string; + liveTurnTimeoutMs: ( + env: Pick, + fallbackMs: number, + ) => number; + resolveQaLiveTurnTimeoutMs: ( + env: Pick, + fallbackMs: number, + ) => number; + constants: { + imageUnderstandingPngBase64: string; + imageUnderstandingLargePngBase64: string; + imageUnderstandingValidPngBase64: string; + }; +}) { + return createQaScenarioRuntimeApi({ + env: params.env, + scenario: params.scenario, + deps: createQaSuiteScenarioDeps({ + env: params.env, + runScenario: params.runScenario, + splitModelRef: params.splitModelRef, + formatErrorMessage: params.formatErrorMessage, + liveTurnTimeoutMs: params.liveTurnTimeoutMs, + resolveQaLiveTurnTimeoutMs: params.resolveQaLiveTurnTimeoutMs, + }), + constants: params.constants, + }); +} diff --git a/extensions/qa-lab/src/suite-runtime-gateway.test.ts b/extensions/qa-lab/src/suite-runtime-gateway.test.ts new file mode 100644 index 00000000000..b5fc820b925 --- /dev/null +++ b/extensions/qa-lab/src/suite-runtime-gateway.test.ts @@ -0,0 +1,22 @@ +import { describe, expect, it } from "vitest"; +import { getGatewayRetryAfterMs, isConfigHashConflict } from "./suite-runtime-gateway.js"; + +describe("qa suite gateway helpers", () => { + it("reads retry-after from the primary gateway error before appended logs", () => { + const error = new Error( + "rate limit exceeded for config.patch; retry after 38s\nGateway logs:\nprevious config changed since last load", + ); + + expect(getGatewayRetryAfterMs(error)).toBe(38_000); + expect(isConfigHashConflict(error)).toBe(false); + }); + + it("ignores stale retry-after text that only appears in appended gateway logs", () => { + const error = new Error( + "config changed since last load; re-run config.get and retry\nGateway logs:\nold rate limit exceeded for config.patch; retry after 38s", + ); + + expect(getGatewayRetryAfterMs(error)).toBe(null); + expect(isConfigHashConflict(error)).toBe(true); + }); +}); diff --git a/extensions/qa-lab/src/suite-runtime-gateway.ts b/extensions/qa-lab/src/suite-runtime-gateway.ts new file mode 100644 index 00000000000..e0a1241d242 --- /dev/null +++ b/extensions/qa-lab/src/suite-runtime-gateway.ts @@ -0,0 +1,247 @@ +import { setTimeout as sleep } from "node:timers/promises"; +import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime"; +import { fetchWithSsrFGuard } from "openclaw/plugin-sdk/ssrf-runtime"; +import type { QaConfigSnapshot, QaSuiteRuntimeEnv } from "./suite-runtime-types.js"; + +async function fetchJson(url: string): Promise { + const { response, release } = await fetchWithSsrFGuard({ + url, + policy: { allowPrivateNetwork: true }, + auditContext: "qa-lab-suite-fetch-json", + }); + try { + if (!response.ok) { + throw new Error(`request failed ${response.status}: ${url}`); + } + return (await response.json()) as T; + } finally { + await release(); + } +} + +async function waitForGatewayHealthy(env: Pick, timeoutMs = 45_000) { + const startedAt = Date.now(); + while (Date.now() - startedAt < timeoutMs) { + try { + const { response, release } = await fetchWithSsrFGuard({ + url: `${env.gateway.baseUrl}/readyz`, + policy: { allowPrivateNetwork: true }, + auditContext: "qa-lab-suite-wait-for-gateway-healthy", + }); + try { + if (response.ok) { + return; + } + } finally { + await release(); + } + } catch { + // retry + } + await sleep(250); + } + throw new Error(`timed out after ${timeoutMs}ms`); +} + +async function waitForTransportReady( + env: Pick, + timeoutMs = 45_000, +) { + await env.transport.waitReady({ + gateway: env.gateway, + timeoutMs, + }); +} + +async function waitForQaChannelReady( + env: Pick, + timeoutMs = 45_000, +) { + await waitForTransportReady(env, timeoutMs); +} + +async function waitForConfigRestartSettle( + env: Pick, + restartDelayMs = 1_000, + timeoutMs = 60_000, +) { + await sleep(restartDelayMs + 750); + await waitForGatewayHealthy(env, timeoutMs); +} + +function formatGatewayPrimaryErrorText(error: unknown) { + const text = formatErrorMessage(error); + const gatewayLogsIndex = text.indexOf("\nGateway logs:"); + return (gatewayLogsIndex >= 0 ? text.slice(0, gatewayLogsIndex) : text).trim(); +} + +function isGatewayRestartRace(error: unknown) { + const text = formatGatewayPrimaryErrorText(error); + return ( + text.includes("gateway closed (1012)") || + text.includes("gateway closed (1006") || + text.includes("abnormal closure") || + text.includes("service restart") + ); +} + +function isConfigHashConflict(error: unknown) { + return formatGatewayPrimaryErrorText(error).includes("config changed since last load"); +} + +function getGatewayRetryAfterMs(error: unknown) { + const text = formatGatewayPrimaryErrorText(error); + const millisecondsMatch = /retryAfterMs["=: ]+(\d+)/i.exec(text); + if (millisecondsMatch) { + const parsed = Number(millisecondsMatch[1]); + if (Number.isFinite(parsed) && parsed > 0) { + return parsed; + } + } + const secondsMatch = /retry after (\d+)s/i.exec(text); + if (secondsMatch) { + const parsed = Number(secondsMatch[1]); + if (Number.isFinite(parsed) && parsed > 0) { + return parsed * 1_000; + } + } + return null; +} + +async function readConfigSnapshot(env: Pick) { + const snapshot = (await env.gateway.call( + "config.get", + {}, + { timeoutMs: 60_000 }, + )) as QaConfigSnapshot; + if (!snapshot.hash || !snapshot.config) { + throw new Error("config.get returned no hash/config"); + } + return { + hash: snapshot.hash, + config: snapshot.config, + } satisfies { hash: string; config: Record }; +} + +async function runConfigMutation(params: { + env: Pick; + action: "config.patch" | "config.apply"; + raw: string; + sessionKey?: string; + deliveryContext?: { + channel?: string; + to?: string; + accountId?: string; + threadId?: string | number; + }; + note?: string; + restartDelayMs?: number; +}) { + const restartDelayMs = params.restartDelayMs ?? 1_000; + let lastConflict: unknown = null; + for (let attempt = 1; attempt <= 8; attempt += 1) { + const snapshot = await readConfigSnapshot(params.env); + try { + const result = await params.env.gateway.call( + params.action, + { + raw: params.raw, + baseHash: snapshot.hash, + ...(params.sessionKey ? { sessionKey: params.sessionKey } : {}), + ...(params.deliveryContext ? { deliveryContext: params.deliveryContext } : {}), + ...(params.note ? { note: params.note } : {}), + restartDelayMs, + }, + { timeoutMs: 45_000 }, + ); + await waitForConfigRestartSettle(params.env, restartDelayMs); + return result; + } catch (error) { + if (isConfigHashConflict(error)) { + lastConflict = error; + await waitForGatewayHealthy(params.env, Math.max(15_000, restartDelayMs + 10_000)).catch( + () => undefined, + ); + continue; + } + const retryAfterMs = getGatewayRetryAfterMs(error); + if (retryAfterMs && attempt < 8) { + await sleep(retryAfterMs + 500); + await waitForGatewayHealthy(params.env, Math.max(15_000, restartDelayMs + 10_000)).catch( + () => undefined, + ); + continue; + } + if (!isGatewayRestartRace(error)) { + throw error; + } + await waitForConfigRestartSettle(params.env, restartDelayMs); + return { ok: true, restarted: true }; + } + } + throw lastConflict ?? new Error(`${params.action} failed after retrying config hash conflicts`); +} + +async function patchConfig(params: { + env: Pick; + patch: Record; + sessionKey?: string; + deliveryContext?: { + channel?: string; + to?: string; + accountId?: string; + threadId?: string | number; + }; + note?: string; + restartDelayMs?: number; +}) { + return await runConfigMutation({ + env: params.env, + action: "config.patch", + raw: JSON.stringify(params.patch, null, 2), + sessionKey: params.sessionKey, + deliveryContext: params.deliveryContext, + note: params.note, + restartDelayMs: params.restartDelayMs, + }); +} + +async function applyConfig(params: { + env: Pick; + nextConfig: Record; + sessionKey?: string; + deliveryContext?: { + channel?: string; + to?: string; + accountId?: string; + threadId?: string | number; + }; + note?: string; + restartDelayMs?: number; +}) { + return await runConfigMutation({ + env: params.env, + action: "config.apply", + raw: JSON.stringify(params.nextConfig, null, 2), + sessionKey: params.sessionKey, + deliveryContext: params.deliveryContext, + note: params.note, + restartDelayMs: params.restartDelayMs, + }); +} + +export { + applyConfig, + fetchJson, + formatGatewayPrimaryErrorText, + getGatewayRetryAfterMs, + isConfigHashConflict, + isGatewayRestartRace, + patchConfig, + readConfigSnapshot, + runConfigMutation, + waitForConfigRestartSettle, + waitForGatewayHealthy, + waitForQaChannelReady, + waitForTransportReady, +}; diff --git a/extensions/qa-lab/src/suite-runtime-transport.test.ts b/extensions/qa-lab/src/suite-runtime-transport.test.ts new file mode 100644 index 00000000000..5d7bcf4250c --- /dev/null +++ b/extensions/qa-lab/src/suite-runtime-transport.test.ts @@ -0,0 +1,211 @@ +import { describe, expect, it } from "vitest"; +import { createQaBusState } from "./bus-state.js"; +import { + createScenarioWaitForCondition, + findFailureOutboundMessage, + formatTransportTranscript, + readTransportTranscript, + waitForOutboundMessage, + waitForTransportOutboundMessage, +} from "./suite-runtime-transport.js"; + +describe("qa suite transport helpers", () => { + it("detects classified failure replies before a success-only outbound predicate matches", () => { + const state = createQaBusState(); + state.addOutboundMessage({ + to: "dm:qa-operator", + text: "⚠️ Something went wrong while processing your request. Please try again, or use /new to start a fresh session.", + senderId: "openclaw", + senderName: "OpenClaw QA", + }); + + const message = findFailureOutboundMessage(state); + expect(message?.text).toContain("Something went wrong while processing your request."); + }); + + it("fails success-only waitForOutboundMessage calls when a classified failure reply arrives first", async () => { + const state = createQaBusState(); + const pending = waitForOutboundMessage( + state, + (candidate) => + candidate.conversation.id === "qa-operator" && + candidate.text.includes("Remembered ALPHA-7."), + 5_000, + ); + + state.addOutboundMessage({ + to: "dm:qa-operator", + text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.4 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.4.', + senderId: "openclaw", + senderName: "OpenClaw QA", + }); + + await expect(pending).rejects.toThrow('No API key found for provider "openai".'); + }); + + it("treats QA channel message delivery failures as failure replies", async () => { + const state = createQaBusState(); + const pending = waitForOutboundMessage( + state, + (candidate) => candidate.text.includes("QA-RESTART"), + 5_000, + ); + + state.addOutboundMessage({ + to: "channel:qa-room", + text: "⚠️ ✉️ Message failed", + senderId: "openclaw", + senderName: "OpenClaw QA", + }); + + await expect(pending).rejects.toThrow("Message failed"); + }); + + it("fails success-only waitForOutboundMessage calls when internal coordination text leaks", async () => { + const state = createQaBusState(); + const pending = waitForOutboundMessage( + state, + (candidate) => candidate.text.includes("QA_LEAK_OK"), + 5_000, + ); + + state.addOutboundMessage({ + to: "dm:qa-operator", + text: "checking thread context; then post a tight progress reply here.\nQA_LEAK_OK", + senderId: "openclaw", + senderName: "OpenClaw QA", + }); + + await expect(pending).rejects.toThrow("checking thread context"); + }); + + it("fails raw scenario waitForCondition calls when a classified failure reply arrives", async () => { + const state = createQaBusState(); + const waitForCondition = createScenarioWaitForCondition(state); + + const pending = waitForCondition( + () => + state + .getSnapshot() + .messages.findLast( + (message) => + message.direction === "outbound" && + message.conversation.id === "qa-operator" && + message.text.includes("ALPHA-7"), + ), + 5_000, + 10, + ); + + state.addOutboundMessage({ + to: "dm:qa-operator", + text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.4 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.4.', + senderId: "openclaw", + senderName: "OpenClaw QA", + }); + + await expect(pending).rejects.toThrow('No API key found for provider "openai".'); + }); + + it("fails raw scenario waitForCondition calls even when mixed traffic already exists", async () => { + const state = createQaBusState(); + state.addInboundMessage({ + conversation: { id: "qa-operator", kind: "direct" }, + senderId: "alice", + senderName: "Alice", + text: "hello", + }); + state.addOutboundMessage({ + to: "dm:qa-operator", + text: "working on it", + senderId: "openclaw", + senderName: "OpenClaw QA", + }); + state.addInboundMessage({ + conversation: { id: "qa-operator", kind: "direct" }, + senderId: "alice", + senderName: "Alice", + text: "ok do it", + }); + + const waitForCondition = createScenarioWaitForCondition(state); + const pending = waitForCondition( + () => + state + .getSnapshot() + .messages.slice(3) + .findLast( + (message) => + message.direction === "outbound" && + message.conversation.id === "qa-operator" && + message.text.includes("mission"), + ), + 150, + 10, + ); + + state.addOutboundMessage({ + to: "dm:qa-operator", + text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.4 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.4.', + senderId: "openclaw", + senderName: "OpenClaw QA", + }); + + await expect(pending).rejects.toThrow('No API key found for provider "openai".'); + }); + + it("reads transport transcripts with generic helper names", () => { + const state = createQaBusState(); + state.addInboundMessage({ + conversation: { id: "qa-operator", kind: "direct" }, + senderId: "alice", + senderName: "Alice", + text: "hello", + }); + state.addOutboundMessage({ + to: "dm:qa-operator", + text: "working on it", + senderId: "openclaw", + senderName: "OpenClaw QA", + }); + state.addOutboundMessage({ + to: "dm:qa-operator", + text: "done", + senderId: "openclaw", + senderName: "OpenClaw QA", + }); + + const messages = readTransportTranscript(state, { + conversationId: "qa-operator", + direction: "outbound", + }); + const formatted = formatTransportTranscript(state, { + conversationId: "qa-operator", + }); + + expect(messages.map((message: { text: string }) => message.text)).toEqual([ + "working on it", + "done", + ]); + expect(formatted).toContain("USER Alice: hello"); + expect(formatted).toContain("ASSISTANT OpenClaw QA: working on it"); + }); + + it("waits for outbound replies through the generic transport alias", async () => { + const state = createQaBusState(); + const pending = waitForTransportOutboundMessage( + state, + (candidate) => candidate.conversation.id === "qa-operator" && candidate.text.includes("done"), + 5_000, + ); + + state.addOutboundMessage({ + to: "dm:qa-operator", + text: "done", + senderId: "openclaw", + senderName: "OpenClaw QA", + }); + + await expect(pending).resolves.toMatchObject({ text: "done" }); + }); +}); diff --git a/extensions/qa-lab/src/suite-runtime-transport.ts b/extensions/qa-lab/src/suite-runtime-transport.ts new file mode 100644 index 00000000000..e1f805591f1 --- /dev/null +++ b/extensions/qa-lab/src/suite-runtime-transport.ts @@ -0,0 +1,175 @@ +import { setTimeout as sleep } from "node:timers/promises"; +import { + createFailureAwareTransportWaitForCondition, + findFailureOutboundMessage as findTransportFailureOutboundMessage, + type QaTransportState, +} from "./qa-transport.js"; +import { extractQaFailureReplyText } from "./reply-failure.js"; +import type { QaBusMessage } from "./runtime-api.js"; + +async function waitForCondition( + check: () => T | Promise | null | undefined, + timeoutMs = 15_000, + intervalMs = 100, +): Promise { + const startedAt = Date.now(); + while (Date.now() - startedAt < timeoutMs) { + const value = await check(); + if (value !== null && value !== undefined) { + return value; + } + await sleep(intervalMs); + } + throw new Error(`timed out after ${timeoutMs}ms`); +} + +function findFailureOutboundMessage( + state: QaTransportState, + options?: { sinceIndex?: number; cursorSpace?: "all" | "outbound" }, +) { + return findTransportFailureOutboundMessage(state, options); +} + +function createScenarioWaitForCondition(state: QaTransportState) { + return createFailureAwareTransportWaitForCondition(state); +} + +async function waitForOutboundMessage( + state: QaTransportState, + predicate: (message: QaBusMessage) => boolean, + timeoutMs = 15_000, + options?: { sinceIndex?: number }, +) { + return await waitForCondition(() => { + const failureMessage = findFailureOutboundMessage(state, options); + if (failureMessage) { + throw new Error(extractQaFailureReplyText(failureMessage.text) ?? failureMessage.text); + } + const match = state + .getSnapshot() + .messages.filter((message: QaBusMessage) => message.direction === "outbound") + .slice(options?.sinceIndex ?? 0) + .find(predicate); + if (!match) { + return undefined; + } + const failureReply = extractQaFailureReplyText(match.text); + if (failureReply) { + throw new Error(failureReply); + } + return match; + }, timeoutMs); +} + +async function waitForNoOutbound(state: QaTransportState, timeoutMs = 1_200) { + await sleep(timeoutMs); + const outbound = state + .getSnapshot() + .messages.filter((message: QaBusMessage) => message.direction === "outbound"); + if (outbound.length > 0) { + throw new Error(`expected no outbound messages, saw ${outbound.length}`); + } +} + +function recentOutboundSummary(state: QaTransportState, limit = 5) { + return state + .getSnapshot() + .messages.filter((message: QaBusMessage) => message.direction === "outbound") + .slice(-limit) + .map((message: QaBusMessage) => `${message.conversation.id}:${message.text}`) + .join(" | "); +} + +function readTransportTranscript( + state: QaTransportState, + params: { + conversationId: string; + threadId?: string; + direction?: "inbound" | "outbound"; + limit?: number; + }, +) { + const messages = state + .getSnapshot() + .messages.filter( + (message: QaBusMessage) => + message.conversation.id === params.conversationId && + (params.threadId ? message.threadId === params.threadId : true) && + (params.direction ? message.direction === params.direction : true), + ); + return params.limit ? messages.slice(-params.limit) : messages; +} + +function formatTransportTranscript( + state: QaTransportState, + params: { + conversationId: string; + threadId?: string; + direction?: "inbound" | "outbound"; + limit?: number; + }, +) { + const messages = readTransportTranscript(state, params); + return messages + .map((message: QaBusMessage) => { + const direction = message.direction === "inbound" ? "user" : "assistant"; + const speaker = message.senderName?.trim() || message.senderId; + const attachmentSummary = + message.attachments && message.attachments.length > 0 + ? ` [attachments: ${message.attachments + .map( + (attachment: NonNullable[number]) => + `${attachment.kind}:${attachment.fileName ?? attachment.id}`, + ) + .join(", ")}]` + : ""; + return `${direction.toUpperCase()} ${speaker}: ${message.text}${attachmentSummary}`; + }) + .join("\n\n"); +} + +function formatConversationTranscript( + state: QaTransportState, + params: { + conversationId: string; + threadId?: string; + limit?: number; + }, +) { + return formatTransportTranscript(state, params); +} + +async function waitForTransportOutboundMessage( + state: QaTransportState, + predicate: (message: QaBusMessage) => boolean, + timeoutMs?: number, +) { + return await waitForOutboundMessage(state, predicate, timeoutMs); +} + +async function waitForChannelOutboundMessage( + state: QaTransportState, + predicate: (message: QaBusMessage) => boolean, + timeoutMs?: number, +) { + return await waitForTransportOutboundMessage(state, predicate, timeoutMs); +} + +async function waitForNoTransportOutbound(state: QaTransportState, timeoutMs = 1_200) { + await waitForNoOutbound(state, timeoutMs); +} + +export { + createScenarioWaitForCondition, + findFailureOutboundMessage, + formatConversationTranscript, + formatTransportTranscript, + readTransportTranscript, + recentOutboundSummary, + waitForChannelOutboundMessage, + waitForCondition, + waitForNoOutbound, + waitForNoTransportOutbound, + waitForOutboundMessage, + waitForTransportOutboundMessage, +}; diff --git a/extensions/qa-lab/src/suite-runtime-types.ts b/extensions/qa-lab/src/suite-runtime-types.ts new file mode 100644 index 00000000000..e2186b096a2 --- /dev/null +++ b/extensions/qa-lab/src/suite-runtime-types.ts @@ -0,0 +1,70 @@ +import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; +import type { QaTransportActionName, QaTransportAdapter } from "./qa-transport.js"; + +export type QaRuntimeGatewayClient = { + baseUrl: string; + tempRoot: string; + workspaceDir: string; + runtimeEnv: NodeJS.ProcessEnv; + call: ( + method: string, + params?: unknown, + options?: { + timeoutMs?: number; + }, + ) => Promise; +}; + +export type QaRuntimeTransport = QaTransportAdapter; + +export type QaSuiteRuntimeEnv = { + gateway: QaRuntimeGatewayClient; + transport: QaRuntimeTransport; + repoRoot: string; + providerMode: "mock-openai" | "live-frontier"; + primaryModel: string; + alternateModel: string; + mock: { + baseUrl: string; + } | null; + cfg: OpenClawConfig; +}; + +export type QaSkillStatusEntry = { + name?: string; + eligible?: boolean; + disabled?: boolean; + blockedByAllowlist?: boolean; +}; + +export type QaConfigSnapshot = { + hash?: string; + config?: Record; +}; + +export type QaDreamingStatus = { + enabled?: boolean; + shortTermCount?: number; + promotedTotal?: number; + phaseSignalCount?: number; + lightPhaseHitCount?: number; + remPhaseHitCount?: number; + phases?: { + deep?: { + managedCronPresent?: boolean; + nextRunAtMs?: number; + }; + }; +}; + +export type QaRawSessionStoreEntry = { + sessionId?: string; + status?: string; + spawnedBy?: string; + label?: string; + abortedLastRun?: boolean; + updatedAt?: number; +}; + +export type QaRuntimeActionHandlerEnv = Pick; +export type { QaTransportActionName }; diff --git a/extensions/qa-lab/src/suite-test-helpers.ts b/extensions/qa-lab/src/suite-test-helpers.ts new file mode 100644 index 00000000000..b8fe47403cb --- /dev/null +++ b/extensions/qa-lab/src/suite-test-helpers.ts @@ -0,0 +1,31 @@ +import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js"; + +type QaSuiteTestScenario = ReturnType["scenarios"][number]; + +export function makeQaSuiteTestScenario( + id: string, + params: { + config?: Record; + plugins?: string[]; + gatewayConfigPatch?: Record; + gatewayRuntime?: { forwardHostHome?: boolean }; + surface?: string; + } = {}, +): QaSuiteTestScenario { + return { + id, + title: id, + surface: params.surface ?? "test", + objective: "test", + successCriteria: ["test"], + ...(params.plugins ? { plugins: params.plugins } : {}), + ...(params.gatewayConfigPatch ? { gatewayConfigPatch: params.gatewayConfigPatch } : {}), + ...(params.gatewayRuntime ? { gatewayRuntime: params.gatewayRuntime } : {}), + sourcePath: `qa/scenarios/${id}.md`, + execution: { + kind: "flow", + ...(params.config ? { config: params.config } : {}), + flow: { steps: [{ name: "noop", actions: [{ assert: "true" }] }] }, + }, + } as QaSuiteTestScenario; +} diff --git a/extensions/qa-lab/src/suite.test.ts b/extensions/qa-lab/src/suite.test.ts index 81e4caf1693..7f5a5e1fa06 100644 --- a/extensions/qa-lab/src/suite.test.ts +++ b/extensions/qa-lab/src/suite.test.ts @@ -1,91 +1,7 @@ -import { lstat, mkdir, mkdtemp, rm, symlink } from "node:fs/promises"; -import os from "node:os"; -import path from "node:path"; import { describe, expect, it, vi } from "vitest"; -import { createQaBusState } from "./bus-state.js"; -import { qaSuiteTesting, runQaSuite } from "./suite.js"; - -describe("qa suite failure reply handling", () => { - const makeScenario = ( - id: string, - config?: Record, - plugins?: string[], - gatewayConfigPatch?: Record, - gatewayRuntime?: { forwardHostHome?: boolean }, - ): Parameters[0]["scenarios"][number] => - ({ - id, - title: id, - surface: "test", - objective: "test", - successCriteria: ["test"], - plugins, - gatewayConfigPatch, - gatewayRuntime, - sourcePath: `qa/scenarios/${id}.md`, - execution: { - kind: "flow", - config, - flow: { steps: [{ name: "noop", actions: [{ assert: "true" }] }] }, - }, - }) as Parameters[0]["scenarios"][number]; - - it("normalizes suite concurrency to a bounded integer", () => { - const previous = process.env.OPENCLAW_QA_SUITE_CONCURRENCY; - delete process.env.OPENCLAW_QA_SUITE_CONCURRENCY; - try { - expect(qaSuiteTesting.normalizeQaSuiteConcurrency(undefined, 10)).toBe(10); - expect(qaSuiteTesting.normalizeQaSuiteConcurrency(undefined, 80)).toBe(64); - expect(qaSuiteTesting.normalizeQaSuiteConcurrency(2.8, 10)).toBe(2); - expect(qaSuiteTesting.normalizeQaSuiteConcurrency(20, 3)).toBe(3); - expect(qaSuiteTesting.normalizeQaSuiteConcurrency(0, 3)).toBe(1); - } finally { - if (previous === undefined) { - delete process.env.OPENCLAW_QA_SUITE_CONCURRENCY; - } else { - process.env.OPENCLAW_QA_SUITE_CONCURRENCY = previous; - } - } - }); - - it("keeps programmatic suite output dirs within the repo root", async () => { - const repoRoot = await mkdtemp(path.join(os.tmpdir(), "qa-suite-existing-root-")); - try { - await expect( - qaSuiteTesting.resolveQaSuiteOutputDir( - repoRoot, - path.join(repoRoot, ".artifacts", "qa-e2e", "custom"), - ), - ).resolves.toBe(path.join(repoRoot, ".artifacts", "qa-e2e", "custom")); - await expect( - lstat(path.join(repoRoot, ".artifacts", "qa-e2e", "custom")).then((stats) => - stats.isDirectory(), - ), - ).resolves.toBe(true); - await expect( - qaSuiteTesting.resolveQaSuiteOutputDir(repoRoot, "/tmp/outside"), - ).rejects.toThrow("QA suite outputDir must stay within the repo root."); - } finally { - await rm(repoRoot, { recursive: true, force: true }); - } - }); - - it("rejects symlinked suite output dirs that escape the repo root", async () => { - const repoRoot = await mkdtemp(path.join(os.tmpdir(), "qa-suite-root-")); - const outsideRoot = await mkdtemp(path.join(os.tmpdir(), "qa-suite-outside-")); - try { - await mkdir(path.join(repoRoot, ".artifacts"), { recursive: true }); - await symlink(outsideRoot, path.join(repoRoot, ".artifacts", "qa-e2e"), "dir"); - - await expect( - qaSuiteTesting.resolveQaSuiteOutputDir(repoRoot, ".artifacts/qa-e2e/custom"), - ).rejects.toThrow("QA suite outputDir must not traverse symlinks."); - } finally { - await rm(repoRoot, { recursive: true, force: true }); - await rm(outsideRoot, { recursive: true, force: true }); - } - }); +import { runQaSuite } from "./suite.js"; +describe("qa suite", () => { it("rejects unsupported transport ids before starting the lab", async () => { const startLab = vi.fn(); @@ -98,379 +14,4 @@ describe("qa suite failure reply handling", () => { expect(startLab).not.toHaveBeenCalled(); }); - - it("maps suite work with bounded concurrency while preserving order", async () => { - let active = 0; - let maxActive = 0; - const result = await qaSuiteTesting.mapQaSuiteWithConcurrency([1, 2, 3, 4], 2, async (item) => { - active += 1; - maxActive = Math.max(maxActive, active); - await new Promise((resolve) => setTimeout(resolve, 10)); - active -= 1; - return item * 10; - }); - - expect(maxActive).toBe(2); - expect(result).toEqual([10, 20, 30, 40]); - }); - - it("keeps explicitly requested provider-specific scenarios", () => { - const scenarios = [ - makeScenario("generic"), - makeScenario("anthropic-only", { - requiredProvider: "anthropic", - requiredModel: "claude-opus-4-6", - }), - ]; - - expect( - qaSuiteTesting - .selectQaSuiteScenarios({ - scenarios, - scenarioIds: ["anthropic-only"], - providerMode: "live-frontier", - primaryModel: "openai/gpt-5.4", - }) - .map((scenario) => scenario.id), - ).toEqual(["anthropic-only"]); - }); - - it("collects unique scenario-declared bundled plugins in encounter order", () => { - const scenarios = [ - makeScenario("generic", undefined, ["active-memory", "memory-wiki"]), - makeScenario("other", undefined, ["memory-wiki", "openai"]), - makeScenario("plain"), - ]; - - expect(qaSuiteTesting.collectQaSuitePluginIds(scenarios)).toEqual([ - "active-memory", - "memory-wiki", - "openai", - ]); - }); - - it("merge-patches scenario startup config in encounter order", () => { - const scenarios = [ - makeScenario("active-memory", undefined, ["active-memory"], { - plugins: { - entries: { - "active-memory": { - config: { - enabled: true, - agents: ["qa"], - }, - }, - }, - }, - }), - makeScenario("live-defaults", undefined, undefined, { - agents: { - defaults: { - thinkingDefault: "minimal", - }, - }, - plugins: { - entries: { - "active-memory": { - config: { - transcriptDir: "qa-memory-e2e", - }, - }, - }, - }, - }), - ]; - - expect(qaSuiteTesting.collectQaSuiteGatewayConfigPatch(scenarios)).toEqual({ - agents: { - defaults: { - thinkingDefault: "minimal", - }, - }, - plugins: { - entries: { - "active-memory": { - config: { - enabled: true, - agents: ["qa"], - transcriptDir: "qa-memory-e2e", - }, - }, - }, - }, - }); - }); - - it("collects gateway runtime options across selected scenarios", () => { - const scenarios = [ - makeScenario("plain"), - makeScenario("browser-ui", undefined, ["browser"], undefined, { - forwardHostHome: true, - }), - ]; - - expect(qaSuiteTesting.collectQaSuiteGatewayRuntimeOptions(scenarios)).toEqual({ - forwardHostHome: true, - }); - }); - - it("enables Control UI only for Control UI scenario workers", () => { - expect( - qaSuiteTesting.scenarioRequiresControlUi({ - ...makeScenario("control-ui"), - surface: "control-ui", - }), - ).toBe(true); - expect(qaSuiteTesting.scenarioRequiresControlUi(makeScenario("plain"))).toBe(false); - }); - - it("filters provider-specific scenarios from an implicit live lane", () => { - const scenarios = [ - makeScenario("generic"), - makeScenario("openai-only", { requiredProvider: "openai", requiredModel: "gpt-5.4" }), - makeScenario("anthropic-only", { - requiredProvider: "anthropic", - requiredModel: "claude-opus-4-6", - }), - makeScenario("claude-subscription", { - requiredProvider: "claude-cli", - authMode: "subscription", - }), - ]; - - expect( - qaSuiteTesting - .selectQaSuiteScenarios({ - scenarios, - providerMode: "live-frontier", - primaryModel: "openai/gpt-5.4", - }) - .map((scenario) => scenario.id), - ).toEqual(["generic", "openai-only"]); - - expect( - qaSuiteTesting - .selectQaSuiteScenarios({ - scenarios, - providerMode: "live-frontier", - primaryModel: "claude-cli/claude-sonnet-4-6", - claudeCliAuthMode: "subscription", - }) - .map((scenario) => scenario.id), - ).toEqual(["generic", "claude-subscription"]); - }); - - it("reads retry-after from the primary gateway error before appended logs", () => { - const error = new Error( - "rate limit exceeded for config.patch; retry after 38s\nGateway logs:\nprevious config changed since last load", - ); - - expect(qaSuiteTesting.getGatewayRetryAfterMs(error)).toBe(38_000); - expect(qaSuiteTesting.isConfigHashConflict(error)).toBe(false); - }); - - it("ignores stale retry-after text that only appears in appended gateway logs", () => { - const error = new Error( - "config changed since last load; re-run config.get and retry\nGateway logs:\nold rate limit exceeded for config.patch; retry after 38s", - ); - - expect(qaSuiteTesting.getGatewayRetryAfterMs(error)).toBe(null); - expect(qaSuiteTesting.isConfigHashConflict(error)).toBe(true); - }); - - it("detects classified failure replies before a success-only outbound predicate matches", async () => { - const state = createQaBusState(); - state.addOutboundMessage({ - to: "dm:qa-operator", - text: "⚠️ Something went wrong while processing your request. Please try again, or use /new to start a fresh session.", - senderId: "openclaw", - senderName: "OpenClaw QA", - }); - - const message = qaSuiteTesting.findFailureOutboundMessage(state); - expect(message?.text).toContain("Something went wrong while processing your request."); - }); - - it("fails success-only waitForOutboundMessage calls when a classified failure reply arrives first", async () => { - const state = createQaBusState(); - const pending = qaSuiteTesting.waitForOutboundMessage( - state, - (candidate) => - candidate.conversation.id === "qa-operator" && - candidate.text.includes("Remembered ALPHA-7."), - 5_000, - ); - - state.addOutboundMessage({ - to: "dm:qa-operator", - text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.4 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.4.', - senderId: "openclaw", - senderName: "OpenClaw QA", - }); - - await expect(pending).rejects.toThrow('No API key found for provider "openai".'); - }); - - it("treats QA channel message delivery failures as failure replies", async () => { - const state = createQaBusState(); - const pending = qaSuiteTesting.waitForOutboundMessage( - state, - (candidate) => candidate.text.includes("QA-RESTART"), - 5_000, - ); - - state.addOutboundMessage({ - to: "channel:qa-room", - text: "⚠️ ✉️ Message failed", - senderId: "openclaw", - senderName: "OpenClaw QA", - }); - - await expect(pending).rejects.toThrow("Message failed"); - }); - - it("fails success-only waitForOutboundMessage calls when internal coordination text leaks", async () => { - const state = createQaBusState(); - const pending = qaSuiteTesting.waitForOutboundMessage( - state, - (candidate) => candidate.text.includes("QA_LEAK_OK"), - 5_000, - ); - - state.addOutboundMessage({ - to: "dm:qa-operator", - text: "checking thread context; then post a tight progress reply here.\nQA_LEAK_OK", - senderId: "openclaw", - senderName: "OpenClaw QA", - }); - - await expect(pending).rejects.toThrow("checking thread context"); - }); - - it("fails raw scenario waitForCondition calls when a classified failure reply arrives", async () => { - const state = createQaBusState(); - const waitForCondition = qaSuiteTesting.createScenarioWaitForCondition(state); - - const pending = waitForCondition( - () => - state - .getSnapshot() - .messages.findLast( - (message) => - message.direction === "outbound" && - message.conversation.id === "qa-operator" && - message.text.includes("ALPHA-7"), - ), - 5_000, - 10, - ); - - state.addOutboundMessage({ - to: "dm:qa-operator", - text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.4 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.4.', - senderId: "openclaw", - senderName: "OpenClaw QA", - }); - - await expect(pending).rejects.toThrow('No API key found for provider "openai".'); - }); - - it("fails raw scenario waitForCondition calls even when mixed traffic already exists", async () => { - const state = createQaBusState(); - state.addInboundMessage({ - conversation: { id: "qa-operator", kind: "direct" }, - senderId: "alice", - senderName: "Alice", - text: "hello", - }); - state.addOutboundMessage({ - to: "dm:qa-operator", - text: "working on it", - senderId: "openclaw", - senderName: "OpenClaw QA", - }); - state.addInboundMessage({ - conversation: { id: "qa-operator", kind: "direct" }, - senderId: "alice", - senderName: "Alice", - text: "ok do it", - }); - - const waitForCondition = qaSuiteTesting.createScenarioWaitForCondition(state); - const pending = waitForCondition( - () => - state - .getSnapshot() - .messages.slice(3) - .findLast( - (message) => - message.direction === "outbound" && - message.conversation.id === "qa-operator" && - message.text.includes("mission"), - ), - 150, - 10, - ); - - state.addOutboundMessage({ - to: "dm:qa-operator", - text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.4 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.4.', - senderId: "openclaw", - senderName: "OpenClaw QA", - }); - - await expect(pending).rejects.toThrow('No API key found for provider "openai".'); - }); - - it("reads transport transcripts with generic helper names", () => { - const state = createQaBusState(); - state.addInboundMessage({ - conversation: { id: "qa-operator", kind: "direct" }, - senderId: "alice", - senderName: "Alice", - text: "hello", - }); - state.addOutboundMessage({ - to: "dm:qa-operator", - text: "working on it", - senderId: "openclaw", - senderName: "OpenClaw QA", - }); - state.addOutboundMessage({ - to: "dm:qa-operator", - text: "done", - senderId: "openclaw", - senderName: "OpenClaw QA", - }); - - const messages = qaSuiteTesting.readTransportTranscript(state, { - conversationId: "qa-operator", - direction: "outbound", - }); - const formatted = qaSuiteTesting.formatTransportTranscript(state, { - conversationId: "qa-operator", - }); - - expect(messages.map((message) => message.text)).toEqual(["working on it", "done"]); - expect(formatted).toContain("USER Alice: hello"); - expect(formatted).toContain("ASSISTANT OpenClaw QA: working on it"); - }); - - it("waits for outbound replies through the generic transport alias", async () => { - const state = createQaBusState(); - const pending = qaSuiteTesting.waitForTransportOutboundMessage( - state, - (candidate) => candidate.conversation.id === "qa-operator" && candidate.text.includes("done"), - 5_000, - ); - - state.addOutboundMessage({ - to: "dm:qa-operator", - text: "done", - senderId: "openclaw", - senderName: "OpenClaw QA", - }); - - await expect(pending).resolves.toMatchObject({ text: "done" }); - }); }); diff --git a/extensions/qa-lab/src/suite.ts b/extensions/qa-lab/src/suite.ts index 0528c25f0da..6685b84304c 100644 --- a/extensions/qa-lab/src/suite.ts +++ b/extensions/qa-lab/src/suite.ts @@ -1,35 +1,9 @@ -import { spawn } from "node:child_process"; -import { randomUUID } from "node:crypto"; import fs from "node:fs/promises"; import path from "node:path"; import { setTimeout as sleep } from "node:timers/promises"; -import { Client } from "@modelcontextprotocol/sdk/client/index.js"; -import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js"; import { disposeRegisteredAgentHarnesses } from "openclaw/plugin-sdk/agent-harness"; import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime"; -import { - formatMemoryDreamingDay, - resolveSessionTranscriptsDirForAgent, -} from "openclaw/plugin-sdk/memory-core"; -import { buildAgentSessionKey } from "openclaw/plugin-sdk/routing"; -import { fetchWithSsrFGuard } from "openclaw/plugin-sdk/ssrf-runtime"; -import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime"; -import { - callQaBrowserRequest, - qaBrowserAct, - qaBrowserOpenTab, - qaBrowserSnapshot, - waitForQaBrowserReady, -} from "./browser-runtime.js"; -import { ensureRepoBoundDirectory, resolveRepoRelativeOutputDir } from "./cli-paths.js"; -import { waitForCronRunCompletion } from "./cron-run-wait.js"; -import { - hasDiscoveryLabels, - reportsDiscoveryScopeLeak, - reportsMissingDiscoveryFiles, -} from "./discovery-eval.js"; -import { extractQaToolPayload } from "./extract-tool-payload.js"; import { startQaGatewayChild, type QaCliBackendAuthMode } from "./gateway-child.js"; import type { QaLabLatestReport, @@ -44,37 +18,34 @@ import { normalizeQaProviderMode, type QaProviderMode, } from "./model-selection.js"; -import { hasModelSwitchContinuityEvidence } from "./model-switch-eval.js"; import type { QaThinkingLevel } from "./qa-gateway-config.js"; import { createQaTransportAdapter, + defaultQaSuiteConcurrencyForTransport, normalizeQaTransportId, type QaTransportId, } from "./qa-transport-registry.js"; -import type { - QaTransportAdapter, - QaTransportActionName, - QaTransportState, -} from "./qa-transport.js"; -import { - createFailureAwareTransportWaitForCondition, - findFailureOutboundMessage as findTransportFailureOutboundMessage, -} from "./qa-transport.js"; -import { extractQaFailureReplyText } from "./reply-failure.js"; +import type { QaTransportAdapter } from "./qa-transport.js"; import { renderQaMarkdownReport, type QaReportCheck, type QaReportScenario } from "./report.js"; import { defaultQaModelForMode } from "./run-config.js"; -import { qaChannelPlugin, type QaBusMessage } from "./runtime-api.js"; import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js"; import { runScenarioFlow } from "./scenario-flow-runner.js"; -import { createQaScenarioRuntimeApi } from "./scenario-runtime-api.js"; import { - closeQaWebSessions, - qaWebEvaluate, - qaWebOpenPage, - qaWebSnapshot, - qaWebType, - qaWebWait, -} from "./web-runtime.js"; + applyQaMergePatch, + collectQaSuiteGatewayConfigPatch, + collectQaSuiteGatewayRuntimeOptions, + collectQaSuitePluginIds, + mapQaSuiteWithConcurrency, + normalizeQaSuiteConcurrency, + resolveQaSuiteOutputDir, + scenarioRequiresControlUi, + selectQaSuiteScenarios, + splitModelRef, +} from "./suite-planning.js"; +import { createQaSuiteScenarioFlowApi } from "./suite-runtime-flow.js"; +import { waitForGatewayHealthy, waitForTransportReady } from "./suite-runtime-gateway.js"; +import type { QaSuiteRuntimeEnv } from "./suite-runtime-types.js"; +import { closeQaWebSessions } from "./web-runtime.js"; type QaSuiteStep = { name: string; @@ -90,16 +61,8 @@ export type QaSuiteScenarioResult = { type QaSuiteEnvironment = { lab: QaLabServerHandle; - mock: Awaited> | null; - gateway: Awaited>; - cfg: OpenClawConfig; - transport: QaTransportAdapter; - repoRoot: string; - providerMode: "mock-openai" | "live-frontier"; - primaryModel: string; - alternateModel: string; webSessionIds: Set; -}; +} & QaSuiteRuntimeEnv; export type QaSuiteStartLabFn = (params?: QaLabServerStartParams) => Promise; @@ -131,244 +94,20 @@ function requireQaSuiteStartLab(startLab: QaSuiteStartLabFn | undefined): QaSuit const _QA_IMAGE_UNDERSTANDING_PNG_BASE64 = "iVBORw0KGgoAAAANSUhEUgAAAQAAAAEACAYAAABccqhmAAAAAklEQVR4AewaftIAAAK4SURBVO3BAQEAMAwCIG//znsQgXfJBZjUALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsl9wFmNQAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwP4TIF+7ciPkoAAAAASUVORK5CYII="; + const _QA_IMAGE_UNDERSTANDING_LARGE_PNG_BASE64 = "iVBORw0KGgoAAAANSUhEUgAAAQAAAAEACAYAAABccqhmAAACuklEQVR4Ae3BAQEAMAwCIG//znsQgXfJBZjUALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsl9wFmNQAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwP4TIF+2YE/z8AAAAASUVORK5CYII="; const QA_IMAGE_UNDERSTANDING_VALID_PNG_BASE64 = "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAALklEQVR4nO3OoQEAAAyDsP7/9HYGJgJNdtuVDQAAAAAAACAHxH8AAAAAAACAHvBX0fhq85dN7QAAAABJRU5ErkJggg=="; -type QaSkillStatusEntry = { - name?: string; - eligible?: boolean; - disabled?: boolean; - blockedByAllowlist?: boolean; -}; - -type QaConfigSnapshot = { - hash?: string; - config?: Record; -}; - -type QaDreamingStatus = { - enabled?: boolean; - shortTermCount?: number; - promotedTotal?: number; - phaseSignalCount?: number; - lightPhaseHitCount?: number; - remPhaseHitCount?: number; - phases?: { - deep?: { - managedCronPresent?: boolean; - nextRunAtMs?: number; - }; - }; -}; - -type QaRawSessionStoreEntry = { - sessionId?: string; - status?: string; - spawnedBy?: string; - label?: string; - abortedLastRun?: boolean; - updatedAt?: number; -}; - -const DEFAULT_QA_SUITE_CONCURRENCY = 64; - -function normalizeQaSuiteConcurrency(value: number | undefined, scenarioCount: number) { - const envValue = Number(process.env.OPENCLAW_QA_SUITE_CONCURRENCY); - const raw = - typeof value === "number" && Number.isFinite(value) - ? value - : Number.isFinite(envValue) - ? envValue - : DEFAULT_QA_SUITE_CONCURRENCY; - return Math.max(1, Math.min(Math.floor(raw), Math.max(1, scenarioCount))); -} - -async function mapQaSuiteWithConcurrency( - items: readonly T[], - concurrency: number, - mapper: (item: T, index: number) => Promise, +function liveTurnTimeoutMs( + env: Pick, + fallbackMs: number, ) { - const results = Array.from({ length: items.length }); - let nextIndex = 0; - const workerCount = Math.min(Math.max(1, Math.floor(concurrency)), items.length); - const workers = Array.from({ length: workerCount }, async () => { - while (nextIndex < items.length) { - const index = nextIndex; - nextIndex += 1; - results[index] = await mapper(items[index], index); - } - }); - await Promise.all(workers); - return results; -} - -function splitModelRef(ref: string) { - const slash = ref.indexOf("/"); - if (slash <= 0 || slash === ref.length - 1) { - return null; - } - return { - provider: ref.slice(0, slash), - model: ref.slice(slash + 1), - }; -} - -function normalizeQaConfigString(value: unknown): string | undefined { - return typeof value === "string" && value.trim() ? value.trim() : undefined; -} - -function scenarioMatchesLiveLane(params: { - scenario: ReturnType["scenarios"][number]; - primaryModel: string; - providerMode: "mock-openai" | "live-frontier"; - claudeCliAuthMode?: QaCliBackendAuthMode; -}) { - if (params.providerMode !== "live-frontier") { - return true; - } - const selected = splitModelRef(params.primaryModel); - const config = params.scenario.execution.config ?? {}; - const requiredProvider = normalizeQaConfigString(config.requiredProvider); - if (requiredProvider && selected?.provider !== requiredProvider) { - return false; - } - const requiredModel = normalizeQaConfigString(config.requiredModel); - if (requiredModel && selected?.model !== requiredModel) { - return false; - } - const requiredAuthMode = normalizeQaConfigString(config.authMode); - if (requiredAuthMode && params.claudeCliAuthMode !== requiredAuthMode) { - return false; - } - return true; -} - -function selectQaSuiteScenarios(params: { - scenarios: ReturnType["scenarios"]; - scenarioIds?: string[]; - providerMode: "mock-openai" | "live-frontier"; - primaryModel: string; - claudeCliAuthMode?: QaCliBackendAuthMode; -}) { - const requestedScenarioIds = - params.scenarioIds && params.scenarioIds.length > 0 ? new Set(params.scenarioIds) : null; - const requestedScenarios = requestedScenarioIds - ? params.scenarios.filter((scenario) => requestedScenarioIds.has(scenario.id)) - : params.scenarios; - if (requestedScenarioIds) { - const foundScenarioIds = new Set(requestedScenarios.map((scenario) => scenario.id)); - const missingScenarioIds = [...requestedScenarioIds].filter( - (scenarioId) => !foundScenarioIds.has(scenarioId), - ); - if (missingScenarioIds.length > 0) { - throw new Error(`unknown QA scenario id(s): ${missingScenarioIds.join(", ")}`); - } - return requestedScenarios; - } - return requestedScenarios.filter((scenario) => - scenarioMatchesLiveLane({ - scenario, - providerMode: params.providerMode, - primaryModel: params.primaryModel, - claudeCliAuthMode: params.claudeCliAuthMode, - }), - ); -} - -function collectQaSuitePluginIds( - scenarios: ReturnType["scenarios"], -) { - return [ - ...new Set( - scenarios.flatMap((scenario) => - Array.isArray(scenario.plugins) - ? scenario.plugins - .map((pluginId) => pluginId.trim()) - .filter((pluginId) => pluginId.length > 0) - : [], - ), - ), - ]; -} - -function isQaPlainObject(value: unknown): value is Record { - return value !== null && typeof value === "object" && !Array.isArray(value); -} - -function applyQaMergePatch(base: unknown, patch: unknown): unknown { - if (!isQaPlainObject(patch)) { - return patch; - } - const result = isQaPlainObject(base) ? { ...base } : {}; - for (const [key, value] of Object.entries(patch)) { - if (value === null) { - delete result[key]; - continue; - } - result[key] = isQaPlainObject(value) ? applyQaMergePatch(result[key], value) : value; - } - return result; -} - -function collectQaSuiteGatewayConfigPatch( - scenarios: ReturnType["scenarios"], -): Record | undefined { - let merged: Record | undefined; - for (const scenario of scenarios) { - if (!isQaPlainObject(scenario.gatewayConfigPatch)) { - continue; - } - merged = applyQaMergePatch(merged ?? {}, scenario.gatewayConfigPatch) as Record< - string, - unknown - >; - } - return merged; -} - -function collectQaSuiteGatewayRuntimeOptions( - scenarios: ReturnType["scenarios"], -) { - let forwardHostHome = false; - for (const scenario of scenarios) { - if (scenario.gatewayRuntime?.forwardHostHome === true) { - forwardHostHome = true; - } - } - return forwardHostHome ? { forwardHostHome: true } : undefined; -} - -function scenarioRequiresControlUi( - scenario: ReturnType["scenarios"][number], -) { - return normalizeLowercaseStringOrEmpty(scenario.surface) === "control-ui"; -} - -function liveTurnTimeoutMs(env: QaSuiteEnvironment, fallbackMs: number) { return resolveQaLiveTurnTimeoutMs(env, fallbackMs); } -async function resolveQaSuiteOutputDir(repoRoot: string, outputDir?: string) { - const targetDir = !outputDir - ? path.join(repoRoot, ".artifacts", "qa-e2e", `suite-${Date.now().toString(36)}`) - : outputDir; - if (!path.isAbsolute(targetDir)) { - const resolved = resolveRepoRelativeOutputDir(repoRoot, targetDir); - if (!resolved) { - throw new Error("QA suite outputDir must be set."); - } - return await ensureRepoBoundDirectory(repoRoot, resolved, "QA suite outputDir", { - mode: 0o700, - }); - } - return await ensureRepoBoundDirectory(repoRoot, targetDir, "QA suite outputDir", { - mode: 0o700, - }); -} - export type QaSuiteResult = { outputDir: string; reportPath: string; @@ -378,155 +117,6 @@ export type QaSuiteResult = { watchUrl: string; }; -async function waitForCondition( - check: () => T | Promise | null | undefined, - timeoutMs = 15_000, - intervalMs = 100, -): Promise { - const startedAt = Date.now(); - while (Date.now() - startedAt < timeoutMs) { - const value = await check(); - if (value !== null && value !== undefined) { - return value; - } - await sleep(intervalMs); - } - throw new Error(`timed out after ${timeoutMs}ms`); -} - -function findFailureOutboundMessage( - state: QaTransportState, - options?: { sinceIndex?: number; cursorSpace?: "all" | "outbound" }, -) { - return findTransportFailureOutboundMessage(state, options); -} - -function createScenarioWaitForCondition(state: QaTransportState) { - return createFailureAwareTransportWaitForCondition(state); -} - -async function waitForOutboundMessage( - state: QaTransportState, - predicate: (message: QaBusMessage) => boolean, - timeoutMs = 15_000, - options?: { sinceIndex?: number }, -) { - return await waitForCondition(() => { - const failureMessage = findFailureOutboundMessage(state, options); - if (failureMessage) { - throw new Error(extractQaFailureReplyText(failureMessage.text) ?? failureMessage.text); - } - const match = state - .getSnapshot() - .messages.filter((message) => message.direction === "outbound") - .slice(options?.sinceIndex ?? 0) - .find(predicate); - if (!match) { - return undefined; - } - const failureReply = extractQaFailureReplyText(match.text); - if (failureReply) { - throw new Error(failureReply); - } - return match; - }, timeoutMs); -} - -async function waitForNoOutbound(state: QaTransportState, timeoutMs = 1_200) { - await sleep(timeoutMs); - const outbound = state - .getSnapshot() - .messages.filter((message) => message.direction === "outbound"); - if (outbound.length > 0) { - throw new Error(`expected no outbound messages, saw ${outbound.length}`); - } -} - -function recentOutboundSummary(state: QaTransportState, limit = 5) { - return state - .getSnapshot() - .messages.filter((message) => message.direction === "outbound") - .slice(-limit) - .map((message) => `${message.conversation.id}:${message.text}`) - .join(" | "); -} - -function formatConversationTranscript( - state: QaTransportState, - params: { - conversationId: string; - threadId?: string; - limit?: number; - }, -) { - return formatTransportTranscript(state, params); -} - -function readTransportTranscript( - state: QaTransportState, - params: { - conversationId: string; - threadId?: string; - direction?: "inbound" | "outbound"; - limit?: number; - }, -) { - const messages = state - .getSnapshot() - .messages.filter( - (message) => - message.conversation.id === params.conversationId && - (params.threadId ? message.threadId === params.threadId : true) && - (params.direction ? message.direction === params.direction : true), - ); - return params.limit ? messages.slice(-params.limit) : messages; -} - -function formatTransportTranscript( - state: QaTransportState, - params: { - conversationId: string; - threadId?: string; - direction?: "inbound" | "outbound"; - limit?: number; - }, -) { - const messages = readTransportTranscript(state, params); - return messages - .map((message) => { - const direction = message.direction === "inbound" ? "user" : "assistant"; - const speaker = message.senderName?.trim() || message.senderId; - const attachmentSummary = - message.attachments && message.attachments.length > 0 - ? ` [attachments: ${message.attachments - .map((attachment) => `${attachment.kind}:${attachment.fileName ?? attachment.id}`) - .join(", ")}]` - : ""; - return `${direction.toUpperCase()} ${speaker}: ${message.text}${attachmentSummary}`; - }) - .join("\n\n"); -} - -async function waitForTransportOutboundMessage( - state: QaTransportState, - predicate: (message: QaBusMessage) => boolean, - timeoutMs?: number, -) { - return await waitForOutboundMessage(state, predicate, timeoutMs); -} - -async function waitForChannelOutboundMessage( - state: QaTransportState, - predicate: (message: QaBusMessage) => boolean, - timeoutMs?: number, -) { - return await waitForTransportOutboundMessage(state, predicate, timeoutMs); -} - -async function waitForNoTransportOutbound(state: QaTransportState, timeoutMs = 1_200) { - await waitForNoOutbound(state, timeoutMs); -} - async function runScenario(name: string, steps: QaSuiteStep[]): Promise { const stepResults: QaReportCheck[] = []; for (const step of steps) { @@ -568,760 +158,18 @@ async function runScenario(name: string, steps: QaSuiteStep[]): Promise(url: string): Promise { - const { response, release } = await fetchWithSsrFGuard({ - url, - policy: { allowPrivateNetwork: true }, - auditContext: "qa-lab-suite-fetch-json", - }); - try { - if (!response.ok) { - throw new Error(`request failed ${response.status}: ${url}`); - } - return (await response.json()) as T; - } finally { - await release(); - } -} - -async function waitForGatewayHealthy(env: QaSuiteEnvironment, timeoutMs = 45_000) { - await waitForCondition( - async () => { - try { - const { response, release } = await fetchWithSsrFGuard({ - url: `${env.gateway.baseUrl}/readyz`, - policy: { allowPrivateNetwork: true }, - auditContext: "qa-lab-suite-wait-for-gateway-healthy", - }); - try { - return response.ok ? true : undefined; - } finally { - await release(); - } - } catch { - return undefined; - } - }, - timeoutMs, - 250, - ); -} - -async function waitForTransportReady(env: QaSuiteEnvironment, timeoutMs = 45_000) { - await env.transport.waitReady({ - gateway: env.gateway, - timeoutMs, - }); -} - -async function waitForQaChannelReady(env: QaSuiteEnvironment, timeoutMs = 45_000) { - // Compatibility alias for existing markdown scenarios while qa-channel - // remains the only suite transport. - await waitForTransportReady(env, timeoutMs); -} - -async function waitForConfigRestartSettle( - env: QaSuiteEnvironment, - restartDelayMs = 1_000, - timeoutMs = 60_000, -) { - // config.patch/config.apply can still restart asynchronously after the RPC returns - // in reload-off or restart-required hot-mode paths. Give that window time to fire. - await sleep(restartDelayMs + 750); - await waitForGatewayHealthy(env, timeoutMs); -} - -function isGatewayRestartRace(error: unknown) { - const text = formatGatewayPrimaryErrorText(error); - return ( - text.includes("gateway closed (1012)") || - text.includes("gateway closed (1006") || - text.includes("abnormal closure") || - text.includes("service restart") - ); -} - -function isConfigHashConflict(error: unknown) { - return formatGatewayPrimaryErrorText(error).includes("config changed since last load"); -} - -function formatGatewayPrimaryErrorText(error: unknown) { - const text = formatErrorMessage(error); - const gatewayLogsIndex = text.indexOf("\nGateway logs:"); - return (gatewayLogsIndex >= 0 ? text.slice(0, gatewayLogsIndex) : text).trim(); -} - -function getGatewayRetryAfterMs(error: unknown) { - const text = formatGatewayPrimaryErrorText(error); - const millisecondsMatch = /retryAfterMs["=: ]+(\d+)/i.exec(text); - if (millisecondsMatch) { - const parsed = Number(millisecondsMatch[1]); - if (Number.isFinite(parsed) && parsed > 0) { - return parsed; - } - } - const secondsMatch = /retry after (\d+)s/i.exec(text); - if (secondsMatch) { - const parsed = Number(secondsMatch[1]); - if (Number.isFinite(parsed) && parsed > 0) { - return parsed * 1_000; - } - } - return null; -} - -async function readConfigSnapshot(env: QaSuiteEnvironment) { - const snapshot = (await env.gateway.call( - "config.get", - {}, - { timeoutMs: 60_000 }, - )) as QaConfigSnapshot; - if (!snapshot.hash || !snapshot.config) { - throw new Error("config.get returned no hash/config"); - } - return { - hash: snapshot.hash, - config: snapshot.config, - } satisfies { hash: string; config: Record }; -} - -async function runConfigMutation(params: { - env: QaSuiteEnvironment; - action: "config.patch" | "config.apply"; - raw: string; - sessionKey?: string; - deliveryContext?: { - channel?: string; - to?: string; - accountId?: string; - threadId?: string | number; - }; - note?: string; - restartDelayMs?: number; -}) { - const restartDelayMs = params.restartDelayMs ?? 1_000; - let lastConflict: unknown = null; - for (let attempt = 1; attempt <= 8; attempt += 1) { - const snapshot = await readConfigSnapshot(params.env); - try { - const result = await params.env.gateway.call( - params.action, - { - raw: params.raw, - baseHash: snapshot.hash, - ...(params.sessionKey ? { sessionKey: params.sessionKey } : {}), - ...(params.deliveryContext ? { deliveryContext: params.deliveryContext } : {}), - ...(params.note ? { note: params.note } : {}), - restartDelayMs, - }, - { timeoutMs: 45_000 }, - ); - await waitForConfigRestartSettle(params.env, restartDelayMs); - return result; - } catch (error) { - if (isConfigHashConflict(error)) { - lastConflict = error; - await waitForGatewayHealthy(params.env, Math.max(15_000, restartDelayMs + 10_000)).catch( - () => undefined, - ); - continue; - } - const retryAfterMs = getGatewayRetryAfterMs(error); - if (retryAfterMs && attempt < 8) { - await sleep(retryAfterMs + 500); - await waitForGatewayHealthy(params.env, Math.max(15_000, restartDelayMs + 10_000)).catch( - () => undefined, - ); - continue; - } - if (!isGatewayRestartRace(error)) { - throw error; - } - await waitForConfigRestartSettle(params.env, restartDelayMs); - return { ok: true, restarted: true }; - } - } - throw lastConflict ?? new Error(`${params.action} failed after retrying config hash conflicts`); -} - -async function patchConfig(params: { - env: QaSuiteEnvironment; - patch: Record; - sessionKey?: string; - deliveryContext?: { - channel?: string; - to?: string; - accountId?: string; - threadId?: string | number; - }; - note?: string; - restartDelayMs?: number; -}) { - return await runConfigMutation({ - env: params.env, - action: "config.patch", - raw: JSON.stringify(params.patch, null, 2), - sessionKey: params.sessionKey, - deliveryContext: params.deliveryContext, - note: params.note, - restartDelayMs: params.restartDelayMs, - }); -} - -async function applyConfig(params: { - env: QaSuiteEnvironment; - nextConfig: Record; - sessionKey?: string; - deliveryContext?: { - channel?: string; - to?: string; - accountId?: string; - threadId?: string | number; - }; - note?: string; - restartDelayMs?: number; -}) { - return await runConfigMutation({ - env: params.env, - action: "config.apply", - raw: JSON.stringify(params.nextConfig, null, 2), - sessionKey: params.sessionKey, - deliveryContext: params.deliveryContext, - note: params.note, - restartDelayMs: params.restartDelayMs, - }); -} - -async function createSession(env: QaSuiteEnvironment, label: string, key?: string) { - const created = (await env.gateway.call( - "sessions.create", - { - label, - ...(key ? { key } : {}), - }, - { - timeoutMs: liveTurnTimeoutMs(env, 60_000), - }, - )) as { key?: string }; - const sessionKey = created.key?.trim(); - if (!sessionKey) { - throw new Error("sessions.create returned no key"); - } - return sessionKey; -} - -async function readEffectiveTools(env: QaSuiteEnvironment, sessionKey: string) { - const payload = (await env.gateway.call( - "tools.effective", - { - sessionKey, - }, - { - timeoutMs: liveTurnTimeoutMs(env, 90_000), - }, - )) as { - groups?: Array<{ tools?: Array<{ id?: string }> }>; - }; - const ids = new Set(); - for (const group of payload.groups ?? []) { - for (const tool of group.tools ?? []) { - if (tool.id?.trim()) { - ids.add(tool.id.trim()); - } - } - } - return ids; -} - -async function readSkillStatus(env: QaSuiteEnvironment, agentId = "qa") { - const payload = (await env.gateway.call( - "skills.status", - { - agentId, - }, - { - timeoutMs: liveTurnTimeoutMs(env, 45_000), - }, - )) as { - skills?: QaSkillStatusEntry[]; - }; - return payload.skills ?? []; -} - -async function readRawQaSessionStore(env: QaSuiteEnvironment) { - const storePath = path.join( - env.gateway.tempRoot, - "state", - "agents", - "qa", - "sessions", - "sessions.json", - ); - try { - const raw = await fs.readFile(storePath, "utf8"); - return JSON.parse(raw) as Record; - } catch (error) { - if ((error as NodeJS.ErrnoException).code === "ENOENT") { - return {}; - } - throw error; - } -} - -async function runQaCli( - env: QaSuiteEnvironment, - args: string[], - opts?: { timeoutMs?: number; json?: boolean }, -) { - const stdout: Buffer[] = []; - const stderr: Buffer[] = []; - const distEntryPath = path.join(env.repoRoot, "dist", "index.js"); - await new Promise((resolve, reject) => { - const child = spawn(process.execPath, [distEntryPath, ...args], { - cwd: env.gateway.tempRoot, - env: env.gateway.runtimeEnv, - stdio: ["ignore", "pipe", "pipe"], - }); - const timeout = setTimeout(() => { - child.kill("SIGKILL"); - reject(new Error(`qa cli timed out: openclaw ${args.join(" ")}`)); - }, opts?.timeoutMs ?? 60_000); - child.stdout.on("data", (chunk) => stdout.push(Buffer.from(chunk))); - child.stderr.on("data", (chunk) => stderr.push(Buffer.from(chunk))); - child.once("error", (error) => { - clearTimeout(timeout); - reject(error); - }); - child.once("exit", (code) => { - clearTimeout(timeout); - if (code === 0) { - resolve(); - return; - } - reject( - new Error( - `qa cli failed (${code ?? "unknown"}): ${Buffer.concat(stderr).toString("utf8").trim()}`, - ), - ); - }); - }); - const text = Buffer.concat(stdout).toString("utf8").trim(); - if (!opts?.json) { - return text; - } - return text ? (JSON.parse(text) as unknown) : {}; -} - -function extractMediaPathFromText(text: string | undefined): string | undefined { - return /MEDIA:([^\n]+)/.exec(text ?? "")?.[1]?.trim(); -} - -async function resolveGeneratedImagePath(params: { - env: QaSuiteEnvironment; - promptSnippet: string; - startedAtMs: number; - timeoutMs: number; -}) { - return await waitForCondition( - async () => { - if (params.env.mock) { - const requests = await fetchJson>( - `${params.env.mock.baseUrl}/debug/requests`, - ); - for (let index = requests.length - 1; index >= 0; index -= 1) { - const request = requests[index]; - if (!(request.allInputText ?? "").includes(params.promptSnippet)) { - continue; - } - const mediaPath = extractMediaPathFromText(request.toolOutput); - if (mediaPath) { - return mediaPath; - } - } - } - - const mediaDir = path.join( - params.env.gateway.tempRoot, - "state", - "media", - "tool-image-generation", - ); - const entries = await fs.readdir(mediaDir).catch(() => []); - const candidates = await Promise.all( - entries.map(async (entry) => { - const fullPath = path.join(mediaDir, entry); - const stat = await fs.stat(fullPath).catch(() => null); - if (!stat?.isFile()) { - return null; - } - return { - fullPath, - mtimeMs: stat.mtimeMs, - }; - }), - ); - return candidates - .filter((entry): entry is NonNullable => Boolean(entry)) - .filter((entry) => entry.mtimeMs >= params.startedAtMs - 1_000) - .toSorted((left, right) => right.mtimeMs - left.mtimeMs) - .at(0)?.fullPath; - }, - params.timeoutMs, - 250, - ); -} - -async function startAgentRun( - env: QaSuiteEnvironment, - params: { - sessionKey: string; - message: string; - to?: string; - threadId?: string; - provider?: string; - model?: string; - timeoutMs?: number; - attachments?: Array<{ - mimeType: string; - fileName: string; - content: string; - }>; - }, -) { - const target = params.to ?? "dm:qa-operator"; - const delivery = env.transport.buildAgentDelivery({ target }); - const started = (await env.gateway.call( - "agent", - { - idempotencyKey: randomUUID(), - agentId: "qa", - sessionKey: params.sessionKey, - message: params.message, - deliver: true, - channel: delivery.channel, - to: target, - replyChannel: delivery.replyChannel, - replyTo: delivery.replyTo, - ...(params.threadId ? { threadId: params.threadId } : {}), - ...(params.provider ? { provider: params.provider } : {}), - ...(params.model ? { model: params.model } : {}), - ...(params.attachments ? { attachments: params.attachments } : {}), - }, - { - timeoutMs: params.timeoutMs ?? 30_000, - }, - )) as { runId?: string; status?: string }; - if (!started.runId) { - throw new Error(`agent call did not return a runId: ${JSON.stringify(started)}`); - } - return started; -} - -async function waitForAgentRun(env: QaSuiteEnvironment, runId: string, timeoutMs = 30_000) { - return (await env.gateway.call( - "agent.wait", - { - runId, - timeoutMs, - }, - { - timeoutMs: timeoutMs + 5_000, - }, - )) as { status?: string; error?: string }; -} - -async function listCronJobs(env: QaSuiteEnvironment) { - const payload = (await env.gateway.call( - "cron.list", - { - includeDisabled: true, - limit: 200, - sortBy: "name", - sortDir: "asc", - }, - { timeoutMs: 30_000 }, - )) as { - jobs?: Array<{ - id?: string; - name?: string; - payload?: { kind?: string; text?: string }; - state?: { nextRunAtMs?: number }; - }>; - }; - return payload.jobs ?? []; -} - -async function readDoctorMemoryStatus(env: QaSuiteEnvironment) { - return (await env.gateway.call("doctor.memory.status", {}, { timeoutMs: 30_000 })) as { - dreaming?: QaDreamingStatus; - }; -} - -async function forceMemoryIndex(params: { - env: QaSuiteEnvironment; - query: string; - expectedNeedle: string; -}) { - await waitForGatewayHealthy(params.env, 60_000); - await waitForTransportReady(params.env, 60_000); - await runQaCli(params.env, ["memory", "index", "--agent", "qa", "--force"], { - timeoutMs: liveTurnTimeoutMs(params.env, 60_000), - }); - const payload = await waitForCondition( - async () => { - const result = (await runQaCli( - params.env, - ["memory", "search", "--agent", "qa", "--json", "--query", params.query], - { - timeoutMs: liveTurnTimeoutMs(params.env, 60_000), - json: true, - }, - )) as { results?: Array<{ snippet?: string; text?: string; path?: string }> }; - const haystack = JSON.stringify(result.results ?? []); - return haystack.includes(params.expectedNeedle) ? result : undefined; - }, - liveTurnTimeoutMs(params.env, 20_000), - 500, - ); - const haystack = JSON.stringify(payload.results ?? []); - if (!haystack.includes(params.expectedNeedle)) { - throw new Error(`memory index missing expected fact after reindex: ${haystack}`); - } -} - -function findSkill(skills: QaSkillStatusEntry[], name: string) { - return skills.find((skill) => skill.name === name); -} - -async function writeWorkspaceSkill(params: { - env: QaSuiteEnvironment; - name: string; - body: string; -}) { - const skillDir = path.join(params.env.gateway.workspaceDir, "skills", params.name); - await fs.mkdir(skillDir, { recursive: true }); - const skillPath = path.join(skillDir, "SKILL.md"); - await fs.writeFile(skillPath, `${params.body.trim()}\n`, "utf8"); - return skillPath; -} - -async function callPluginToolsMcp(params: { - env: QaSuiteEnvironment; - toolName: string; - args: Record; -}) { - const transportEnv = Object.fromEntries( - Object.entries(params.env.gateway.runtimeEnv).filter( - (entry): entry is [string, string] => typeof entry[1] === "string", - ), - ); - const transport = new StdioClientTransport({ - command: process.execPath, - args: ["--import", "tsx", "src/mcp/plugin-tools-serve.ts"], - stderr: "pipe", - env: transportEnv, - }); - const client = new Client({ name: "openclaw-qa-suite", version: "0.0.0" }, {}); - try { - await client.connect(transport); - const listed = await client.listTools(); - const tool = listed.tools.find((entry) => entry.name === params.toolName); - if (!tool) { - throw new Error(`MCP tool missing: ${params.toolName}`); - } - return await client.callTool({ - name: params.toolName, - arguments: params.args, - }); - } finally { - await client.close().catch(() => {}); - } -} - -async function runAgentPrompt( - env: QaSuiteEnvironment, - params: { - sessionKey: string; - message: string; - to?: string; - threadId?: string; - provider?: string; - model?: string; - timeoutMs?: number; - attachments?: Array<{ - mimeType: string; - fileName: string; - content: string; - }>; - }, -) { - const started = await startAgentRun(env, params); - const waited = await waitForAgentRun(env, started.runId!, params.timeoutMs ?? 30_000); - if (waited.status !== "ok") { - throw new Error( - `agent.wait returned ${waited.status ?? "unknown"}: ${waited.error ?? "no error"}`, - ); - } - return { - started, - waited, - }; -} - -async function ensureImageGenerationConfigured(env: QaSuiteEnvironment) { - const imageModelRef = "openai/gpt-image-1"; - await patchConfig({ - env, - patch: - env.providerMode === "mock-openai" - ? { - plugins: { - allow: [...new Set(["memory-core", "openai", ...env.transport.requiredPluginIds])], - entries: { - openai: { - enabled: true, - }, - }, - }, - models: { - providers: { - openai: { - baseUrl: `${env.mock?.baseUrl}/v1`, - apiKey: "test", - api: "openai-responses", - models: [ - { - id: "gpt-image-1", - name: "gpt-image-1", - api: "openai-responses", - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128_000, - maxTokens: 4096, - }, - ], - }, - }, - }, - agents: { - defaults: { - imageGenerationModel: { - primary: imageModelRef, - }, - }, - }, - } - : { - agents: { - defaults: { - imageGenerationModel: { - primary: imageModelRef, - }, - }, - }, - }, - }); - await waitForGatewayHealthy(env); - await waitForTransportReady(env, 60_000); -} - -async function handleQaAction(params: { - env: QaSuiteEnvironment; - action: QaTransportActionName; - args: Record; -}) { - const result = await params.env.transport.handleAction({ - action: params.action, - args: params.args, - cfg: params.env.cfg, - }); - return extractQaToolPayload(result as Parameters[0]); -} - function createScenarioFlowApi( env: QaSuiteEnvironment, scenario: ReturnType["scenarios"][number], ) { - return createQaScenarioRuntimeApi({ + return createQaSuiteScenarioFlowApi({ env, scenario, - deps: { - fs, - path, - sleep, - randomUUID, - runScenario, - waitForOutboundMessage, - waitForTransportOutboundMessage, - waitForChannelOutboundMessage, - waitForNoOutbound, - waitForNoTransportOutbound, - recentOutboundSummary, - formatConversationTranscript, - readTransportTranscript, - formatTransportTranscript, - fetchJson, - waitForGatewayHealthy, - waitForTransportReady, - waitForQaChannelReady, - browserRequest: callQaBrowserRequest, - waitForBrowserReady: waitForQaBrowserReady, - browserOpenTab: qaBrowserOpenTab, - browserSnapshot: qaBrowserSnapshot, - browserAct: qaBrowserAct, - webOpenPage: async (params: Parameters[0]) => { - const opened = await qaWebOpenPage(params); - env.webSessionIds.add(opened.pageId); - return opened; - }, - webWait: qaWebWait, - webType: qaWebType, - webSnapshot: qaWebSnapshot, - webEvaluate: qaWebEvaluate, - waitForConfigRestartSettle, - patchConfig, - applyConfig, - readConfigSnapshot, - createSession, - readEffectiveTools, - readSkillStatus, - readRawQaSessionStore, - runQaCli, - extractMediaPathFromText, - resolveGeneratedImagePath, - startAgentRun, - waitForAgentRun, - listCronJobs, - waitForCronRunCompletion, - readDoctorMemoryStatus, - forceMemoryIndex, - findSkill, - writeWorkspaceSkill, - callPluginToolsMcp, - runAgentPrompt, - ensureImageGenerationConfigured, - handleQaAction, - extractQaToolPayload, - formatMemoryDreamingDay, - resolveSessionTranscriptsDirForAgent, - buildAgentSessionKey, - normalizeLowercaseStringOrEmpty, - formatErrorMessage, - liveTurnTimeoutMs, - resolveQaLiveTurnTimeoutMs, - splitModelRef, - qaChannelPlugin, - hasDiscoveryLabels, - reportsDiscoveryScopeLeak, - reportsMissingDiscoveryFiles, - hasModelSwitchContinuityEvidence, - }, + runScenario, + splitModelRef, + formatErrorMessage, + liveTurnTimeoutMs, + resolveQaLiveTurnTimeoutMs, constants: { imageUnderstandingPngBase64: _QA_IMAGE_UNDERSTANDING_PNG_BASE64, imageUnderstandingLargePngBase64: _QA_IMAGE_UNDERSTANDING_LARGE_PNG_BASE64, @@ -1330,27 +178,6 @@ function createScenarioFlowApi( }); } -export const qaSuiteTesting = { - collectQaSuiteGatewayConfigPatch, - collectQaSuiteGatewayRuntimeOptions, - collectQaSuitePluginIds, - createScenarioWaitForCondition, - findFailureOutboundMessage, - getGatewayRetryAfterMs, - isConfigHashConflict, - mapQaSuiteWithConcurrency, - normalizeQaSuiteConcurrency, - scenarioMatchesLiveLane, - scenarioRequiresControlUi, - selectQaSuiteScenarios, - readTransportTranscript, - formatTransportTranscript, - resolveQaSuiteOutputDir, - waitForTransportOutboundMessage, - waitForNoTransportOutbound, - waitForOutboundMessage, -}; - async function runScenarioDefinition( env: QaSuiteEnvironment, scenario: ReturnType["scenarios"][number], @@ -1527,6 +354,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise 1 && selectedCatalogScenarios.length > 1) { diff --git a/extensions/qa-matrix/src/runners/contract/scenario-catalog.ts b/extensions/qa-matrix/src/runners/contract/scenario-catalog.ts index 97b26e4ac9c..202e96d24f1 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-catalog.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-catalog.ts @@ -21,6 +21,8 @@ export type MatrixQaScenarioId = | "matrix-room-thread-reply-override" | "matrix-room-quiet-streaming-preview" | "matrix-room-block-streaming" + | "matrix-room-image-understanding-attachment" + | "matrix-room-generated-image-delivery" | "matrix-dm-reply-shape" | "matrix-dm-shared-session-notice" | "matrix-dm-thread-reply-override" @@ -47,6 +49,7 @@ export const MATRIX_QA_BLOCK_ROOM_KEY = "block"; export const MATRIX_QA_DRIVER_DM_ROOM_KEY = "driver-dm"; export const MATRIX_QA_DRIVER_DM_SHARED_ROOM_KEY = "driver-dm-shared"; export const MATRIX_QA_HOMESERVER_ROOM_KEY = "homeserver"; +export const MATRIX_QA_MEDIA_ROOM_KEY = "media"; export const MATRIX_QA_MEMBERSHIP_ROOM_KEY = "membership"; export const MATRIX_QA_RESTART_ROOM_KEY = "restart"; export const MATRIX_QA_SECONDARY_ROOM_KEY = "secondary"; @@ -123,6 +126,12 @@ const MATRIX_QA_MEMBERSHIP_ROOM_TOPOLOGY = buildMatrixQaSingleGroupTopology({ requireMention: true, }); +const MATRIX_QA_MEDIA_ROOM_TOPOLOGY = buildMatrixQaSingleGroupTopology({ + key: MATRIX_QA_MEDIA_ROOM_KEY, + name: "Matrix QA Media Room", + requireMention: true, +}); + const MATRIX_QA_RESTART_ROOM_TOPOLOGY = buildMatrixQaSingleGroupTopology({ key: MATRIX_QA_RESTART_ROOM_KEY, name: "Matrix QA Restart Room", @@ -202,6 +211,18 @@ export const MATRIX_QA_SCENARIOS: MatrixQaScenarioDefinition[] = [ streaming: "quiet", }, }, + { + id: "matrix-room-image-understanding-attachment", + timeoutMs: 60_000, + title: "Matrix image attachments reach the model vision path", + topology: MATRIX_QA_MEDIA_ROOM_TOPOLOGY, + }, + { + id: "matrix-room-generated-image-delivery", + timeoutMs: 60_000, + title: "Matrix generated images deliver as real image attachments", + topology: MATRIX_QA_MEDIA_ROOM_TOPOLOGY, + }, { id: "matrix-dm-reply-shape", timeoutMs: 45_000, diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts index a2fb894ff86..ca71b82eb7a 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts @@ -1,8 +1,10 @@ import { randomUUID } from "node:crypto"; +import { encodePngRgba, fillPixel } from "openclaw/plugin-sdk/media-runtime"; import type { MatrixQaObservedEvent } from "../../substrate/events.js"; import { MATRIX_QA_BLOCK_ROOM_KEY, MATRIX_QA_HOMESERVER_ROOM_KEY, + MATRIX_QA_MEDIA_ROOM_KEY, MATRIX_QA_MEMBERSHIP_ROOM_KEY, MATRIX_QA_RESTART_ROOM_KEY, resolveMatrixQaScenarioRoomId, @@ -32,6 +34,62 @@ import { import type { MatrixQaCanaryArtifact, MatrixQaScenarioExecution } from "./scenario-types.js"; type MatrixQaThreadScenarioResult = Awaited>; +const MATRIX_QA_IMAGE_ATTACHMENT_FILENAME = "red-top-blue-bottom.png"; +const MATRIX_QA_IMAGE_COLOR_GROUPS = [["red"], ["blue"]] as const; + +function createMatrixQaSplitColorImagePng() { + const width = 16; + const height = 16; + const rgba = Buffer.alloc(width * height * 4); + for (let y = 0; y < height; y += 1) { + const isTopHalf = y < height / 2; + for (let x = 0; x < width; x += 1) { + if (isTopHalf) { + fillPixel(rgba, x, y, width, 255, 0, 0); + continue; + } + fillPixel(rgba, x, y, width, 0, 0, 255); + } + } + return encodePngRgba(rgba, width, height); +} + +function buildMatrixQaImageUnderstandingPrompt(sutUserId: string) { + return `${sutUserId} Image understanding check: describe the top and bottom colors in the attached image in one short sentence.`; +} + +function buildMatrixQaImageGenerationPrompt(sutUserId: string) { + return `${sutUserId} Image generation check: generate a QA lighthouse image and summarize it in one short sentence.`; +} + +function hasMatrixQaExpectedColorReply(body: string | undefined) { + const normalizedBody = body?.toLowerCase() ?? ""; + return MATRIX_QA_IMAGE_COLOR_GROUPS.every((group) => + group.some((color) => normalizedBody.includes(color)), + ); +} + +function requireMatrixQaImageAttachment(event: MatrixQaObservedEvent, scenarioLabel: string) { + if (event.msgtype !== "m.image" || event.attachment?.kind !== "image") { + throw new Error( + `${scenarioLabel} expected an m.image attachment but saw ${event.msgtype ?? ""}`, + ); + } + return event.attachment; +} + +function buildMatrixQaAttachmentDetailLines(params: { + attachmentEvent: MatrixQaObservedEvent; + label: string; +}) { + return [ + `${params.label} event: ${params.attachmentEvent.eventId}`, + `${params.label} msgtype: ${params.attachmentEvent.msgtype ?? ""}`, + `${params.label} attachment kind: ${params.attachmentEvent.attachment?.kind ?? ""}`, + `${params.label} attachment filename: ${params.attachmentEvent.attachment?.filename ?? ""}`, + `${params.label} body preview: ${params.attachmentEvent.body?.slice(0, 200) ?? ""}`, + ]; +} function assertMatrixQaInReplyTarget(params: { actualEventId?: string; @@ -545,6 +603,110 @@ export async function runBlockStreamingScenario(context: MatrixQaScenarioContext } satisfies MatrixQaScenarioExecution; } +export async function runImageUnderstandingAttachmentScenario(context: MatrixQaScenarioContext) { + const roomId = resolveMatrixQaScenarioRoomId(context, MATRIX_QA_MEDIA_ROOM_KEY); + const { client, startSince } = await primeMatrixQaDriverScenarioClient(context); + const triggerBody = buildMatrixQaImageUnderstandingPrompt(context.sutUserId); + const driverEventId = await client.sendMediaMessage({ + body: triggerBody, + buffer: createMatrixQaSplitColorImagePng(), + contentType: "image/png", + fileName: MATRIX_QA_IMAGE_ATTACHMENT_FILENAME, + kind: "image", + mentionUserIds: [context.sutUserId], + roomId, + }); + const matched = await client.waitForRoomEvent({ + observedEvents: context.observedEvents, + predicate: (event) => + event.roomId === roomId && + event.sender === context.sutUserId && + event.type === "m.room.message" && + event.relatesTo === undefined && + isMatrixQaMessageLikeKind(event.kind) && + hasMatrixQaExpectedColorReply(event.body), + roomId, + since: startSince, + timeoutMs: context.timeoutMs, + }); + advanceMatrixQaActorCursor({ + actorId: "driver", + syncState: context.syncState, + nextSince: matched.since, + startSince, + }); + const reply = buildMatrixReplyArtifact(matched.event); + return { + artifacts: { + attachmentFilename: MATRIX_QA_IMAGE_ATTACHMENT_FILENAME, + driverEventId, + reply, + roomId, + triggerBody, + }, + details: [ + `room id: ${roomId}`, + `driver attachment event: ${driverEventId}`, + `sent attachment filename: ${MATRIX_QA_IMAGE_ATTACHMENT_FILENAME}`, + ...buildMatrixReplyDetails("reply", reply), + ].join("\n"), + } satisfies MatrixQaScenarioExecution; +} + +export async function runGeneratedImageDeliveryScenario(context: MatrixQaScenarioContext) { + const roomId = resolveMatrixQaScenarioRoomId(context, MATRIX_QA_MEDIA_ROOM_KEY); + const { client, startSince } = await primeMatrixQaDriverScenarioClient(context); + const triggerBody = buildMatrixQaImageGenerationPrompt(context.sutUserId); + const driverEventId = await client.sendTextMessage({ + body: triggerBody, + mentionUserIds: [context.sutUserId], + roomId, + }); + const matched = await client.waitForRoomEvent({ + observedEvents: context.observedEvents, + predicate: (event) => + event.roomId === roomId && + event.sender === context.sutUserId && + event.type === "m.room.message" && + event.relatesTo === undefined && + event.msgtype === "m.image" && + event.attachment?.kind === "image", + roomId, + since: startSince, + timeoutMs: context.timeoutMs, + }); + advanceMatrixQaActorCursor({ + actorId: "driver", + syncState: context.syncState, + nextSince: matched.since, + startSince, + }); + const attachment = requireMatrixQaImageAttachment( + matched.event, + "Matrix generated image delivery scenario", + ); + return { + artifacts: { + attachmentBodyPreview: matched.event.body?.slice(0, 200), + attachmentEventId: matched.event.eventId, + attachmentFilename: attachment.filename, + attachmentKind: attachment.kind, + attachmentMsgtype: matched.event.msgtype, + driverEventId, + roomId, + triggerBody, + }, + details: [ + `room id: ${roomId}`, + `driver event: ${driverEventId}`, + ...buildMatrixQaAttachmentDetailLines({ + attachmentEvent: matched.event, + label: "generated image", + }), + ].join("\n"), + } satisfies MatrixQaScenarioExecution; +} + export async function runRoomAutoJoinInviteScenario(context: MatrixQaScenarioContext) { const { client, startSince } = await primeMatrixQaDriverScenarioClient(context); const dynamicRoomId = await client.createPrivateRoom({ diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime.ts index 559460a4229..a420f775ce5 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime.ts @@ -11,7 +11,9 @@ import { } from "./scenario-runtime-dm.js"; import { runBlockStreamingScenario, + runGeneratedImageDeliveryScenario, runHomeserverRestartResumeScenario, + runImageUnderstandingAttachmentScenario, runMatrixQaCanary, runMembershipLossScenario, runObserverAllowlistOverrideScenario, @@ -119,6 +121,10 @@ export async function runMatrixQaScenario( return await runQuietStreamingPreviewScenario(context); case "matrix-room-block-streaming": return await runBlockStreamingScenario(context); + case "matrix-room-image-understanding-attachment": + return await runImageUnderstandingAttachmentScenario(context); + case "matrix-room-generated-image-delivery": + return await runGeneratedImageDeliveryScenario(context); case "matrix-dm-reply-shape": return await runDriverTopologyScopedScenario({ context, diff --git a/extensions/qa-matrix/src/runners/contract/scenario-types.ts b/extensions/qa-matrix/src/runners/contract/scenario-types.ts index 81454ab95d6..a03b30a41f0 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-types.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-types.ts @@ -16,6 +16,11 @@ export type MatrixQaCanaryArtifact = { }; export type MatrixQaScenarioArtifacts = { + attachmentBodyPreview?: string; + attachmentEventId?: string; + attachmentFilename?: string; + attachmentKind?: string; + attachmentMsgtype?: string; actorUserId?: string; driverEventId?: string; expectedNoReplyWindowMs?: number; diff --git a/extensions/qa-matrix/src/runners/contract/scenarios.test.ts b/extensions/qa-matrix/src/runners/contract/scenarios.test.ts index f711b5ad61e..9ab774a0648 100644 --- a/extensions/qa-matrix/src/runners/contract/scenarios.test.ts +++ b/extensions/qa-matrix/src/runners/contract/scenarios.test.ts @@ -32,6 +32,8 @@ describe("matrix live qa scenarios", () => { "matrix-room-thread-reply-override", "matrix-room-quiet-streaming-preview", "matrix-room-block-streaming", + "matrix-room-image-understanding-attachment", + "matrix-room-generated-image-delivery", "matrix-dm-reply-shape", "matrix-dm-shared-session-notice", "matrix-dm-thread-reply-override", @@ -748,6 +750,171 @@ describe("matrix live qa scenarios", () => { ); }); + it("sends a real Matrix image attachment for image-understanding prompts", async () => { + const primeRoom = vi.fn().mockResolvedValue("driver-sync-start"); + const sendMediaMessage = vi.fn().mockResolvedValue("$image-understanding-trigger"); + const waitForRoomEvent = vi.fn().mockResolvedValue({ + event: { + kind: "message", + roomId: "!media:matrix-qa.test", + eventId: "$sut-image-reply", + sender: "@sut:matrix-qa.test", + type: "m.room.message", + body: "Protocol note: the attached image is split horizontally, with red on top and blue on the bottom.", + }, + since: "driver-sync-next", + }); + + createMatrixQaClient.mockReturnValue({ + primeRoom, + sendMediaMessage, + waitForRoomEvent, + }); + + const scenario = MATRIX_QA_SCENARIOS.find( + (entry) => entry.id === "matrix-room-image-understanding-attachment", + ); + expect(scenario).toBeDefined(); + + await expect( + runMatrixQaScenario(scenario!, { + baseUrl: "http://127.0.0.1:28008/", + canary: undefined, + driverAccessToken: "driver-token", + driverUserId: "@driver:matrix-qa.test", + observedEvents: [], + observerAccessToken: "observer-token", + observerUserId: "@observer:matrix-qa.test", + roomId: "!main:matrix-qa.test", + restartGateway: undefined, + syncState: {}, + sutAccessToken: "sut-token", + sutUserId: "@sut:matrix-qa.test", + timeoutMs: 8_000, + topology: { + defaultRoomId: "!main:matrix-qa.test", + defaultRoomKey: "main", + rooms: [ + { + key: scenarioTesting.MATRIX_QA_MEDIA_ROOM_KEY, + kind: "group", + memberRoles: ["driver", "observer", "sut"], + memberUserIds: [ + "@driver:matrix-qa.test", + "@observer:matrix-qa.test", + "@sut:matrix-qa.test", + ], + name: "Media", + requireMention: true, + roomId: "!media:matrix-qa.test", + }, + ], + }, + }), + ).resolves.toMatchObject({ + artifacts: { + attachmentFilename: "red-top-blue-bottom.png", + driverEventId: "$image-understanding-trigger", + reply: { + eventId: "$sut-image-reply", + }, + }, + }); + + expect(sendMediaMessage).toHaveBeenCalledWith( + expect.objectContaining({ + contentType: "image/png", + fileName: "red-top-blue-bottom.png", + kind: "image", + mentionUserIds: ["@sut:matrix-qa.test"], + roomId: "!media:matrix-qa.test", + }), + ); + }); + + it("waits for a real Matrix image attachment after image generation", async () => { + const primeRoom = vi.fn().mockResolvedValue("driver-sync-start"); + const sendTextMessage = vi.fn().mockResolvedValue("$image-generate-trigger"); + const waitForRoomEvent = vi.fn().mockResolvedValue({ + event: { + kind: "message", + roomId: "!media:matrix-qa.test", + eventId: "$sut-image", + sender: "@sut:matrix-qa.test", + type: "m.room.message", + body: "Protocol note: generated the QA lighthouse image successfully.", + msgtype: "m.image", + attachment: { + kind: "image", + filename: "qa-lighthouse.png", + }, + }, + since: "driver-sync-next", + }); + + createMatrixQaClient.mockReturnValue({ + primeRoom, + sendTextMessage, + waitForRoomEvent, + }); + + const scenario = MATRIX_QA_SCENARIOS.find( + (entry) => entry.id === "matrix-room-generated-image-delivery", + ); + expect(scenario).toBeDefined(); + + await expect( + runMatrixQaScenario(scenario!, { + baseUrl: "http://127.0.0.1:28008/", + canary: undefined, + driverAccessToken: "driver-token", + driverUserId: "@driver:matrix-qa.test", + observedEvents: [], + observerAccessToken: "observer-token", + observerUserId: "@observer:matrix-qa.test", + roomId: "!main:matrix-qa.test", + restartGateway: undefined, + syncState: {}, + sutAccessToken: "sut-token", + sutUserId: "@sut:matrix-qa.test", + timeoutMs: 8_000, + topology: { + defaultRoomId: "!main:matrix-qa.test", + defaultRoomKey: "main", + rooms: [ + { + key: scenarioTesting.MATRIX_QA_MEDIA_ROOM_KEY, + kind: "group", + memberRoles: ["driver", "observer", "sut"], + memberUserIds: [ + "@driver:matrix-qa.test", + "@observer:matrix-qa.test", + "@sut:matrix-qa.test", + ], + name: "Media", + requireMention: true, + roomId: "!media:matrix-qa.test", + }, + ], + }, + }), + ).resolves.toMatchObject({ + artifacts: { + attachmentEventId: "$sut-image", + attachmentFilename: "qa-lighthouse.png", + attachmentKind: "image", + attachmentMsgtype: "m.image", + driverEventId: "$image-generate-trigger", + }, + }); + + expect(sendTextMessage).toHaveBeenCalledWith({ + body: expect.stringContaining("Image generation check: generate a QA lighthouse image"), + mentionUserIds: ["@sut:matrix-qa.test"], + roomId: "!media:matrix-qa.test", + }); + }); + it("uses DM thread override scenarios against the provisioned DM room", async () => { const primeRoom = vi.fn().mockResolvedValue("driver-sync-start"); const sendTextMessage = vi.fn().mockResolvedValue("$dm-thread-trigger"); diff --git a/extensions/qa-matrix/src/runners/contract/scenarios.ts b/extensions/qa-matrix/src/runners/contract/scenarios.ts index 6431897c8ba..6a07c1a467e 100644 --- a/extensions/qa-matrix/src/runners/contract/scenarios.ts +++ b/extensions/qa-matrix/src/runners/contract/scenarios.ts @@ -1,6 +1,7 @@ import { MATRIX_QA_DRIVER_DM_ROOM_KEY, MATRIX_QA_DRIVER_DM_SHARED_ROOM_KEY, + MATRIX_QA_MEDIA_ROOM_KEY, MATRIX_QA_MEMBERSHIP_ROOM_KEY, MATRIX_QA_SCENARIOS, MATRIX_QA_SECONDARY_ROOM_KEY, @@ -54,6 +55,7 @@ export type { MatrixQaScenarioContext, MatrixQaSyncState }; export const __testing = { MATRIX_QA_DRIVER_DM_ROOM_KEY, MATRIX_QA_DRIVER_DM_SHARED_ROOM_KEY, + MATRIX_QA_MEDIA_ROOM_KEY, MATRIX_QA_MEMBERSHIP_ROOM_KEY, MATRIX_QA_SECONDARY_ROOM_KEY, MATRIX_QA_STANDARD_SCENARIO_IDS, diff --git a/extensions/qa-matrix/src/substrate/artifacts.test.ts b/extensions/qa-matrix/src/substrate/artifacts.test.ts index f95f825a3c6..951d5676e6c 100644 --- a/extensions/qa-matrix/src/substrate/artifacts.test.ts +++ b/extensions/qa-matrix/src/substrate/artifacts.test.ts @@ -15,8 +15,13 @@ describe("matrix observed event artifacts", () => { type: "m.room.message", body: "secret", formattedBody: "

secret

", - msgtype: "m.text", + msgtype: "m.image", originServerTs: 1_700_000_000_000, + attachment: { + kind: "image", + caption: "secret", + filename: "qa-lighthouse.png", + }, relatesTo: { relType: "m.thread", eventId: "$root", @@ -33,8 +38,12 @@ describe("matrix observed event artifacts", () => { eventId: "$event", sender: "@sut:matrix-qa.test", type: "m.room.message", - msgtype: "m.text", + msgtype: "m.image", originServerTs: 1_700_000_000_000, + attachment: { + kind: "image", + filename: "qa-lighthouse.png", + }, relatesTo: { relType: "m.thread", eventId: "$root", diff --git a/extensions/qa-matrix/src/substrate/artifacts.ts b/extensions/qa-matrix/src/substrate/artifacts.ts index a8da02d598d..250fbb83527 100644 --- a/extensions/qa-matrix/src/substrate/artifacts.ts +++ b/extensions/qa-matrix/src/substrate/artifacts.ts @@ -20,6 +20,12 @@ export function buildMatrixQaObservedEventsArtifact(params: { relatesTo: event.relatesTo, mentions: event.mentions, reaction: event.reaction, + attachment: event.attachment + ? { + kind: event.attachment.kind, + ...(event.attachment.filename ? { filename: event.attachment.filename } : {}), + } + : undefined, }, ); } diff --git a/extensions/qa-matrix/src/substrate/client.test.ts b/extensions/qa-matrix/src/substrate/client.test.ts index 0a9aedba443..55d49de2eab 100644 --- a/extensions/qa-matrix/src/substrate/client.test.ts +++ b/extensions/qa-matrix/src/substrate/client.test.ts @@ -189,6 +189,79 @@ describe("matrix driver client", () => { ).resolves.toBe("$reaction-1"); }); + it("uploads Matrix media before sending the room event", async () => { + const requests: Array<{ + body: RequestInit["body"]; + headers: HeadersInit | undefined; + url: string; + }> = []; + const fetchImpl: typeof fetch = async (input, init) => { + requests.push({ + body: init?.body, + headers: init?.headers, + url: resolveRequestUrl(input), + }); + if (requests.length === 1) { + return new Response( + JSON.stringify({ content_uri: "mxc://matrix-qa.test/red-top-blue-bottom" }), + { + status: 200, + headers: { "content-type": "application/json" }, + }, + ); + } + return new Response(JSON.stringify({ event_id: "$media-1" }), { + status: 200, + headers: { "content-type": "application/json" }, + }); + }; + + const client = createMatrixQaClient({ + accessToken: "token", + baseUrl: "http://127.0.0.1:28008/", + fetchImpl, + }); + + await expect( + client.sendMediaMessage({ + body: "@sut:matrix-qa.test Image understanding check", + buffer: Buffer.from("png-bytes"), + contentType: "image/png", + fileName: "red-top-blue-bottom.png", + kind: "image", + mentionUserIds: ["@sut:matrix-qa.test"], + roomId: "!room:matrix-qa.test", + }), + ).resolves.toBe("$media-1"); + + expect(requests).toHaveLength(2); + expect(requests[0]?.url).toBe( + "http://127.0.0.1:28008/_matrix/media/v3/upload?filename=red-top-blue-bottom.png", + ); + expect(requests[0]?.body).toBeInstanceOf(Uint8Array); + expect(Array.from(requests[0]?.body as Uint8Array)).toEqual( + Array.from(Buffer.from("png-bytes")), + ); + expect(requests[1]?.url).toContain( + "/_matrix/client/v3/rooms/!room%3Amatrix-qa.test/send/m.room.message/", + ); + expect( + typeof requests[1]?.body === "string" ? JSON.parse(requests[1].body) : requests[1]?.body, + ).toMatchObject({ + body: "@sut:matrix-qa.test Image understanding check", + msgtype: "m.image", + filename: "red-top-blue-bottom.png", + url: "mxc://matrix-qa.test/red-top-blue-bottom", + info: { + mimetype: "image/png", + size: "png-bytes".length, + }, + "m.mentions": { + user_ids: ["@sut:matrix-qa.test"], + }, + }); + }); + it("provisions a three-member room so Matrix QA runs in a group context", async () => { const createRoomBodies: Array> = []; const fetchImpl: typeof fetch = async (input, init) => { diff --git a/extensions/qa-matrix/src/substrate/client.ts b/extensions/qa-matrix/src/substrate/client.ts index 3db48d28e32..92f30aa27d4 100644 --- a/extensions/qa-matrix/src/substrate/client.ts +++ b/extensions/qa-matrix/src/substrate/client.ts @@ -52,6 +52,18 @@ type MatrixQaSendMessageContent = { msgtype: "m.text"; }; +type MatrixQaMediaMessageType = "m.audio" | "m.file" | "m.image" | "m.video"; + +type MatrixQaSendMediaMessageContent = Omit & { + filename?: string; + info?: { + mimetype?: string; + size?: number; + }; + msgtype: MatrixQaMediaMessageType; + url: string; +}; + type MatrixQaSendReactionContent = { "m.relates_to": { event_id: string; @@ -189,6 +201,96 @@ function buildMatrixQaMessageContent(params: { }; } +function resolveMatrixQaMediaMsgtype(params: { + contentType?: string; + kind?: "audio" | "file" | "image" | "video"; +}): MatrixQaMediaMessageType { + if (params.kind === "audio" || params.contentType?.startsWith("audio/")) { + return "m.audio"; + } + if (params.kind === "video" || params.contentType?.startsWith("video/")) { + return "m.video"; + } + if (params.kind === "image" || params.contentType?.startsWith("image/")) { + return "m.image"; + } + return "m.file"; +} + +function buildMatrixQaMediaMessageContent(params: { + body?: string; + contentType?: string; + fileName?: string; + kind?: "audio" | "file" | "image" | "video"; + mentionUserIds?: string[]; + replyToEventId?: string; + size: number; + threadRootEventId?: string; + url: string; +}): MatrixQaSendMediaMessageContent { + const normalizedBody = params.body?.trim() || params.fileName?.trim() || "(file)"; + const content = buildMatrixQaMessageContent({ + body: normalizedBody, + mentionUserIds: params.mentionUserIds, + replyToEventId: params.replyToEventId, + threadRootEventId: params.threadRootEventId, + }); + return { + ...content, + filename: params.fileName?.trim() || undefined, + info: { + ...(params.contentType ? { mimetype: params.contentType } : {}), + size: params.size, + }, + msgtype: resolveMatrixQaMediaMsgtype({ + contentType: params.contentType, + kind: params.kind, + }), + url: params.url, + }; +} + +async function uploadMatrixQaContent(params: { + accessToken?: string; + baseUrl: string; + buffer: Buffer; + contentType?: string; + fetchImpl: MatrixQaFetchLike; + fileName?: string; +}) { + const url = new URL("/_matrix/media/v3/upload", params.baseUrl); + const fileName = params.fileName?.trim(); + if (fileName) { + url.searchParams.set("filename", fileName); + } + const uploadBody: Uint8Array = + params.buffer.buffer instanceof ArrayBuffer + ? new Uint8Array(params.buffer.buffer, params.buffer.byteOffset, params.buffer.byteLength) + : Uint8Array.from(params.buffer); + const response = await params.fetchImpl(url, { + method: "POST", + headers: { + accept: "application/json", + "content-type": params.contentType ?? "application/octet-stream", + ...(params.accessToken ? { authorization: `Bearer ${params.accessToken}` } : {}), + }, + body: uploadBody, + signal: AbortSignal.timeout(20_000), + }); + const body = (await response.json().catch(() => ({}))) as { + content_uri?: string; + error?: string; + }; + if (response.status !== 200) { + throw new Error(body.error ?? `Matrix media upload failed with status ${response.status}`); + } + const contentUri = body.content_uri?.trim(); + if (!contentUri) { + throw new Error("Matrix media upload did not return content_uri."); + } + return contentUri; +} + export function resolveNextRegistrationAuth(params: { registrationToken: string; response: MatrixQaUiaaResponse; @@ -371,6 +473,50 @@ export function createMatrixQaClient(params: { } return eventId; }, + async sendMediaMessage(opts: { + body?: string; + buffer: Buffer; + contentType?: string; + fileName?: string; + kind?: "audio" | "file" | "image" | "video"; + mentionUserIds?: string[]; + replyToEventId?: string; + roomId: string; + threadRootEventId?: string; + }) { + const contentUri = await uploadMatrixQaContent({ + accessToken: params.accessToken, + baseUrl: params.baseUrl, + buffer: opts.buffer, + contentType: opts.contentType, + fetchImpl, + fileName: opts.fileName, + }); + const txnId = randomUUID(); + const result = await requestMatrixJson<{ event_id?: string }>({ + accessToken: params.accessToken, + baseUrl: params.baseUrl, + body: buildMatrixQaMediaMessageContent({ + body: opts.body, + contentType: opts.contentType, + fileName: opts.fileName, + kind: opts.kind, + mentionUserIds: opts.mentionUserIds, + replyToEventId: opts.replyToEventId, + size: opts.buffer.byteLength, + threadRootEventId: opts.threadRootEventId, + url: contentUri, + }), + endpoint: `/_matrix/client/v3/rooms/${encodeURIComponent(opts.roomId)}/send/m.room.message/${encodeURIComponent(txnId)}`, + fetchImpl, + method: "PUT", + }); + const eventId = result.body.event_id?.trim(); + if (!eventId) { + throw new Error("Matrix sendMediaMessage did not return event_id."); + } + return eventId; + }, async sendReaction(opts: { emoji: string; messageId: string; roomId: string }) { const txnId = randomUUID(); const result = await requestMatrixJson<{ event_id?: string }>({ diff --git a/extensions/qa-matrix/src/substrate/events.test.ts b/extensions/qa-matrix/src/substrate/events.test.ts index a8e0cd346ab..fcbb2513f36 100644 --- a/extensions/qa-matrix/src/substrate/events.test.ts +++ b/extensions/qa-matrix/src/substrate/events.test.ts @@ -133,6 +133,53 @@ describe("matrix observed event normalization", () => { }); }); + it("normalizes Matrix image messages with attachment metadata", () => { + expect( + normalizeMatrixQaObservedEvent("!room:matrix-qa.test", { + event_id: "$image", + sender: "@sut:matrix-qa.test", + type: "m.room.message", + content: { + body: "Protocol note: generated the QA lighthouse image successfully.", + filename: "qa-lighthouse.png", + msgtype: "m.image", + }, + }), + ).toEqual( + expect.objectContaining({ + kind: "message", + eventId: "$image", + msgtype: "m.image", + attachment: { + kind: "image", + caption: "Protocol note: generated the QA lighthouse image successfully.", + filename: "qa-lighthouse.png", + }, + }), + ); + }); + + it("treats filename-like Matrix media bodies as attachment filenames", () => { + expect( + normalizeMatrixQaObservedEvent("!room:matrix-qa.test", { + event_id: "$image", + sender: "@sut:matrix-qa.test", + type: "m.room.message", + content: { + body: "qa-lighthouse.png", + msgtype: "m.image", + }, + }), + ).toEqual( + expect.objectContaining({ + attachment: { + kind: "image", + filename: "qa-lighthouse.png", + }, + }), + ); + }); + it("normalizes membership events with explicit membership kind", () => { expect( normalizeMatrixQaObservedEvent("!room:matrix-qa.test", { diff --git a/extensions/qa-matrix/src/substrate/events.ts b/extensions/qa-matrix/src/substrate/events.ts index c6bb1182094..b15a53c90a9 100644 --- a/extensions/qa-matrix/src/substrate/events.ts +++ b/extensions/qa-matrix/src/substrate/events.ts @@ -15,6 +15,12 @@ export type MatrixQaObservedEventKind = | "reaction" | "room-event"; +export type MatrixQaObservedEventAttachment = { + caption?: string; + filename?: string; + kind: "audio" | "file" | "image" | "sticker" | "video"; +}; + export type MatrixQaObservedEvent = { kind: MatrixQaObservedEventKind; roomId: string; @@ -41,6 +47,7 @@ export type MatrixQaObservedEvent = { eventId?: string; key?: string; }; + attachment?: MatrixQaObservedEventAttachment; }; function normalizeMentionUserIds(value: unknown) { @@ -80,6 +87,49 @@ function resolveMatrixQaObservedEventKind(params: { msgtype?: string; type: stri return "room-event" as const; } +function resolveMatrixQaAttachmentKind(msgtype: string | undefined) { + switch (msgtype) { + case "m.audio": + return "audio" as const; + case "m.file": + return "file" as const; + case "m.image": + return "image" as const; + case "m.sticker": + return "sticker" as const; + case "m.video": + return "video" as const; + default: + return undefined; + } +} + +function isLikelyMatrixQaFilenameBody(value: string) { + return !value.includes("\n") && /\.[a-z0-9][a-z0-9._-]{0,24}$/i.test(value); +} + +function resolveMatrixQaAttachmentSummary(params: { + body?: string; + filename?: string; + msgtype?: string; +}): MatrixQaObservedEventAttachment | undefined { + const kind = resolveMatrixQaAttachmentKind(params.msgtype); + if (!kind) { + return undefined; + } + const body = params.body?.trim() ?? ""; + const explicitFilename = params.filename?.trim() ?? ""; + const inferredFilename = + !explicitFilename && body && isLikelyMatrixQaFilenameBody(body) ? body : ""; + const filename = explicitFilename || inferredFilename; + const caption = body && body !== filename ? body : ""; + return { + kind, + ...(caption ? { caption } : {}), + ...(filename ? { filename } : {}), + }; +} + export function normalizeMatrixQaObservedEvent( roomId: string, event: MatrixQaRoomEvent, @@ -104,6 +154,12 @@ export function normalizeMatrixQaObservedEvent( const messageContent = resolveMatrixQaMessageContent(content, relatesTo); const normalizedMsgtype = typeof messageContent.msgtype === "string" ? messageContent.msgtype : msgtype; + const normalizedFilename = + typeof messageContent.filename === "string" + ? messageContent.filename + : typeof content.filename === "string" + ? content.filename + : undefined; const mentionsRaw = messageContent["m.mentions"] ?? content["m.mentions"]; const mentions = typeof mentionsRaw === "object" && mentionsRaw !== null @@ -116,6 +172,11 @@ export function normalizeMatrixQaObservedEvent( type === "m.reaction" && typeof relatesTo?.event_id === "string" ? relatesTo.event_id : undefined; + const attachment = resolveMatrixQaAttachmentSummary({ + body: typeof messageContent.body === "string" ? messageContent.body : undefined, + filename: normalizedFilename, + msgtype: normalizedMsgtype, + }); return { kind: resolveMatrixQaObservedEventKind({ msgtype: normalizedMsgtype, type }), @@ -160,5 +221,6 @@ export function normalizeMatrixQaObservedEvent( }, } : {}), + ...(attachment ? { attachment } : {}), }; } diff --git a/qa/scenarios/memory-dreaming-sweep.md b/qa/scenarios/memory-dreaming-sweep.md index e931d7e2e57..acd01a3c640 100644 --- a/qa/scenarios/memory-dreaming-sweep.md +++ b/qa/scenarios/memory-dreaming-sweep.md @@ -141,6 +141,12 @@ steps: - set: dailyPath value: expr: "path.join(env.gateway.workspaceDir, 'memory', `${dreamingDay}.md`)" + - set: lightReportPath + value: + expr: "path.join(env.gateway.workspaceDir, 'memory', 'dreaming', 'light', `${dreamingDay}.md`)" + - set: remReportPath + value: + expr: "path.join(env.gateway.workspaceDir, 'memory', 'dreaming', 'rem', `${dreamingDay}.md`)" - set: memoryPath value: expr: "path.join(env.gateway.workspaceDir, 'MEMORY.md')" @@ -250,7 +256,7 @@ steps: args: - lambda: async: true - expr: "(async () => { const status = await readDoctorMemoryStatus(env); const dailyMemory = await fs.readFile(dailyPath, 'utf8').catch(() => ''); const promotedMemory = await fs.readFile(memoryPath, 'utf8').catch(() => ''); if (!dailyMemory.includes('## Light Sleep') || !dailyMemory.includes('## REM Sleep')) return undefined; if (!promotedMemory.includes(config.expectedNeedle)) return undefined; if (status.dreaming?.phases?.deep?.managedCronPresent !== true) return undefined; if ((status.dreaming?.promotedTotal ?? 0) < 1) return undefined; if ((status.dreaming?.phaseSignalCount ?? 0) < 1) return undefined; return { status, dailyMemory, promotedMemory }; })()" + expr: "(async () => { const status = await readDoctorMemoryStatus(env); const lightReport = await fs.readFile(lightReportPath, 'utf8').catch(() => ''); const remReport = await fs.readFile(remReportPath, 'utf8').catch(() => ''); const promotedMemory = await fs.readFile(memoryPath, 'utf8').catch(() => ''); if (!lightReport.includes('# Light Sleep')) return undefined; if (!remReport.includes('# REM Sleep')) return undefined; if (!promotedMemory.includes(config.expectedNeedle)) return undefined; if (status.dreaming?.phases?.deep?.managedCronPresent !== true) return undefined; if ((status.dreaming?.promotedTotal ?? 0) < 1) return undefined; return { status, lightReport, remReport, promotedMemory }; })()" - expr: liveTurnTimeoutMs(env, 90000) - 1000 finally: @@ -272,5 +278,5 @@ steps: args: - ref: env - 60000 - detailsExpr: "JSON.stringify({ promotedTotal: promoted.status.dreaming?.promotedTotal ?? 0, shortTermCount: promoted.status.dreaming?.shortTermCount ?? 0, phaseSignalCount: promoted.status.dreaming?.phaseSignalCount ?? 0, lightSleep: promoted.dailyMemory.includes('## Light Sleep'), remSleep: promoted.dailyMemory.includes('## REM Sleep') })" + detailsExpr: "JSON.stringify({ promotedTotal: promoted.status.dreaming?.promotedTotal ?? 0, shortTermCount: promoted.status.dreaming?.shortTermCount ?? 0, phaseSignalCount: promoted.status.dreaming?.phaseSignalCount ?? 0, lightSleep: promoted.lightReport.includes('# Light Sleep'), remSleep: promoted.remReport.includes('# REM Sleep') })" ```