diff --git a/CHANGELOG.md b/CHANGELOG.md index 05fe86a1019..9e5290e71c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai - QA/Telegram: add a live `openclaw qa telegram` lane for private-group bot-to-bot checks, harden its artifact handling, and preserve native Telegram command reply threading for QA verification. (#64303) Thanks @obviyus. - Models/Codex: add the bundled Codex provider and plugin-owned app-server harness so `codex/gpt-*` models use Codex-managed auth, native threads, model discovery, and compaction while `openai/gpt-*` stays on the normal OpenAI provider path. (#64298) Thanks @steipete. - Agents: add an opt-in strict-agentic embedded Pi execution contract for GPT-5-family runs so plan-only or filler turns keep acting until they hit a real blocker. (#64241) Thanks @100yenadmin. +- QA/Matrix: add a live `openclaw qa matrix` lane backed by a disposable Matrix homeserver, shared live-transport seams, and Matrix-specific transport coverage for threading, reactions, restart, and allowlist behavior. (#64489) Thanks @gumadeiras. ### Fixes diff --git a/docs/concepts/qa-e2e-automation.md b/docs/concepts/qa-e2e-automation.md index 83dda8ece41..fa2e022f92b 100644 --- a/docs/concepts/qa-e2e-automation.md +++ b/docs/concepts/qa-e2e-automation.md @@ -52,6 +52,47 @@ pnpm qa:lab:watch rebuilds that bundle on change, and the browser auto-reloads when the QA Lab asset hash changes. +For a transport-real Matrix smoke lane, run: + +```bash +pnpm openclaw qa matrix +``` + +That lane provisions a disposable Tuwunel homeserver in Docker, registers +temporary driver, SUT, and observer users, creates one private room, then runs +the real Matrix plugin inside a QA gateway child. The live transport lane keeps +the child config scoped to the transport under test, so Matrix runs without +`qa-channel` in the child config. + +For a transport-real Telegram smoke lane, run: + +```bash +pnpm openclaw qa telegram +``` + +That lane targets one real private Telegram group instead of provisioning a +disposable server. It requires `OPENCLAW_QA_TELEGRAM_GROUP_ID`, +`OPENCLAW_QA_TELEGRAM_DRIVER_BOT_TOKEN`, and +`OPENCLAW_QA_TELEGRAM_SUT_BOT_TOKEN`, plus two distinct bots in the same +private group. The SUT bot must have a Telegram username, and bot-to-bot +observation works best when both bots have Bot-to-Bot Communication Mode +enabled in `@BotFather`. + +Live transport lanes now share one smaller contract instead of each inventing +their own scenario list shape: + +`qa-channel` remains the broad synthetic product-behavior suite and is not part +of the live transport coverage matrix. + +| Lane | Canary | Mention gating | Allowlist block | Top-level reply | Restart resume | Thread follow-up | Thread isolation | Reaction observation | Help command | +| -------- | ------ | -------------- | --------------- | --------------- | -------------- | ---------------- | ---------------- | -------------------- | ------------ | +| Matrix | x | x | x | x | x | x | x | x | | +| Telegram | x | | | | | | | | x | + +This keeps `qa-channel` as the broad product-behavior suite while Matrix, +Telegram, and future live transports share one explicit transport-contract +checklist. + For a disposable Linux VM lane without bringing Docker into the QA path, run: ```bash diff --git a/docs/help/testing.md b/docs/help/testing.md index bfb784a360f..38f98bb203e 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -65,6 +65,27 @@ These commands sit beside the main test suites when you need QA-lab realism: `.artifacts/qa-e2e/...`. - `pnpm qa:lab:up` - Starts the Docker-backed QA site for operator-style QA work. +- `pnpm openclaw qa matrix` + - Runs the Matrix live QA lane against a disposable Docker-backed Tuwunel homeserver. + - Provisions three temporary Matrix users (`driver`, `sut`, `observer`) plus one private room, then starts a QA gateway child with the real Matrix plugin as the SUT transport. + - Uses the pinned stable Tuwunel image `ghcr.io/matrix-construct/tuwunel:v1.5.1` by default. Override with `OPENCLAW_QA_MATRIX_TUWUNEL_IMAGE` when you need to test a different image. + - Writes a Matrix QA report, summary, and observed-events artifact under `.artifacts/qa-e2e/...`. +- `pnpm openclaw qa telegram` + - Runs the Telegram live QA lane against a real private group using the driver and SUT bot tokens from env. + - Requires `OPENCLAW_QA_TELEGRAM_GROUP_ID`, `OPENCLAW_QA_TELEGRAM_DRIVER_BOT_TOKEN`, and `OPENCLAW_QA_TELEGRAM_SUT_BOT_TOKEN`. The group id must be the numeric Telegram chat id. + - Requires two distinct bots in the same private group, with the SUT bot exposing a Telegram username. + - For stable bot-to-bot observation, enable Bot-to-Bot Communication Mode in `@BotFather` for both bots and ensure the driver bot can observe group bot traffic. + - Writes a Telegram QA report, summary, and observed-messages artifact under `.artifacts/qa-e2e/...`. + +Live transport lanes share one standard contract so new transports do not drift: + +`qa-channel` remains the broad synthetic QA suite and is not part of the live +transport coverage matrix. + +| Lane | Canary | Mention gating | Allowlist block | Top-level reply | Restart resume | Thread follow-up | Thread isolation | Reaction observation | Help command | +| -------- | ------ | -------------- | --------------- | --------------- | -------------- | ---------------- | ---------------- | -------------------- | ------------ | +| Matrix | x | x | x | x | x | x | x | x | | +| Telegram | x | | | | | | | | x | ## Test suites (what runs where) diff --git a/extensions/qa-lab/src/cli-options.ts b/extensions/qa-lab/src/cli-options.ts new file mode 100644 index 00000000000..1576d43291f --- /dev/null +++ b/extensions/qa-lab/src/cli-options.ts @@ -0,0 +1,4 @@ +export function collectString(value: string, previous: string[]) { + const trimmed = value.trim(); + return trimmed ? [...previous, trimmed] : previous; +} diff --git a/extensions/qa-lab/src/cli-paths.ts b/extensions/qa-lab/src/cli-paths.ts new file mode 100644 index 00000000000..529527fdff6 --- /dev/null +++ b/extensions/qa-lab/src/cli-paths.ts @@ -0,0 +1,16 @@ +import path from "node:path"; + +export function resolveRepoRelativeOutputDir(repoRoot: string, outputDir?: string) { + if (!outputDir) { + return undefined; + } + if (path.isAbsolute(outputDir)) { + throw new Error("--output-dir must be a relative path inside the repo root."); + } + const resolved = path.resolve(repoRoot, outputDir); + const relative = path.relative(repoRoot, resolved); + if (relative.startsWith("..") || path.isAbsolute(relative)) { + throw new Error("--output-dir must stay within the repo root."); + } + return resolved; +} diff --git a/extensions/qa-lab/src/cli.runtime.test.ts b/extensions/qa-lab/src/cli.runtime.test.ts index 9bf40c087e4..c86b7f088eb 100644 --- a/extensions/qa-lab/src/cli.runtime.test.ts +++ b/extensions/qa-lab/src/cli.runtime.test.ts @@ -6,6 +6,7 @@ const { runQaSuiteFromRuntime, runQaCharacterEval, runQaMultipass, + runMatrixQaLive, runTelegramQaLive, startQaLabServer, writeQaDockerHarnessFiles, @@ -16,6 +17,7 @@ const { runQaSuiteFromRuntime: vi.fn(), runQaCharacterEval: vi.fn(), runQaMultipass: vi.fn(), + runMatrixQaLive: vi.fn(), runTelegramQaLive: vi.fn(), startQaLabServer: vi.fn(), writeQaDockerHarnessFiles: vi.fn(), @@ -39,7 +41,11 @@ vi.mock("./multipass.runtime.js", () => ({ runQaMultipass, })); -vi.mock("./telegram-live.runtime.js", () => ({ +vi.mock("./live-transports/matrix/matrix-live.runtime.js", () => ({ + runMatrixQaLive, +})); + +vi.mock("./live-transports/telegram/telegram-live.runtime.js", () => ({ runTelegramQaLive, })); @@ -56,8 +62,8 @@ vi.mock("./docker-up.runtime.js", () => ({ runQaDockerUp, })); +import { resolveRepoRelativeOutputDir } from "./cli-paths.js"; import { - __testing, runQaLabSelfCheckCommand, runQaDockerBuildImageCommand, runQaDockerScaffoldCommand, @@ -65,8 +71,9 @@ import { runQaCharacterEvalCommand, runQaManualLaneCommand, runQaSuiteCommand, - runQaTelegramCommand, } from "./cli.runtime.js"; +import { runQaMatrixCommand } from "./live-transports/matrix/cli.runtime.js"; +import { runQaTelegramCommand } from "./live-transports/telegram/cli.runtime.js"; describe("qa cli runtime", () => { let stdoutWrite: ReturnType; @@ -77,6 +84,7 @@ describe("qa cli runtime", () => { runQaCharacterEval.mockReset(); runQaManualLane.mockReset(); runQaMultipass.mockReset(); + runMatrixQaLive.mockReset(); runTelegramQaLive.mockReset(); startQaLabServer.mockReset(); writeQaDockerHarnessFiles.mockReset(); @@ -107,6 +115,13 @@ describe("qa cli runtime", () => { vmName: "openclaw-qa-test", scenarioIds: ["channel-chat-baseline"], }); + runMatrixQaLive.mockResolvedValue({ + outputDir: "/tmp/matrix", + reportPath: "/tmp/matrix/report.md", + summaryPath: "/tmp/matrix/summary.json", + observedEventsPath: "/tmp/matrix/observed.json", + scenarios: [], + }); runTelegramQaLive.mockResolvedValue({ outputDir: "/tmp/telegram", reportPath: "/tmp/telegram/report.md", @@ -186,13 +201,37 @@ describe("qa cli runtime", () => { }); }); + it("resolves matrix qa repo-root-relative paths before dispatching", async () => { + await runQaMatrixCommand({ + repoRoot: "/tmp/openclaw-repo", + outputDir: ".artifacts/qa/matrix", + providerMode: "live-frontier", + primaryModel: "openai/gpt-5.4", + alternateModel: "openai/gpt-5.4", + fastMode: true, + scenarioIds: ["matrix-thread-follow-up"], + sutAccountId: "sut-live", + }); + + expect(runMatrixQaLive).toHaveBeenCalledWith({ + repoRoot: path.resolve("/tmp/openclaw-repo"), + outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa/matrix"), + providerMode: "live-frontier", + primaryModel: "openai/gpt-5.4", + alternateModel: "openai/gpt-5.4", + fastMode: true, + scenarioIds: ["matrix-thread-follow-up"], + sutAccountId: "sut-live", + }); + }); + it("rejects output dirs that escape the repo root", () => { - expect(() => - __testing.resolveRepoRelativeOutputDir("/tmp/openclaw-repo", "../outside"), - ).toThrow("--output-dir must stay within the repo root."); - expect(() => - __testing.resolveRepoRelativeOutputDir("/tmp/openclaw-repo", "/tmp/outside"), - ).toThrow("--output-dir must be a relative path inside the repo root."); + expect(() => resolveRepoRelativeOutputDir("/tmp/openclaw-repo", "../outside")).toThrow( + "--output-dir must stay within the repo root.", + ); + expect(() => resolveRepoRelativeOutputDir("/tmp/openclaw-repo", "/tmp/outside")).toThrow( + "--output-dir must be a relative path inside the repo root.", + ); }); it("defaults telegram qa runs onto the live provider lane", async () => { @@ -209,6 +248,20 @@ describe("qa cli runtime", () => { ); }); + it("defaults matrix qa runs onto the live provider lane", async () => { + await runQaMatrixCommand({ + repoRoot: "/tmp/openclaw-repo", + scenarioIds: ["matrix-thread-follow-up"], + }); + + expect(runMatrixQaLive).toHaveBeenCalledWith( + expect.objectContaining({ + repoRoot: path.resolve("/tmp/openclaw-repo"), + providerMode: "live-frontier", + }), + ); + }); + it("normalizes legacy live-openai suite runs onto the frontier provider mode", async () => { await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", diff --git a/extensions/qa-lab/src/cli.runtime.ts b/extensions/qa-lab/src/cli.runtime.ts index 5c1ea4d0bbb..4d516157c15 100644 --- a/extensions/qa-lab/src/cli.runtime.ts +++ b/extensions/qa-lab/src/cli.runtime.ts @@ -1,5 +1,6 @@ import path from "node:path"; import { runQaCharacterEval, type QaCharacterModelOptions } from "./character-eval.js"; +import { resolveRepoRelativeOutputDir } from "./cli-paths.js"; import { buildQaDockerHarnessImage, writeQaDockerHarnessFiles } from "./docker-harness.js"; import { runQaDockerUp } from "./docker-up.runtime.js"; import type { QaCliBackendAuthMode } from "./gateway-child.js"; @@ -15,28 +16,12 @@ import { type QaProviderModeInput, } from "./run-config.js"; import { runQaSuiteFromRuntime } from "./suite-launch.runtime.js"; -import { runTelegramQaLive } from "./telegram-live.runtime.js"; type InterruptibleServer = { baseUrl: string; stop(): Promise; }; -function resolveRepoRelativeOutputDir(repoRoot: string, outputDir?: string) { - if (!outputDir) { - return undefined; - } - if (path.isAbsolute(outputDir)) { - throw new Error("--output-dir must be a relative path inside the repo root."); - } - const resolved = path.resolve(repoRoot, outputDir); - const relative = path.relative(repoRoot, resolved); - if (relative.startsWith("..") || path.isAbsolute(relative)) { - throw new Error("--output-dir must stay within the repo root."); - } - return resolved; -} - function resolveQaManualLaneModels(opts: { providerMode: QaProviderMode; primaryModel?: string; @@ -296,34 +281,6 @@ export async function runQaSuiteCommand(opts: { process.stdout.write(`QA suite summary: ${result.summaryPath}\n`); } -export async function runQaTelegramCommand(opts: { - repoRoot?: string; - outputDir?: string; - providerMode?: QaProviderModeInput; - primaryModel?: string; - alternateModel?: string; - fastMode?: boolean; - scenarioIds?: string[]; - sutAccountId?: string; -}) { - const repoRoot = path.resolve(opts.repoRoot ?? process.cwd()); - const providerMode: QaProviderMode = - opts.providerMode === undefined ? "live-frontier" : normalizeQaProviderMode(opts.providerMode); - const result = await runTelegramQaLive({ - repoRoot, - outputDir: resolveRepoRelativeOutputDir(repoRoot, opts.outputDir), - providerMode, - primaryModel: opts.primaryModel, - alternateModel: opts.alternateModel, - fastMode: opts.fastMode, - scenarioIds: opts.scenarioIds, - sutAccountId: opts.sutAccountId, - }); - process.stdout.write(`Telegram QA report: ${result.reportPath}\n`); - process.stdout.write(`Telegram QA summary: ${result.summaryPath}\n`); - process.stdout.write(`Telegram QA observed messages: ${result.observedMessagesPath}\n`); -} - export async function runQaCharacterEvalCommand(opts: { repoRoot?: string; outputDir?: string; diff --git a/extensions/qa-lab/src/cli.test.ts b/extensions/qa-lab/src/cli.test.ts new file mode 100644 index 00000000000..a051493870b --- /dev/null +++ b/extensions/qa-lab/src/cli.test.ts @@ -0,0 +1,92 @@ +import { Command } from "commander"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +const { runQaMatrixCommand, runQaTelegramCommand } = vi.hoisted(() => ({ + runQaMatrixCommand: vi.fn(), + runQaTelegramCommand: vi.fn(), +})); + +vi.mock("./live-transports/matrix/cli.runtime.js", () => ({ + runQaMatrixCommand, +})); + +vi.mock("./live-transports/telegram/cli.runtime.js", () => ({ + runQaTelegramCommand, +})); + +import { registerQaLabCli } from "./cli.js"; + +describe("qa cli registration", () => { + let program: Command; + + beforeEach(() => { + program = new Command(); + registerQaLabCli(program); + runQaMatrixCommand.mockReset(); + runQaTelegramCommand.mockReset(); + }); + + afterEach(() => { + vi.clearAllMocks(); + }); + + it("registers the matrix and telegram live transport subcommands", () => { + const qa = program.commands.find((command) => command.name() === "qa"); + expect(qa).toBeDefined(); + expect(qa?.commands.map((command) => command.name())).toEqual( + expect.arrayContaining(["matrix", "telegram"]), + ); + }); + + it("routes matrix CLI flags into the lane runtime", async () => { + await program.parseAsync([ + "node", + "openclaw", + "qa", + "matrix", + "--repo-root", + "/tmp/openclaw-repo", + "--output-dir", + ".artifacts/qa/matrix", + "--provider-mode", + "mock-openai", + "--model", + "mock-openai/gpt-5.4", + "--alt-model", + "mock-openai/gpt-5.4-alt", + "--scenario", + "matrix-thread-follow-up", + "--scenario", + "matrix-thread-isolation", + "--fast", + "--sut-account", + "sut-live", + ]); + + expect(runQaMatrixCommand).toHaveBeenCalledWith({ + repoRoot: "/tmp/openclaw-repo", + outputDir: ".artifacts/qa/matrix", + providerMode: "mock-openai", + primaryModel: "mock-openai/gpt-5.4", + alternateModel: "mock-openai/gpt-5.4-alt", + fastMode: true, + scenarioIds: ["matrix-thread-follow-up", "matrix-thread-isolation"], + sutAccountId: "sut-live", + }); + }); + + it("routes telegram CLI defaults into the lane runtime", async () => { + await program.parseAsync(["node", "openclaw", "qa", "telegram"]); + + expect(runQaTelegramCommand).toHaveBeenCalledWith({ + repoRoot: undefined, + outputDir: undefined, + providerMode: "live-frontier", + primaryModel: undefined, + alternateModel: undefined, + fastMode: false, + scenarioIds: [], + sutAccountId: "sut", + }); + }); +}); diff --git a/extensions/qa-lab/src/cli.ts b/extensions/qa-lab/src/cli.ts index e541e1ecb5c..b0e40ce649b 100644 --- a/extensions/qa-lab/src/cli.ts +++ b/extensions/qa-lab/src/cli.ts @@ -1,4 +1,6 @@ import type { Command } from "commander"; +import { collectString } from "./cli-options.js"; +import { LIVE_TRANSPORT_QA_CLI_REGISTRATIONS } from "./live-transports/cli.js"; import type { QaProviderModeInput } from "./run-config.js"; type QaLabCliRuntime = typeof import("./cli.runtime.js"); @@ -35,20 +37,6 @@ async function runQaSuite(opts: { await runtime.runQaSuiteCommand(opts); } -async function runQaTelegram(opts: { - repoRoot?: string; - outputDir?: string; - providerMode?: QaProviderModeInput; - primaryModel?: string; - alternateModel?: string; - fastMode?: boolean; - scenarioIds?: string[]; - sutAccountId?: string; -}) { - const runtime = await loadQaLabCliRuntime(); - await runtime.runQaTelegramCommand(opts); -} - async function runQaCharacterEval(opts: { repoRoot?: string; outputDir?: string; @@ -80,11 +68,6 @@ async function runQaManualLane(opts: { await runtime.runQaManualLaneCommand(opts); } -function collectString(value: string, previous: string[]) { - const trimmed = value.trim(); - return trimmed ? [...previous, trimmed] : previous; -} - async function runQaUi(opts: { repoRoot?: string; host?: string; @@ -216,52 +199,9 @@ export function registerQaLabCli(program: Command) { }, ); - qa.command("telegram") - .description("Run the manual Telegram live QA lane against a private bot-to-bot group harness") - .option("--repo-root ", "Repository root to target when running from a neutral cwd") - .option("--output-dir ", "Telegram QA artifact directory") - .option( - "--provider-mode ", - "Provider mode: mock-openai or live-frontier (legacy live-openai still works)", - "live-frontier", - ) - .option("--model ", "Primary provider/model ref") - .option("--alt-model ", "Alternate provider/model ref") - .option( - "--scenario ", - "Run only the named Telegram QA scenario (repeatable)", - collectString, - [], - ) - .option("--fast", "Enable provider fast mode where supported", false) - .option( - "--sut-account ", - "Temporary Telegram account id inside the QA gateway config", - "sut", - ) - .action( - async (opts: { - repoRoot?: string; - outputDir?: string; - providerMode?: QaProviderModeInput; - model?: string; - altModel?: string; - scenario?: string[]; - fast?: boolean; - sutAccount?: string; - }) => { - await runQaTelegram({ - repoRoot: opts.repoRoot, - outputDir: opts.outputDir, - providerMode: opts.providerMode, - primaryModel: opts.model, - alternateModel: opts.altModel, - fastMode: opts.fast, - scenarioIds: opts.scenario, - sutAccountId: opts.sutAccount, - }); - }, - ); + for (const lane of LIVE_TRANSPORT_QA_CLI_REGISTRATIONS) { + lane.register(qa); + } qa.command("character-eval") .description("Run the character QA scenario across live models and write a judged report") diff --git a/extensions/qa-lab/src/docker-runtime.ts b/extensions/qa-lab/src/docker-runtime.ts new file mode 100644 index 00000000000..f8b24032a8b --- /dev/null +++ b/extensions/qa-lab/src/docker-runtime.ts @@ -0,0 +1,283 @@ +import { execFile } from "node:child_process"; +import { createServer } from "node:net"; +import { fetchWithSsrFGuard } from "openclaw/plugin-sdk/ssrf-runtime"; + +export type RunCommand = ( + command: string, + args: string[], + cwd: string, +) => Promise<{ stdout: string; stderr: string }>; + +export type FetchLike = (input: string) => Promise<{ ok: boolean }>; + +export async function fetchHealthUrl(url: string): Promise<{ ok: boolean }> { + const { response, release } = await fetchWithSsrFGuard({ + url, + init: { + signal: AbortSignal.timeout(2_000), + }, + policy: { allowPrivateNetwork: true }, + auditContext: "qa-lab-docker-health-check", + }); + try { + return { ok: response.ok }; + } finally { + await release(); + } +} + +export function describeError(error: unknown) { + if (error instanceof Error) { + return error.message; + } + if (typeof error === "string") { + return error; + } + return JSON.stringify(error); +} + +async function isPortFree(port: number) { + return await new Promise((resolve) => { + const server = createServer(); + server.once("error", () => resolve(false)); + server.listen(port, "127.0.0.1", () => { + server.close(() => resolve(true)); + }); + }); +} + +async function findFreePort() { + return await new Promise((resolve, reject) => { + const server = createServer(); + server.once("error", reject); + server.listen(0, () => { + const address = server.address(); + if (!address || typeof address === "string") { + server.close(); + reject(new Error("failed to find free port")); + return; + } + server.close((error) => { + if (error) { + reject(error); + return; + } + resolve(address.port); + }); + }); + }); +} + +export async function resolveHostPort(preferredPort: number, pinned: boolean) { + if (pinned || (await isPortFree(preferredPort))) { + return preferredPort; + } + return await findFreePort(); +} + +function trimCommandOutput(output: string) { + const trimmed = output.trim(); + if (!trimmed) { + return ""; + } + const lines = trimmed.split("\n"); + return lines.length <= 120 ? trimmed : lines.slice(-120).join("\n"); +} + +export async function execCommand(command: string, args: string[], cwd: string) { + return await new Promise<{ stdout: string; stderr: string }>((resolve, reject) => { + execFile( + command, + args, + { cwd, encoding: "utf8", maxBuffer: 10 * 1024 * 1024 }, + (error, stdout, stderr) => { + if (error) { + const renderedStdout = trimCommandOutput(stdout); + const renderedStderr = trimCommandOutput(stderr); + reject( + new Error( + [ + `Command failed: ${[command, ...args].join(" ")}`, + renderedStderr ? `stderr:\n${renderedStderr}` : "", + renderedStdout ? `stdout:\n${renderedStdout}` : "", + ] + .filter(Boolean) + .join("\n\n"), + ), + ); + return; + } + resolve({ stdout, stderr }); + }, + ); + }); +} + +export async function waitForHealth( + url: string, + deps: { + label?: string; + composeFile?: string; + fetchImpl: FetchLike; + sleepImpl: (ms: number) => Promise; + timeoutMs?: number; + pollMs?: number; + }, +) { + const timeoutMs = deps.timeoutMs ?? 360_000; + const pollMs = deps.pollMs ?? 1_000; + const startMs = Date.now(); + const deadline = startMs + timeoutMs; + let lastError: unknown = null; + + while (Date.now() < deadline) { + try { + const response = await deps.fetchImpl(url); + if (response.ok) { + return; + } + lastError = new Error(`Health check returned non-OK for ${url}`); + } catch (error) { + lastError = error; + } + await deps.sleepImpl(pollMs); + } + + const elapsedSec = Math.round((Date.now() - startMs) / 1000); + const service = deps.label ?? url; + const lines = [ + `${service} did not become healthy within ${elapsedSec}s (limit ${Math.round(timeoutMs / 1000)}s).`, + lastError ? `Last error: ${describeError(lastError)}` : "", + `Hint: check container logs with \`docker compose -f ${deps.composeFile ?? ""} logs\` and verify the port is not already in use.`, + ]; + throw new Error(lines.filter(Boolean).join("\n")); +} + +async function isHealthy(url: string, fetchImpl: FetchLike) { + try { + const response = await fetchImpl(url); + return response.ok; + } catch { + return false; + } +} + +function normalizeDockerServiceStatus(row?: { Health?: string; State?: string }) { + const health = row?.Health?.trim(); + if (health) { + return health; + } + const state = row?.State?.trim(); + if (state) { + return state; + } + return "unknown"; +} + +function parseDockerComposePsRows(stdout: string) { + const trimmed = stdout.trim(); + if (!trimmed) { + return [] as Array<{ Health?: string; State?: string }>; + } + + try { + const parsed = JSON.parse(trimmed) as + | Array<{ Health?: string; State?: string }> + | { Health?: string; State?: string }; + if (Array.isArray(parsed)) { + return parsed; + } + return [parsed]; + } catch { + return trimmed + .split("\n") + .map((line) => line.trim()) + .filter(Boolean) + .map((line) => JSON.parse(line) as { Health?: string; State?: string }); + } +} + +export async function waitForDockerServiceHealth( + service: string, + composeFile: string, + repoRoot: string, + runCommand: RunCommand, + sleepImpl: (ms: number) => Promise, + timeoutMs = 360_000, + pollMs = 1_000, +) { + const startMs = Date.now(); + const deadline = startMs + timeoutMs; + let lastStatus = "unknown"; + + while (Date.now() < deadline) { + try { + const { stdout } = await runCommand( + "docker", + ["compose", "-f", composeFile, "ps", "--format", "json", service], + repoRoot, + ); + const rows = parseDockerComposePsRows(stdout); + const row = rows[0]; + lastStatus = normalizeDockerServiceStatus(row); + if (lastStatus === "healthy" || lastStatus === "running") { + return; + } + } catch (error) { + lastStatus = describeError(error); + } + await sleepImpl(pollMs); + } + + const elapsedSec = Math.round((Date.now() - startMs) / 1000); + throw new Error( + [ + `${service} did not become healthy within ${elapsedSec}s (limit ${Math.round(timeoutMs / 1000)}s).`, + `Last status: ${lastStatus}`, + `Hint: check container logs with \`docker compose -f ${composeFile} logs ${service}\`.`, + ].join("\n"), + ); +} + +export async function resolveComposeServiceUrl( + service: string, + port: number, + composeFile: string, + repoRoot: string, + runCommand: RunCommand, + fetchImpl?: FetchLike, +) { + const { stdout: containerStdout } = await runCommand( + "docker", + ["compose", "-f", composeFile, "ps", "-q", service], + repoRoot, + ); + const containerId = containerStdout.trim(); + if (!containerId) { + return null; + } + const { stdout: ipStdout } = await runCommand( + "docker", + [ + "inspect", + "--format", + "{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}", + containerId, + ], + repoRoot, + ); + const ip = ipStdout.trim(); + if (!ip) { + return null; + } + const baseUrl = `http://${ip}:${port}/`; + if (!fetchImpl) { + return baseUrl; + } + return (await isHealthy(`${baseUrl}healthz`, fetchImpl)) ? baseUrl : null; +} + +export const __testing = { + fetchHealthUrl, + normalizeDockerServiceStatus, +}; diff --git a/extensions/qa-lab/src/docker-up.runtime.test.ts b/extensions/qa-lab/src/docker-up.runtime.test.ts index d34d8395b29..320a6e6f357 100644 --- a/extensions/qa-lab/src/docker-up.runtime.test.ts +++ b/extensions/qa-lab/src/docker-up.runtime.test.ts @@ -49,7 +49,7 @@ describe("runQaDockerUp", () => { async runCommand(command, args, cwd) { calls.push([command, ...args, `@${cwd}`].join(" ")); if (args.join(" ").includes("ps --format json openclaw-qa-gateway")) { - return { stdout: '{"Health":"healthy","State":"running"}\n', stderr: "" }; + return { stdout: '[{"Health":"healthy","State":"running"}]\n', stderr: "" }; } return { stdout: "", stderr: "" }; }, diff --git a/extensions/qa-lab/src/docker-up.runtime.ts b/extensions/qa-lab/src/docker-up.runtime.ts index 7af118ca8af..3a1c272689a 100644 --- a/extensions/qa-lab/src/docker-up.runtime.ts +++ b/extensions/qa-lab/src/docker-up.runtime.ts @@ -1,8 +1,16 @@ -import { execFile } from "node:child_process"; -import { createServer } from "node:net"; import path from "node:path"; import { setTimeout as sleep } from "node:timers/promises"; import { writeQaDockerHarnessFiles } from "./docker-harness.js"; +import { + execCommand, + fetchHealthUrl, + resolveComposeServiceUrl, + resolveHostPort, + waitForDockerServiceHealth, + waitForHealth, + type FetchLike, + type RunCommand, +} from "./docker-runtime.js"; type QaDockerUpResult = { outputDir: string; @@ -12,234 +20,10 @@ type QaDockerUpResult = { stopCommand: string; }; -type RunCommand = ( - command: string, - args: string[], - cwd: string, -) => Promise<{ stdout: string; stderr: string }>; - -type FetchLike = (input: string) => Promise<{ ok: boolean }>; - function resolveDefaultQaDockerDir(repoRoot: string) { return path.resolve(repoRoot, ".artifacts/qa-docker"); } -function describeError(error: unknown) { - if (error instanceof Error) { - return error.message; - } - if (typeof error === "string") { - return error; - } - return JSON.stringify(error); -} - -async function isPortFree(port: number) { - return await new Promise((resolve) => { - const server = createServer(); - server.once("error", () => resolve(false)); - server.listen(port, "127.0.0.1", () => { - server.close(() => resolve(true)); - }); - }); -} - -async function findFreePort() { - return await new Promise((resolve, reject) => { - const server = createServer(); - server.once("error", reject); - server.listen(0, () => { - const address = server.address(); - if (!address || typeof address === "string") { - server.close(); - reject(new Error("failed to find free port")); - return; - } - server.close((error) => { - if (error) { - reject(error); - return; - } - resolve(address.port); - }); - }); - }); -} - -async function resolveHostPort(preferredPort: number, pinned: boolean) { - if (pinned || (await isPortFree(preferredPort))) { - return preferredPort; - } - return await findFreePort(); -} - -function trimCommandOutput(output: string) { - const trimmed = output.trim(); - if (!trimmed) { - return ""; - } - const lines = trimmed.split("\n"); - return lines.length <= 120 ? trimmed : lines.slice(-120).join("\n"); -} - -async function execCommand(command: string, args: string[], cwd: string) { - return await new Promise<{ stdout: string; stderr: string }>((resolve, reject) => { - execFile( - command, - args, - { cwd, encoding: "utf8", maxBuffer: 10 * 1024 * 1024 }, - (error, stdout, stderr) => { - if (error) { - const renderedStdout = trimCommandOutput(stdout); - const renderedStderr = trimCommandOutput(stderr); - reject( - new Error( - [ - `Command failed: ${[command, ...args].join(" ")}`, - renderedStderr ? `stderr:\n${renderedStderr}` : "", - renderedStdout ? `stdout:\n${renderedStdout}` : "", - ] - .filter(Boolean) - .join("\n\n"), - ), - ); - return; - } - resolve({ stdout, stderr }); - }, - ); - }); -} - -async function waitForHealth( - url: string, - deps: { - label?: string; - composeFile?: string; - fetchImpl: FetchLike; - sleepImpl: (ms: number) => Promise; - timeoutMs?: number; - pollMs?: number; - }, -) { - const timeoutMs = deps.timeoutMs ?? 360_000; - const pollMs = deps.pollMs ?? 1_000; - const startMs = Date.now(); - const deadline = startMs + timeoutMs; - let lastError: unknown = null; - - while (Date.now() < deadline) { - try { - const response = await deps.fetchImpl(url); - if (response.ok) { - return; - } - lastError = new Error(`Health check returned non-OK for ${url}`); - } catch (error) { - lastError = error; - } - await deps.sleepImpl(pollMs); - } - - const elapsedSec = Math.round((Date.now() - startMs) / 1000); - const service = deps.label ?? url; - const lines = [ - `${service} did not become healthy within ${elapsedSec}s (limit ${Math.round(timeoutMs / 1000)}s).`, - lastError ? `Last error: ${describeError(lastError)}` : "", - `Hint: check container logs with \`docker compose -f ${deps.composeFile ?? ""} logs\` and verify the port is not already in use.`, - ]; - throw new Error(lines.filter(Boolean).join("\n")); -} - -async function isHealthy(url: string, fetchImpl: FetchLike) { - try { - const response = await fetchImpl(url); - return response.ok; - } catch { - return false; - } -} - -async function waitForDockerServiceHealth( - service: string, - composeFile: string, - repoRoot: string, - runCommand: RunCommand, - sleepImpl: (ms: number) => Promise, - timeoutMs = 360_000, - pollMs = 1_000, -) { - const startMs = Date.now(); - const deadline = startMs + timeoutMs; - let lastStatus = "unknown"; - - while (Date.now() < deadline) { - try { - const { stdout } = await runCommand( - "docker", - ["compose", "-f", composeFile, "ps", "--format", "json", service], - repoRoot, - ); - const rows = stdout - .trim() - .split("\n") - .map((line) => line.trim()) - .filter(Boolean) - .map((line) => JSON.parse(line) as { Health?: string; State?: string }); - const row = rows[0]; - lastStatus = row?.Health ?? row?.State ?? "unknown"; - if (lastStatus === "healthy" || lastStatus === "running") { - return; - } - } catch (error) { - lastStatus = describeError(error); - } - await sleepImpl(pollMs); - } - - const elapsedSec = Math.round((Date.now() - startMs) / 1000); - throw new Error( - [ - `${service} did not become healthy within ${elapsedSec}s (limit ${Math.round(timeoutMs / 1000)}s).`, - `Last status: ${lastStatus}`, - `Hint: check container logs with \`docker compose -f ${composeFile} logs ${service}\`.`, - ].join("\n"), - ); -} - -async function resolveComposeServiceUrl( - service: string, - port: number, - composeFile: string, - repoRoot: string, - runCommand: RunCommand, -) { - const { stdout: containerStdout } = await runCommand( - "docker", - ["compose", "-f", composeFile, "ps", "-q", service], - repoRoot, - ); - const containerId = containerStdout.trim(); - if (!containerId) { - return null; - } - const { stdout: ipStdout } = await runCommand( - "docker", - [ - "inspect", - "--format", - "{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}", - containerId, - ], - repoRoot, - ); - const ip = ipStdout.trim(); - if (!ip) { - return null; - } - return `http://${ip}:${port}/`; -} - export async function runQaDockerUp( params: { repoRoot?: string; @@ -268,11 +52,7 @@ export async function runQaDockerUp( ); const qaLabPort = await resolveHostPortImpl(params.qaLabPort ?? 43124, params.qaLabPort != null); const runCommand = deps?.runCommand ?? execCommand; - const fetchImpl = - deps?.fetchImpl ?? - (async (input: string) => { - return await fetch(input); - }); + const fetchImpl = deps?.fetchImpl ?? fetchHealthUrl; const sleepImpl = deps?.sleepImpl ?? sleep; if (!params.skipUiBuild) { @@ -333,15 +113,20 @@ export async function runQaDockerUp( sleepImpl, ); let gatewayUrl = hostGatewayUrl; - if (!(await isHealthy(`${hostGatewayUrl}healthz`, fetchImpl))) { + if ( + !(await fetchImpl(`${hostGatewayUrl}healthz`) + .then((response) => response.ok) + .catch(() => false)) + ) { const containerGatewayUrl = await resolveComposeServiceUrl( "openclaw-qa-gateway", 18789, composeFile, repoRoot, runCommand, + fetchImpl, ); - if (containerGatewayUrl && (await isHealthy(`${containerGatewayUrl}healthz`, fetchImpl))) { + if (containerGatewayUrl) { gatewayUrl = containerGatewayUrl; } } diff --git a/extensions/qa-lab/src/gateway-child.ts b/extensions/qa-lab/src/gateway-child.ts index 5a282910b43..004f286ac8e 100644 --- a/extensions/qa-lab/src/gateway-child.ts +++ b/extensions/qa-lab/src/gateway-child.ts @@ -720,6 +720,7 @@ export async function startQaGatewayChild(params: { repoRoot: string; providerBaseUrl?: string; qaBusBaseUrl: string; + includeQaChannel?: boolean; controlUiAllowedOrigins?: string[]; providerMode?: "mock-openai" | "live-frontier"; primaryModel?: string; @@ -780,6 +781,7 @@ export async function startQaGatewayChild(params: { gatewayToken, providerBaseUrl: params.providerBaseUrl, qaBusBaseUrl: params.qaBusBaseUrl, + includeQaChannel: params.includeQaChannel, workspaceDir, controlUiRoot: resolveQaControlUiRoot({ repoRoot: params.repoRoot, diff --git a/extensions/qa-lab/src/live-transports/cli.ts b/extensions/qa-lab/src/live-transports/cli.ts new file mode 100644 index 00000000000..2038ff80a8c --- /dev/null +++ b/extensions/qa-lab/src/live-transports/cli.ts @@ -0,0 +1,8 @@ +import { matrixQaCliRegistration } from "./matrix/cli.js"; +import type { LiveTransportQaCliRegistration } from "./shared/live-transport-cli.js"; +import { telegramQaCliRegistration } from "./telegram/cli.js"; + +export const LIVE_TRANSPORT_QA_CLI_REGISTRATIONS: readonly LiveTransportQaCliRegistration[] = [ + telegramQaCliRegistration, + matrixQaCliRegistration, +]; diff --git a/extensions/qa-lab/src/live-transports/matrix/cli.runtime.ts b/extensions/qa-lab/src/live-transports/matrix/cli.runtime.ts new file mode 100644 index 00000000000..2b1e0c0275a --- /dev/null +++ b/extensions/qa-lab/src/live-transports/matrix/cli.runtime.ts @@ -0,0 +1,15 @@ +import type { LiveTransportQaCommandOptions } from "../shared/live-transport-cli.js"; +import { + printLiveTransportQaArtifacts, + resolveLiveTransportQaRunOptions, +} from "../shared/live-transport-cli.runtime.js"; +import { runMatrixQaLive } from "./matrix-live.runtime.js"; + +export async function runQaMatrixCommand(opts: LiveTransportQaCommandOptions) { + const result = await runMatrixQaLive(resolveLiveTransportQaRunOptions(opts)); + printLiveTransportQaArtifacts("Matrix QA", { + report: result.reportPath, + summary: result.summaryPath, + "observed events": result.observedEventsPath, + }); +} diff --git a/extensions/qa-lab/src/live-transports/matrix/cli.ts b/extensions/qa-lab/src/live-transports/matrix/cli.ts new file mode 100644 index 00000000000..9c9cfd88668 --- /dev/null +++ b/extensions/qa-lab/src/live-transports/matrix/cli.ts @@ -0,0 +1,32 @@ +import type { Command } from "commander"; +import { + createLazyCliRuntimeLoader, + createLiveTransportQaCliRegistration, + type LiveTransportQaCliRegistration, + type LiveTransportQaCommandOptions, +} from "../shared/live-transport-cli.js"; + +type MatrixQaCliRuntime = typeof import("./cli.runtime.js"); + +const loadMatrixQaCliRuntime = createLazyCliRuntimeLoader( + () => import("./cli.runtime.js"), +); + +async function runQaMatrix(opts: LiveTransportQaCommandOptions) { + const runtime = await loadMatrixQaCliRuntime(); + await runtime.runQaMatrixCommand(opts); +} + +export const matrixQaCliRegistration: LiveTransportQaCliRegistration = + createLiveTransportQaCliRegistration({ + commandName: "matrix", + description: "Run the Docker-backed Matrix live QA lane against a disposable homeserver", + outputDirHelp: "Matrix QA artifact directory", + scenarioHelp: "Run only the named Matrix QA scenario (repeatable)", + sutAccountHelp: "Temporary Matrix account id inside the QA gateway config", + run: runQaMatrix, + }); + +export function registerMatrixQaCli(qa: Command) { + matrixQaCliRegistration.register(qa); +} diff --git a/extensions/qa-lab/src/live-transports/matrix/matrix-driver-client.test.ts b/extensions/qa-lab/src/live-transports/matrix/matrix-driver-client.test.ts new file mode 100644 index 00000000000..8c524c7cd82 --- /dev/null +++ b/extensions/qa-lab/src/live-transports/matrix/matrix-driver-client.test.ts @@ -0,0 +1,349 @@ +import { describe, expect, it } from "vitest"; +import { + __testing, + createMatrixQaClient, + provisionMatrixQaRoom, + type MatrixQaObservedEvent, +} from "./matrix-driver-client.js"; + +function resolveRequestUrl(input: RequestInfo | URL) { + if (typeof input === "string") { + return input; + } + if (input instanceof URL) { + return input.toString(); + } + return input.url; +} + +function parseJsonRequestBody(init?: RequestInit) { + if (typeof init?.body !== "string") { + return {}; + } + return JSON.parse(init.body) as Record; +} + +describe("matrix driver client", () => { + it("builds Matrix HTML mentions for QA driver messages", () => { + expect( + __testing.buildMatrixQaMessageContent({ + body: "@sut:matrix-qa.test reply with exactly: TOKEN", + mentionUserIds: ["@sut:matrix-qa.test"], + }), + ).toEqual({ + body: "@sut:matrix-qa.test reply with exactly: TOKEN", + msgtype: "m.text", + format: "org.matrix.custom.html", + formatted_body: + '@sut:matrix-qa.test reply with exactly: TOKEN', + "m.mentions": { + user_ids: ["@sut:matrix-qa.test"], + }, + }); + }); + + it("omits Matrix HTML markup when the body has no visible mention token", () => { + expect( + __testing.buildMatrixQaMessageContent({ + body: "reply with exactly: TOKEN", + mentionUserIds: ["@sut:matrix-qa.test"], + }), + ).toEqual({ + body: "reply with exactly: TOKEN", + msgtype: "m.text", + "m.mentions": { + user_ids: ["@sut:matrix-qa.test"], + }, + }); + }); + + it("normalizes message events with thread metadata", () => { + expect( + __testing.normalizeMatrixQaObservedEvent("!room:matrix-qa.test", { + event_id: "$event", + sender: "@sut:matrix-qa.test", + type: "m.room.message", + origin_server_ts: 1_700_000_000_000, + content: { + body: "hello", + msgtype: "m.text", + "m.mentions": { + user_ids: ["@sut:matrix-qa.test"], + }, + "m.relates_to": { + rel_type: "m.thread", + event_id: "$root", + is_falling_back: true, + "m.in_reply_to": { + event_id: "$driver", + }, + }, + }, + }), + ).toEqual({ + roomId: "!room:matrix-qa.test", + eventId: "$event", + sender: "@sut:matrix-qa.test", + type: "m.room.message", + originServerTs: 1_700_000_000_000, + body: "hello", + msgtype: "m.text", + relatesTo: { + relType: "m.thread", + eventId: "$root", + inReplyToId: "$driver", + isFallingBack: true, + }, + mentions: { + userIds: ["@sut:matrix-qa.test"], + }, + }); + }); + + it("builds trimmed Matrix reaction relations for QA driver events", () => { + expect(__testing.buildMatrixReactionRelation(" $msg-1 ", " 👍 ")).toEqual({ + "m.relates_to": { + rel_type: "m.annotation", + event_id: "$msg-1", + key: "👍", + }, + }); + }); + + it("normalizes Matrix reaction events with target metadata", () => { + expect( + __testing.normalizeMatrixQaObservedEvent("!room:matrix-qa.test", { + event_id: "$reaction", + sender: "@driver:matrix-qa.test", + type: "m.reaction", + origin_server_ts: 1_700_000_000_000, + content: { + "m.relates_to": { + rel_type: "m.annotation", + event_id: "$msg", + key: "👍", + }, + }, + }), + ).toEqual({ + roomId: "!room:matrix-qa.test", + eventId: "$reaction", + sender: "@driver:matrix-qa.test", + type: "m.reaction", + originServerTs: 1_700_000_000_000, + relatesTo: { + eventId: "$msg", + relType: "m.annotation", + }, + reaction: { + eventId: "$msg", + key: "👍", + }, + }); + }); + + it("advances Matrix registration through token then dummy auth stages", () => { + const firstStage = __testing.resolveNextRegistrationAuth({ + registrationToken: "reg-token", + response: { + session: "uiaa-session", + flows: [{ stages: ["m.login.registration_token", "m.login.dummy"] }], + }, + }); + + expect(firstStage).toEqual({ + session: "uiaa-session", + type: "m.login.registration_token", + token: "reg-token", + }); + + expect( + __testing.resolveNextRegistrationAuth({ + registrationToken: "reg-token", + response: { + session: "uiaa-session", + completed: ["m.login.registration_token"], + flows: [{ stages: ["m.login.registration_token", "m.login.dummy"] }], + }, + }), + ).toEqual({ + session: "uiaa-session", + type: "m.login.dummy", + }); + }); + + it("rejects Matrix UIAA flows that require unsupported stages", () => { + expect(() => + __testing.resolveNextRegistrationAuth({ + registrationToken: "reg-token", + response: { + session: "uiaa-session", + flows: [{ stages: ["m.login.registration_token", "m.login.recaptcha", "m.login.dummy"] }], + }, + }), + ).toThrow("Matrix registration requires unsupported auth stages:"); + }); + + it("returns a typed no-match result while preserving the latest sync token", async () => { + const fetchImpl: typeof fetch = async () => + new Response( + JSON.stringify({ + next_batch: "next-batch-2", + rooms: { + join: { + "!room:matrix-qa.test": { + timeline: { + events: [ + { + event_id: "$driver", + sender: "@driver:matrix-qa.test", + type: "m.room.message", + content: { body: "hello", msgtype: "m.text" }, + }, + ], + }, + }, + }, + }, + }), + { status: 200, headers: { "content-type": "application/json" } }, + ); + + const client = createMatrixQaClient({ + accessToken: "token", + baseUrl: "http://127.0.0.1:28008/", + fetchImpl, + }); + const observedEvents: MatrixQaObservedEvent[] = []; + + const result = await client.waitForOptionalRoomEvent({ + observedEvents, + predicate: (event) => event.sender === "@sut:matrix-qa.test", + roomId: "!room:matrix-qa.test", + since: "start-batch", + timeoutMs: 1, + }); + + expect(result).toEqual({ + matched: false, + since: "next-batch-2", + }); + expect(observedEvents).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + body: "hello", + eventId: "$driver", + roomId: "!room:matrix-qa.test", + sender: "@driver:matrix-qa.test", + type: "m.room.message", + }), + ]), + ); + }); + + it("sends Matrix reactions through the protocol send endpoint", async () => { + const fetchImpl: typeof fetch = async (input, init) => { + expect(resolveRequestUrl(input)).toContain( + "/_matrix/client/v3/rooms/!room%3Amatrix-qa.test/send/m.reaction/", + ); + expect(parseJsonRequestBody(init)).toEqual({ + "m.relates_to": { + rel_type: "m.annotation", + event_id: "$msg-1", + key: "👍", + }, + }); + return new Response(JSON.stringify({ event_id: "$reaction-1" }), { + status: 200, + headers: { "content-type": "application/json" }, + }); + }; + + const client = createMatrixQaClient({ + accessToken: "token", + baseUrl: "http://127.0.0.1:28008/", + fetchImpl, + }); + + await expect( + client.sendReaction({ + emoji: "👍", + messageId: "$msg-1", + roomId: "!room:matrix-qa.test", + }), + ).resolves.toBe("$reaction-1"); + }); + + it("provisions a three-member room so Matrix QA runs in a group context", async () => { + const createRoomBodies: Array> = []; + const fetchImpl: typeof fetch = async (input, init) => { + const url = resolveRequestUrl(input); + const body = parseJsonRequestBody(init); + if (url.endsWith("/_matrix/client/v3/register")) { + const username = typeof body.username === "string" ? body.username : ""; + const auth = typeof body.auth === "object" && body.auth ? body.auth : undefined; + if (!auth) { + return new Response( + JSON.stringify({ + session: `session-${username}`, + flows: [{ stages: ["m.login.registration_token", "m.login.dummy"] }], + }), + { status: 401, headers: { "content-type": "application/json" } }, + ); + } + if ((auth as { type?: string }).type === "m.login.registration_token") { + return new Response( + JSON.stringify({ + session: `session-${username}`, + completed: ["m.login.registration_token"], + flows: [{ stages: ["m.login.registration_token", "m.login.dummy"] }], + }), + { status: 401, headers: { "content-type": "application/json" } }, + ); + } + return new Response( + JSON.stringify({ + access_token: `token-${username}`, + device_id: `device-${username}`, + user_id: `@${username}:matrix-qa.test`, + }), + { status: 200, headers: { "content-type": "application/json" } }, + ); + } + if (url.endsWith("/_matrix/client/v3/createRoom")) { + createRoomBodies.push(body); + return new Response(JSON.stringify({ room_id: "!room:matrix-qa.test" }), { + status: 200, + headers: { "content-type": "application/json" }, + }); + } + if (url.includes("/_matrix/client/v3/join/")) { + return new Response(JSON.stringify({ room_id: "!room:matrix-qa.test" }), { + status: 200, + headers: { "content-type": "application/json" }, + }); + } + throw new Error(`unexpected fetch ${url}`); + }; + + const result = await provisionMatrixQaRoom({ + baseUrl: "http://127.0.0.1:28008/", + driverLocalpart: "qa-driver", + observerLocalpart: "qa-observer", + registrationToken: "reg-token", + roomName: "OpenClaw Matrix QA", + sutLocalpart: "qa-sut", + fetchImpl, + }); + + expect(result.roomId).toBe("!room:matrix-qa.test"); + expect(result.observer.userId).toBe("@qa-observer:matrix-qa.test"); + expect(createRoomBodies).toEqual([ + expect.objectContaining({ + invite: ["@qa-sut:matrix-qa.test", "@qa-observer:matrix-qa.test"], + is_direct: false, + preset: "private_chat", + }), + ]); + }); +}); diff --git a/extensions/qa-lab/src/live-transports/matrix/matrix-driver-client.ts b/extensions/qa-lab/src/live-transports/matrix/matrix-driver-client.ts new file mode 100644 index 00000000000..21d52b4e7b5 --- /dev/null +++ b/extensions/qa-lab/src/live-transports/matrix/matrix-driver-client.ts @@ -0,0 +1,724 @@ +import { randomUUID } from "node:crypto"; +import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime"; + +type FetchLike = typeof fetch; + +type MatrixQaAuthStage = "m.login.dummy" | "m.login.registration_token"; + +type MatrixQaRequestResult = { + status: number; + body: T; +}; + +type MatrixQaRegisterResponse = { + access_token?: string; + device_id?: string; + user_id?: string; +}; + +type MatrixQaRoomCreateResponse = { + room_id?: string; +}; + +type MatrixQaSendMessageContent = { + body: string; + format?: "org.matrix.custom.html"; + formatted_body?: string; + "m.mentions"?: { + user_ids?: string[]; + }; + "m.relates_to"?: { + rel_type: "m.thread"; + event_id: string; + is_falling_back: true; + "m.in_reply_to": { + event_id: string; + }; + }; + msgtype: "m.text"; +}; + +type MatrixQaSendReactionContent = { + "m.relates_to": { + event_id: string; + key: string; + rel_type: "m.annotation"; + }; +}; + +type MatrixQaSyncResponse = { + next_batch?: string; + rooms?: { + join?: Record< + string, + { + timeline?: { + events?: MatrixQaRoomEvent[]; + }; + } + >; + }; +}; + +type MatrixQaUiaaResponse = { + completed?: string[]; + flows?: Array<{ stages?: string[] }>; + session?: string; +}; + +type MatrixQaRoomEvent = { + content?: Record; + event_id?: string; + origin_server_ts?: number; + sender?: string; + state_key?: string; + type?: string; +}; + +export type MatrixQaObservedEvent = { + roomId: string; + eventId: string; + sender?: string; + stateKey?: string; + type: string; + originServerTs?: number; + body?: string; + formattedBody?: string; + msgtype?: string; + membership?: string; + relatesTo?: { + eventId?: string; + inReplyToId?: string; + isFallingBack?: boolean; + relType?: string; + }; + mentions?: { + room?: boolean; + userIds?: string[]; + }; + reaction?: { + eventId?: string; + key?: string; + }; +}; + +export type MatrixQaRegisteredAccount = { + accessToken: string; + deviceId?: string; + localpart: string; + password: string; + userId: string; +}; + +export type MatrixQaProvisionResult = { + driver: MatrixQaRegisteredAccount; + observer: MatrixQaRegisteredAccount; + roomId: string; + sut: MatrixQaRegisteredAccount; +}; + +export type MatrixQaRoomEventWaitResult = + | { + event: MatrixQaObservedEvent; + matched: true; + since?: string; + } + | { + matched: false; + since?: string; + }; + +function buildMatrixThreadRelation(threadRootEventId: string, replyToEventId?: string) { + return { + "m.relates_to": { + rel_type: "m.thread" as const, + event_id: threadRootEventId, + is_falling_back: true as const, + "m.in_reply_to": { + event_id: replyToEventId?.trim() || threadRootEventId, + }, + }, + }; +} + +function buildMatrixReactionRelation( + messageId: string, + emoji: string, +): MatrixQaSendReactionContent { + const normalizedMessageId = messageId.trim(); + const normalizedEmoji = emoji.trim(); + if (!normalizedMessageId) { + throw new Error("Matrix reaction requires a messageId"); + } + if (!normalizedEmoji) { + throw new Error("Matrix reaction requires an emoji"); + } + return { + "m.relates_to": { + rel_type: "m.annotation", + event_id: normalizedMessageId, + key: normalizedEmoji, + }, + }; +} + +function escapeMatrixHtml(value: string): string { + return value.replace(/[&<>"']/g, (char) => { + switch (char) { + case "&": + return "&"; + case "<": + return "<"; + case ">": + return ">"; + case '"': + return """; + case "'": + return "'"; + default: + return char; + } + }); +} + +function buildMatrixMentionLink(userId: string) { + const href = `https://matrix.to/#/${encodeURIComponent(userId)}`; + const label = escapeMatrixHtml(userId); + return `${label}`; +} + +function buildMatrixQaMessageContent(params: { + body: string; + mentionUserIds?: string[]; + replyToEventId?: string; + threadRootEventId?: string; +}): MatrixQaSendMessageContent { + const body = params.body; + const uniqueMentionUserIds = [...new Set(params.mentionUserIds?.filter(Boolean) ?? [])]; + const formattedParts: string[] = []; + let cursor = 0; + let usedFormattedMention = false; + + while (cursor < body.length) { + let matchedUserId: string | null = null; + for (const userId of uniqueMentionUserIds) { + if (body.startsWith(userId, cursor)) { + matchedUserId = userId; + break; + } + } + if (matchedUserId) { + formattedParts.push(buildMatrixMentionLink(matchedUserId)); + cursor += matchedUserId.length; + usedFormattedMention = true; + continue; + } + formattedParts.push(escapeMatrixHtml(body[cursor] ?? "")); + cursor += 1; + } + + return { + body, + msgtype: "m.text", + ...(usedFormattedMention + ? { + format: "org.matrix.custom.html" as const, + formatted_body: formattedParts.join(""), + } + : {}), + ...(uniqueMentionUserIds.length > 0 + ? { "m.mentions": { user_ids: uniqueMentionUserIds } } + : {}), + ...(params.threadRootEventId + ? buildMatrixThreadRelation(params.threadRootEventId, params.replyToEventId) + : {}), + }; +} + +function normalizeMentionUserIds(value: unknown) { + return Array.isArray(value) + ? value.filter((entry): entry is string => typeof entry === "string" && entry.trim().length > 0) + : undefined; +} + +export function normalizeMatrixQaObservedEvent( + roomId: string, + event: MatrixQaRoomEvent, +): MatrixQaObservedEvent | null { + const eventId = event.event_id?.trim(); + const type = event.type?.trim(); + if (!eventId || !type) { + return null; + } + const content = event.content ?? {}; + const relatesToRaw = content["m.relates_to"]; + const relatesTo = + typeof relatesToRaw === "object" && relatesToRaw !== null + ? (relatesToRaw as Record) + : null; + const inReplyToRaw = relatesTo?.["m.in_reply_to"]; + const inReplyTo = + typeof inReplyToRaw === "object" && inReplyToRaw !== null + ? (inReplyToRaw as Record) + : null; + const mentionsRaw = content["m.mentions"]; + const mentions = + typeof mentionsRaw === "object" && mentionsRaw !== null + ? (mentionsRaw as Record) + : null; + const mentionUserIds = normalizeMentionUserIds(mentions?.user_ids); + const reactionKey = + type === "m.reaction" && typeof relatesTo?.key === "string" ? relatesTo.key : undefined; + const reactionEventId = + type === "m.reaction" && typeof relatesTo?.event_id === "string" + ? relatesTo.event_id + : undefined; + + return { + roomId, + eventId, + sender: typeof event.sender === "string" ? event.sender : undefined, + stateKey: typeof event.state_key === "string" ? event.state_key : undefined, + type, + originServerTs: + typeof event.origin_server_ts === "number" ? Math.floor(event.origin_server_ts) : undefined, + body: typeof content.body === "string" ? content.body : undefined, + formattedBody: typeof content.formatted_body === "string" ? content.formatted_body : undefined, + msgtype: typeof content.msgtype === "string" ? content.msgtype : undefined, + membership: typeof content.membership === "string" ? content.membership : undefined, + ...(relatesTo + ? { + relatesTo: { + eventId: typeof relatesTo.event_id === "string" ? relatesTo.event_id : undefined, + inReplyToId: typeof inReplyTo?.event_id === "string" ? inReplyTo.event_id : undefined, + isFallingBack: + typeof relatesTo.is_falling_back === "boolean" + ? relatesTo.is_falling_back + : undefined, + relType: typeof relatesTo.rel_type === "string" ? relatesTo.rel_type : undefined, + }, + } + : {}), + ...(mentions + ? { + mentions: { + ...(mentions.room === true ? { room: true } : {}), + ...(mentionUserIds ? { userIds: mentionUserIds } : {}), + }, + } + : {}), + ...(reactionEventId || reactionKey + ? { + reaction: { + ...(reactionEventId ? { eventId: reactionEventId } : {}), + ...(reactionKey ? { key: reactionKey } : {}), + }, + } + : {}), + }; +} + +export function resolveNextRegistrationAuth(params: { + registrationToken: string; + response: MatrixQaUiaaResponse; +}) { + const session = params.response.session?.trim(); + if (!session) { + throw new Error("Matrix registration UIAA response did not include a session id."); + } + + const completed = new Set( + (params.response.completed ?? []).filter( + (stage): stage is MatrixQaAuthStage => + stage === "m.login.dummy" || stage === "m.login.registration_token", + ), + ); + const supportedStages = new Set([ + "m.login.registration_token", + "m.login.dummy", + ]); + + for (const flow of params.response.flows ?? []) { + const flowStages = flow.stages ?? []; + if ( + flowStages.length === 0 || + flowStages.some((stage) => !supportedStages.has(stage as MatrixQaAuthStage)) + ) { + continue; + } + const stages = flowStages as MatrixQaAuthStage[]; + const nextStage = stages.find((stage) => !completed.has(stage)); + if (!nextStage) { + continue; + } + if (nextStage === "m.login.registration_token") { + return { + session, + type: nextStage, + token: params.registrationToken, + }; + } + return { + session, + type: nextStage, + }; + } + + throw new Error( + `Matrix registration requires unsupported auth stages: ${JSON.stringify(params.response.flows ?? [])}`, + ); +} + +async function requestMatrixJson(params: { + accessToken?: string; + baseUrl: string; + body?: unknown; + endpoint: string; + fetchImpl: FetchLike; + method: "GET" | "POST" | "PUT"; + okStatuses?: number[]; + query?: Record; + timeoutMs?: number; +}) { + const url = new URL(params.endpoint, params.baseUrl); + for (const [key, value] of Object.entries(params.query ?? {})) { + if (value !== undefined) { + url.searchParams.set(key, String(value)); + } + } + const response = await params.fetchImpl(url, { + method: params.method, + headers: { + accept: "application/json", + ...(params.body !== undefined ? { "content-type": "application/json" } : {}), + ...(params.accessToken ? { authorization: `Bearer ${params.accessToken}` } : {}), + }, + ...(params.body !== undefined ? { body: JSON.stringify(params.body) } : {}), + signal: AbortSignal.timeout(params.timeoutMs ?? 20_000), + }); + let body: unknown = {}; + try { + body = (await response.json()) as unknown; + } catch { + body = {}; + } + const okStatuses = params.okStatuses ?? [200]; + if (!okStatuses.includes(response.status)) { + const details = + typeof body === "object" && + body !== null && + typeof (body as { error?: unknown }).error === "string" + ? (body as { error: string }).error + : `${params.method} ${params.endpoint} failed with status ${response.status}`; + throw new Error(details); + } + return { + status: response.status, + body: body as T, + } satisfies MatrixQaRequestResult; +} + +function buildRegisteredAccount(params: { + localpart: string; + password: string; + response: MatrixQaRegisterResponse; +}) { + const userId = params.response.user_id?.trim(); + const accessToken = params.response.access_token?.trim(); + if (!userId || !accessToken) { + throw new Error("Matrix registration did not return both user_id and access_token."); + } + return { + accessToken, + deviceId: params.response.device_id?.trim() || undefined, + localpart: params.localpart, + password: params.password, + userId, + } satisfies MatrixQaRegisteredAccount; +} + +export function createMatrixQaClient(params: { + accessToken?: string; + baseUrl: string; + fetchImpl?: FetchLike; +}) { + const fetchImpl = params.fetchImpl ?? fetch; + + async function waitForOptionalRoomEvent(opts: { + observedEvents: MatrixQaObservedEvent[]; + predicate: (event: MatrixQaObservedEvent) => boolean; + roomId: string; + since?: string; + timeoutMs: number; + }): Promise { + const startedAt = Date.now(); + let since = opts.since; + while (Date.now() - startedAt < opts.timeoutMs) { + const remainingMs = Math.max(1_000, opts.timeoutMs - (Date.now() - startedAt)); + const response = await requestMatrixJson({ + accessToken: params.accessToken, + baseUrl: params.baseUrl, + endpoint: "/_matrix/client/v3/sync", + fetchImpl, + method: "GET", + query: { + ...(since ? { since } : {}), + timeout: Math.min(10_000, remainingMs), + }, + timeoutMs: Math.min(15_000, remainingMs + 5_000), + }); + since = response.body.next_batch?.trim() || since; + const roomEvents = response.body.rooms?.join?.[opts.roomId]?.timeline?.events ?? []; + for (const event of roomEvents) { + const normalized = normalizeMatrixQaObservedEvent(opts.roomId, event); + if (!normalized) { + continue; + } + opts.observedEvents.push(normalized); + if (opts.predicate(normalized)) { + return { event: normalized, matched: true, since }; + } + } + } + return { matched: false, since }; + } + + return { + async createPrivateRoom(opts: { inviteUserIds: string[]; name: string }) { + const result = await requestMatrixJson({ + accessToken: params.accessToken, + baseUrl: params.baseUrl, + body: { + creation_content: { "m.federate": false }, + initial_state: [ + { + type: "m.room.history_visibility", + state_key: "", + content: { history_visibility: "joined" }, + }, + ], + invite: opts.inviteUserIds, + is_direct: false, + name: opts.name, + preset: "private_chat", + }, + endpoint: "/_matrix/client/v3/createRoom", + fetchImpl, + method: "POST", + }); + const roomId = result.body.room_id?.trim(); + if (!roomId) { + throw new Error("Matrix createRoom did not return room_id."); + } + return roomId; + }, + async primeRoom() { + const response = await requestMatrixJson({ + accessToken: params.accessToken, + baseUrl: params.baseUrl, + endpoint: "/_matrix/client/v3/sync", + fetchImpl, + method: "GET", + query: { timeout: 0 }, + }); + return response.body.next_batch?.trim() || undefined; + }, + async registerWithToken(opts: { + deviceName: string; + localpart: string; + password: string; + registrationToken: string; + }) { + let auth: Record | undefined; + const baseBody = { + inhibit_login: false, + initial_device_display_name: opts.deviceName, + password: opts.password, + username: opts.localpart, + }; + for (let attempt = 0; attempt < 4; attempt += 1) { + const response = await requestMatrixJson({ + baseUrl: params.baseUrl, + body: { + ...baseBody, + ...(auth ? { auth } : {}), + }, + endpoint: "/_matrix/client/v3/register", + fetchImpl, + method: "POST", + okStatuses: [200, 401], + timeoutMs: 30_000, + }); + if (response.status === 200) { + return buildRegisteredAccount({ + localpart: opts.localpart, + password: opts.password, + response: response.body as MatrixQaRegisterResponse, + }); + } + auth = resolveNextRegistrationAuth({ + registrationToken: opts.registrationToken, + response: response.body as MatrixQaUiaaResponse, + }); + } + throw new Error( + `Matrix registration for ${opts.localpart} did not complete after 4 attempts.`, + ); + }, + async sendTextMessage(opts: { + body: string; + mentionUserIds?: string[]; + replyToEventId?: string; + roomId: string; + threadRootEventId?: string; + }) { + const txnId = randomUUID(); + const result = await requestMatrixJson<{ event_id?: string }>({ + accessToken: params.accessToken, + baseUrl: params.baseUrl, + body: buildMatrixQaMessageContent(opts), + endpoint: `/_matrix/client/v3/rooms/${encodeURIComponent(opts.roomId)}/send/m.room.message/${encodeURIComponent(txnId)}`, + fetchImpl, + method: "PUT", + }); + const eventId = result.body.event_id?.trim(); + if (!eventId) { + throw new Error("Matrix sendMessage did not return event_id."); + } + return eventId; + }, + async sendReaction(opts: { emoji: string; messageId: string; roomId: string }) { + const txnId = randomUUID(); + const result = await requestMatrixJson<{ event_id?: string }>({ + accessToken: params.accessToken, + baseUrl: params.baseUrl, + body: buildMatrixReactionRelation(opts.messageId, opts.emoji), + endpoint: `/_matrix/client/v3/rooms/${encodeURIComponent(opts.roomId)}/send/m.reaction/${encodeURIComponent(txnId)}`, + fetchImpl, + method: "PUT", + }); + const eventId = result.body.event_id?.trim(); + if (!eventId) { + throw new Error("Matrix sendReaction did not return event_id."); + } + return eventId; + }, + async joinRoom(roomId: string) { + const result = await requestMatrixJson<{ room_id?: string }>({ + accessToken: params.accessToken, + baseUrl: params.baseUrl, + body: {}, + endpoint: `/_matrix/client/v3/join/${encodeURIComponent(roomId)}`, + fetchImpl, + method: "POST", + }); + return result.body.room_id?.trim() || roomId; + }, + waitForOptionalRoomEvent, + async waitForRoomEvent(opts: { + observedEvents: MatrixQaObservedEvent[]; + predicate: (event: MatrixQaObservedEvent) => boolean; + roomId: string; + since?: string; + timeoutMs: number; + }) { + const result = await waitForOptionalRoomEvent(opts); + if (result.matched) { + return { event: result.event, since: result.since }; + } + throw new Error(`timed out after ${opts.timeoutMs}ms waiting for Matrix room event`); + }, + }; +} + +async function joinRoomWithRetry(params: { + accessToken: string; + baseUrl: string; + fetchImpl?: FetchLike; + roomId: string; +}) { + const client = createMatrixQaClient({ + accessToken: params.accessToken, + baseUrl: params.baseUrl, + fetchImpl: params.fetchImpl, + }); + let lastError: unknown = null; + for (let attempt = 1; attempt <= 10; attempt += 1) { + try { + await client.joinRoom(params.roomId); + return; + } catch (error) { + lastError = error; + await new Promise((resolve) => setTimeout(resolve, 300 * attempt)); + } + } + throw new Error(`Matrix join retry failed: ${formatErrorMessage(lastError)}`); +} + +export async function provisionMatrixQaRoom(params: { + baseUrl: string; + fetchImpl?: FetchLike; + roomName: string; + driverLocalpart: string; + observerLocalpart: string; + registrationToken: string; + sutLocalpart: string; +}) { + const anonClient = createMatrixQaClient({ + baseUrl: params.baseUrl, + fetchImpl: params.fetchImpl, + }); + const driver = await anonClient.registerWithToken({ + deviceName: "OpenClaw Matrix QA Driver", + localpart: params.driverLocalpart, + password: `driver-${randomUUID()}`, + registrationToken: params.registrationToken, + }); + const sut = await anonClient.registerWithToken({ + deviceName: "OpenClaw Matrix QA SUT", + localpart: params.sutLocalpart, + password: `sut-${randomUUID()}`, + registrationToken: params.registrationToken, + }); + const observer = await anonClient.registerWithToken({ + deviceName: "OpenClaw Matrix QA Observer", + localpart: params.observerLocalpart, + password: `observer-${randomUUID()}`, + registrationToken: params.registrationToken, + }); + const driverClient = createMatrixQaClient({ + accessToken: driver.accessToken, + baseUrl: params.baseUrl, + fetchImpl: params.fetchImpl, + }); + const roomId = await driverClient.createPrivateRoom({ + inviteUserIds: [sut.userId, observer.userId], + name: params.roomName, + }); + await joinRoomWithRetry({ + accessToken: sut.accessToken, + baseUrl: params.baseUrl, + fetchImpl: params.fetchImpl, + roomId, + }); + await joinRoomWithRetry({ + accessToken: observer.accessToken, + baseUrl: params.baseUrl, + fetchImpl: params.fetchImpl, + roomId, + }); + return { + driver, + observer, + roomId, + sut, + } satisfies MatrixQaProvisionResult; +} + +export const __testing = { + buildMatrixQaMessageContent, + buildMatrixReactionRelation, + buildMatrixThreadRelation, + normalizeMatrixQaObservedEvent, + resolveNextRegistrationAuth, +}; diff --git a/extensions/qa-lab/src/live-transports/matrix/matrix-harness.runtime.test.ts b/extensions/qa-lab/src/live-transports/matrix/matrix-harness.runtime.test.ts new file mode 100644 index 00000000000..e1451fe2b39 --- /dev/null +++ b/extensions/qa-lab/src/live-transports/matrix/matrix-harness.runtime.test.ts @@ -0,0 +1,271 @@ +import { mkdtemp, readFile, rm } from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { describe, expect, it, vi } from "vitest"; +import { + __testing, + startMatrixQaHarness, + writeMatrixQaHarnessFiles, +} from "./matrix-harness.runtime.js"; + +describe("matrix harness runtime", () => { + it("writes a pinned Tuwunel compose file and redacted manifest", async () => { + const outputDir = await mkdtemp(path.join(os.tmpdir(), "matrix-qa-harness-")); + + try { + const result = await writeMatrixQaHarnessFiles({ + outputDir, + homeserverPort: 28008, + registrationToken: "secret-token", + serverName: "matrix-qa.test", + }); + + const compose = await readFile(result.composeFile, "utf8"); + const manifest = JSON.parse(await readFile(result.manifestPath, "utf8")) as { + image: string; + serverName: string; + homeserverPort: number; + composeFile: string; + }; + + expect(compose).toContain(`image: ${__testing.MATRIX_QA_DEFAULT_IMAGE}`); + expect(compose).toContain(' - "127.0.0.1:28008:8008"'); + expect(compose).toContain('TUWUNEL_ALLOW_REGISTRATION: "true"'); + expect(compose).toContain('TUWUNEL_REGISTRATION_TOKEN: "secret-token"'); + expect(compose).toContain('TUWUNEL_SERVER_NAME: "matrix-qa.test"'); + expect(manifest).toEqual({ + image: __testing.MATRIX_QA_DEFAULT_IMAGE, + serverName: "matrix-qa.test", + homeserverPort: 28008, + composeFile: path.join(outputDir, "docker-compose.matrix-qa.yml"), + dataDir: path.join(outputDir, "data"), + }); + expect(result.registrationToken).toBe("secret-token"); + } finally { + await rm(outputDir, { recursive: true, force: true }); + } + }); + + it("starts the harness, waits for versions, and exposes a stop command", async () => { + const calls: string[] = []; + const fetchCalls: string[] = []; + const outputDir = await mkdtemp(path.join(os.tmpdir(), "matrix-qa-harness-")); + + try { + const result = await startMatrixQaHarness( + { + outputDir, + repoRoot: "/repo/openclaw", + homeserverPort: 28008, + }, + { + async runCommand(command, args, cwd) { + calls.push([command, ...args, `@${cwd}`].join(" ")); + if (args.join(" ").includes("ps --format json")) { + return { stdout: '[{"State":"running"}]\n', stderr: "" }; + } + return { stdout: "", stderr: "" }; + }, + fetchImpl: vi.fn(async (input: string) => { + fetchCalls.push(input); + return { ok: true }; + }), + sleepImpl: vi.fn(async () => {}), + resolveHostPortImpl: vi.fn(async (port: number) => port), + }, + ); + + expect(calls).toEqual([ + `docker compose -f ${outputDir}/docker-compose.matrix-qa.yml down --remove-orphans @/repo/openclaw`, + `docker compose -f ${outputDir}/docker-compose.matrix-qa.yml up -d @/repo/openclaw`, + `docker compose -f ${outputDir}/docker-compose.matrix-qa.yml ps --format json matrix-qa-homeserver @/repo/openclaw`, + ]); + expect(fetchCalls).toEqual([ + "http://127.0.0.1:28008/_matrix/client/versions", + "http://127.0.0.1:28008/_matrix/client/versions", + ]); + expect(result.baseUrl).toBe("http://127.0.0.1:28008/"); + expect(result.stopCommand).toBe( + `docker compose -f ${outputDir}/docker-compose.matrix-qa.yml down --remove-orphans`, + ); + } finally { + await rm(outputDir, { recursive: true, force: true }); + } + }); + + it("treats empty Docker health fields as a fallback to running state", async () => { + const outputDir = await mkdtemp(path.join(os.tmpdir(), "matrix-qa-harness-")); + + try { + const result = await startMatrixQaHarness( + { + outputDir, + repoRoot: "/repo/openclaw", + homeserverPort: 28008, + }, + { + async runCommand(_command, args) { + if (args.join(" ").includes("ps --format json")) { + return { stdout: '{"Health":"","State":"running"}\n', stderr: "" }; + } + return { stdout: "", stderr: "" }; + }, + fetchImpl: vi.fn(async () => ({ ok: true })), + sleepImpl: vi.fn(async () => {}), + resolveHostPortImpl: vi.fn(async (port: number) => port), + }, + ); + + expect(result.baseUrl).toBe("http://127.0.0.1:28008/"); + } finally { + await rm(outputDir, { recursive: true, force: true }); + } + }); + + it("falls back to the container IP when the host port is unreachable", async () => { + const calls: string[] = []; + const outputDir = await mkdtemp(path.join(os.tmpdir(), "matrix-qa-harness-")); + + try { + const result = await startMatrixQaHarness( + { + outputDir, + repoRoot: "/repo/openclaw", + homeserverPort: 28008, + }, + { + async runCommand(command, args, cwd) { + calls.push([command, ...args, `@${cwd}`].join(" ")); + const rendered = args.join(" "); + if (rendered.includes("ps --format json")) { + return { stdout: '{"State":"running"}\n', stderr: "" }; + } + if (rendered.includes("ps -q")) { + return { stdout: "container-123\n", stderr: "" }; + } + if (rendered.includes("inspect --format")) { + return { stdout: "172.18.0.10\n", stderr: "" }; + } + return { stdout: "", stderr: "" }; + }, + fetchImpl: vi.fn(async (input: string) => ({ + ok: input.startsWith("http://172.18.0.10:8008/"), + })), + sleepImpl: vi.fn(async () => {}), + resolveHostPortImpl: vi.fn(async (port: number) => port), + }, + ); + + expect(result.baseUrl).toBe("http://172.18.0.10:8008/"); + expect(calls).toContain( + `docker compose -f ${outputDir}/docker-compose.matrix-qa.yml ps -q matrix-qa-homeserver @/repo/openclaw`, + ); + expect(calls).toContain( + "docker inspect --format {{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}} container-123 @/repo/openclaw", + ); + } finally { + await rm(outputDir, { recursive: true, force: true }); + } + }); + + it("keeps the host URL when the container IP is also unreachable", async () => { + const fetchCalls: string[] = []; + const outputDir = await mkdtemp(path.join(os.tmpdir(), "matrix-qa-harness-")); + + try { + const result = await startMatrixQaHarness( + { + outputDir, + repoRoot: "/repo/openclaw", + homeserverPort: 28008, + }, + { + async runCommand(_command, args) { + const rendered = args.join(" "); + if (rendered.includes("ps --format json")) { + return { stdout: '{"State":"running"}\n', stderr: "" }; + } + if (rendered.includes("ps -q")) { + return { stdout: "container-123\n", stderr: "" }; + } + if (rendered.includes("inspect --format")) { + return { stdout: "172.18.0.10\n", stderr: "" }; + } + return { stdout: "", stderr: "" }; + }, + fetchImpl: vi.fn(async (input: string) => { + fetchCalls.push(input); + return { + ok: + input === "http://127.0.0.1:28008/_matrix/client/versions" && + fetchCalls.filter((url) => url === input).length > 1, + }; + }), + sleepImpl: vi.fn(async () => {}), + resolveHostPortImpl: vi.fn(async (port: number) => port), + }, + ); + + expect(result.baseUrl).toBe("http://127.0.0.1:28008/"); + expect(fetchCalls).toEqual([ + "http://127.0.0.1:28008/_matrix/client/versions", + "http://127.0.0.1:28008/_matrix/client/versions", + "http://127.0.0.1:28008/_matrix/client/versions", + ]); + } finally { + await rm(outputDir, { recursive: true, force: true }); + } + }); + + it("keeps probing the container URL until it becomes reachable", async () => { + const fetchCalls: string[] = []; + const outputDir = await mkdtemp(path.join(os.tmpdir(), "matrix-qa-harness-")); + + try { + const result = await startMatrixQaHarness( + { + outputDir, + repoRoot: "/repo/openclaw", + homeserverPort: 28008, + }, + { + async runCommand(_command, args) { + const rendered = args.join(" "); + if (rendered.includes("ps --format json")) { + return { stdout: '{"State":"running"}\n', stderr: "" }; + } + if (rendered.includes("ps -q")) { + return { stdout: "container-123\n", stderr: "" }; + } + if (rendered.includes("inspect --format")) { + return { stdout: "172.18.0.10\n", stderr: "" }; + } + return { stdout: "", stderr: "" }; + }, + fetchImpl: vi.fn(async (input: string) => { + fetchCalls.push(input); + return { + ok: + input === "http://172.18.0.10:8008/_matrix/client/versions" && + fetchCalls.filter((url) => url === input).length > 1, + }; + }), + sleepImpl: vi.fn(async () => {}), + resolveHostPortImpl: vi.fn(async (port: number) => port), + }, + ); + + expect(result.baseUrl).toBe("http://172.18.0.10:8008/"); + expect(fetchCalls).toEqual([ + "http://127.0.0.1:28008/_matrix/client/versions", + "http://127.0.0.1:28008/_matrix/client/versions", + "http://172.18.0.10:8008/_matrix/client/versions", + "http://127.0.0.1:28008/_matrix/client/versions", + "http://172.18.0.10:8008/_matrix/client/versions", + "http://172.18.0.10:8008/_matrix/client/versions", + ]); + } finally { + await rm(outputDir, { recursive: true, force: true }); + } + }); +}); diff --git a/extensions/qa-lab/src/live-transports/matrix/matrix-harness.runtime.ts b/extensions/qa-lab/src/live-transports/matrix/matrix-harness.runtime.ts new file mode 100644 index 00000000000..d10b6df32b4 --- /dev/null +++ b/extensions/qa-lab/src/live-transports/matrix/matrix-harness.runtime.ts @@ -0,0 +1,275 @@ +import { randomUUID } from "node:crypto"; +import fs from "node:fs/promises"; +import path from "node:path"; +import { setTimeout as sleep } from "node:timers/promises"; +import { + execCommand, + fetchHealthUrl, + resolveComposeServiceUrl, + resolveHostPort, + waitForDockerServiceHealth, + waitForHealth, + type FetchLike, + type RunCommand, +} from "../../docker-runtime.js"; + +const MATRIX_QA_DEFAULT_IMAGE = "ghcr.io/matrix-construct/tuwunel:v1.5.1"; +const MATRIX_QA_DEFAULT_SERVER_NAME = "matrix-qa.test"; +const MATRIX_QA_DEFAULT_PORT = 28008; +const MATRIX_QA_INTERNAL_PORT = 8008; +const MATRIX_QA_SERVICE = "matrix-qa-homeserver"; + +type MatrixQaHarnessManifest = { + image: string; + serverName: string; + homeserverPort: number; + composeFile: string; + dataDir: string; +}; + +export type MatrixQaHarnessFiles = { + outputDir: string; + composeFile: string; + manifestPath: string; + image: string; + serverName: string; + homeserverPort: number; + registrationToken: string; +}; + +export type MatrixQaHarness = MatrixQaHarnessFiles & { + baseUrl: string; + stopCommand: string; + stop(): Promise; +}; + +function buildVersionsUrl(baseUrl: string) { + return `${baseUrl}_matrix/client/versions`; +} + +async function isMatrixVersionsReachable(baseUrl: string, fetchImpl: FetchLike) { + return await fetchImpl(buildVersionsUrl(baseUrl)) + .then((response) => response.ok) + .catch(() => false); +} + +async function waitForReachableMatrixBaseUrl(params: { + composeFile: string; + containerBaseUrl: string | null; + fetchImpl: FetchLike; + hostBaseUrl: string; + sleepImpl: (ms: number) => Promise; + timeoutMs?: number; + pollMs?: number; +}) { + const timeoutMs = params.timeoutMs ?? 60_000; + const pollMs = params.pollMs ?? 1_000; + const startedAt = Date.now(); + + while (Date.now() - startedAt < timeoutMs) { + if (await isMatrixVersionsReachable(params.hostBaseUrl, params.fetchImpl)) { + return params.hostBaseUrl; + } + if ( + params.containerBaseUrl && + (await isMatrixVersionsReachable(params.containerBaseUrl, params.fetchImpl)) + ) { + return params.containerBaseUrl; + } + await params.sleepImpl(pollMs); + } + + const candidateLabel = params.containerBaseUrl + ? `${params.hostBaseUrl} or ${params.containerBaseUrl}` + : params.hostBaseUrl; + throw new Error( + [ + `Matrix homeserver did not become healthy within ${Math.round(timeoutMs / 1000)}s.`, + `Last checked: ${candidateLabel}`, + `Hint: check container logs with \`docker compose -f ${params.composeFile} logs ${MATRIX_QA_SERVICE}\`.`, + ].join("\n"), + ); +} + +function resolveMatrixQaHarnessImage(image?: string) { + return ( + image?.trim() || process.env.OPENCLAW_QA_MATRIX_TUWUNEL_IMAGE?.trim() || MATRIX_QA_DEFAULT_IMAGE + ); +} + +function renderMatrixQaCompose(params: { + homeserverPort: number; + image: string; + registrationToken: string; + serverName: string; +}) { + return `services: + ${MATRIX_QA_SERVICE}: + image: ${params.image} + ports: + - "127.0.0.1:${params.homeserverPort}:${MATRIX_QA_INTERNAL_PORT}" + environment: + TUWUNEL_ADDRESS: "0.0.0.0" + TUWUNEL_ALLOW_ENCRYPTION: "false" + TUWUNEL_ALLOW_FEDERATION: "false" + TUWUNEL_ALLOW_REGISTRATION: "true" + TUWUNEL_DATABASE_PATH: "/var/lib/tuwunel" + TUWUNEL_PORT: "${MATRIX_QA_INTERNAL_PORT}" + TUWUNEL_REGISTRATION_TOKEN: "${params.registrationToken}" + TUWUNEL_SERVER_NAME: "${params.serverName}" + volumes: + - ./data:/var/lib/tuwunel +`; +} + +export async function writeMatrixQaHarnessFiles(params: { + outputDir: string; + image?: string; + homeserverPort: number; + registrationToken?: string; + serverName?: string; +}): Promise { + const image = resolveMatrixQaHarnessImage(params.image); + const registrationToken = params.registrationToken?.trim() || `matrix-qa-${randomUUID()}`; + const serverName = params.serverName?.trim() || MATRIX_QA_DEFAULT_SERVER_NAME; + const composeFile = path.join(params.outputDir, "docker-compose.matrix-qa.yml"); + const dataDir = path.join(params.outputDir, "data"); + const manifestPath = path.join(params.outputDir, "matrix-qa-harness.json"); + + await fs.mkdir(dataDir, { recursive: true }); + await fs.writeFile( + composeFile, + `${renderMatrixQaCompose({ + homeserverPort: params.homeserverPort, + image, + registrationToken, + serverName, + })}\n`, + { encoding: "utf8", mode: 0o600 }, + ); + const manifest: MatrixQaHarnessManifest = { + image, + serverName, + homeserverPort: params.homeserverPort, + composeFile, + dataDir, + }; + await fs.writeFile(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`, { + encoding: "utf8", + mode: 0o600, + }); + + return { + outputDir: params.outputDir, + composeFile, + manifestPath, + image, + serverName, + homeserverPort: params.homeserverPort, + registrationToken, + }; +} + +export async function startMatrixQaHarness( + params: { + outputDir: string; + repoRoot?: string; + image?: string; + homeserverPort?: number; + serverName?: string; + }, + deps?: { + fetchImpl?: FetchLike; + runCommand?: RunCommand; + sleepImpl?: (ms: number) => Promise; + resolveHostPortImpl?: typeof resolveHostPort; + }, +): Promise { + const repoRoot = path.resolve(params.repoRoot ?? process.cwd()); + const resolveHostPortImpl = deps?.resolveHostPortImpl ?? resolveHostPort; + const runCommand = deps?.runCommand ?? execCommand; + const fetchImpl = deps?.fetchImpl ?? fetchHealthUrl; + const sleepImpl = deps?.sleepImpl ?? sleep; + const homeserverPort = await resolveHostPortImpl( + params.homeserverPort ?? MATRIX_QA_DEFAULT_PORT, + params.homeserverPort != null, + ); + const files = await writeMatrixQaHarnessFiles({ + outputDir: path.resolve(params.outputDir), + image: params.image, + homeserverPort, + serverName: params.serverName, + }); + + try { + await runCommand( + "docker", + ["compose", "-f", files.composeFile, "down", "--remove-orphans"], + repoRoot, + ); + } catch { + // First run or already stopped. + } + + await runCommand("docker", ["compose", "-f", files.composeFile, "up", "-d"], repoRoot); + await sleepImpl(1_000); + await waitForDockerServiceHealth( + MATRIX_QA_SERVICE, + files.composeFile, + repoRoot, + runCommand, + sleepImpl, + ); + + const hostBaseUrl = `http://127.0.0.1:${homeserverPort}/`; + let baseUrl = hostBaseUrl; + const hostReachable = await isMatrixVersionsReachable(hostBaseUrl, fetchImpl); + if (!hostReachable) { + const containerBaseUrl = await resolveComposeServiceUrl( + MATRIX_QA_SERVICE, + MATRIX_QA_INTERNAL_PORT, + files.composeFile, + repoRoot, + runCommand, + ); + baseUrl = await waitForReachableMatrixBaseUrl({ + composeFile: files.composeFile, + containerBaseUrl, + fetchImpl, + hostBaseUrl, + sleepImpl, + }); + } + + await waitForHealth(buildVersionsUrl(baseUrl), { + label: "Matrix homeserver", + composeFile: files.composeFile, + fetchImpl, + sleepImpl, + }); + + return { + ...files, + baseUrl, + stopCommand: `docker compose -f ${files.composeFile} down --remove-orphans`, + async stop() { + await runCommand( + "docker", + ["compose", "-f", files.composeFile, "down", "--remove-orphans"], + repoRoot, + ); + }, + }; +} + +export const __testing = { + MATRIX_QA_DEFAULT_IMAGE, + MATRIX_QA_DEFAULT_PORT, + MATRIX_QA_DEFAULT_SERVER_NAME, + MATRIX_QA_SERVICE, + buildVersionsUrl, + isMatrixVersionsReachable, + renderMatrixQaCompose, + resolveMatrixQaHarnessImage, + waitForReachableMatrixBaseUrl, +}; diff --git a/extensions/qa-lab/src/live-transports/matrix/matrix-live-scenarios.test.ts b/extensions/qa-lab/src/live-transports/matrix/matrix-live-scenarios.test.ts new file mode 100644 index 00000000000..27ae4fab677 --- /dev/null +++ b/extensions/qa-lab/src/live-transports/matrix/matrix-live-scenarios.test.ts @@ -0,0 +1,153 @@ +import { describe, expect, it, beforeEach, vi } from "vitest"; +const { createMatrixQaClient } = vi.hoisted(() => ({ + createMatrixQaClient: vi.fn(), +})); + +vi.mock("./matrix-driver-client.js", () => ({ + createMatrixQaClient, +})); + +import { + LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS, + findMissingLiveTransportStandardScenarios, +} from "../shared/live-transport-scenarios.js"; +import { + __testing as scenarioTesting, + MATRIX_QA_SCENARIOS, + runMatrixQaScenario, +} from "./matrix-live-scenarios.js"; + +describe("matrix live qa scenarios", () => { + beforeEach(() => { + createMatrixQaClient.mockReset(); + }); + + it("ships the Matrix live QA scenario set by default", () => { + expect(scenarioTesting.findMatrixQaScenarios().map((scenario) => scenario.id)).toEqual([ + "matrix-thread-follow-up", + "matrix-thread-isolation", + "matrix-top-level-reply-shape", + "matrix-reaction-notification", + "matrix-restart-resume", + "matrix-mention-gating", + "matrix-allowlist-block", + ]); + }); + + it("uses the repo-wide exact marker prompt shape for Matrix mentions", () => { + expect( + scenarioTesting.buildMentionPrompt("@sut:matrix-qa.test", "MATRIX_QA_CANARY_TOKEN"), + ).toBe("@sut:matrix-qa.test reply with only this exact marker: MATRIX_QA_CANARY_TOKEN"); + }); + + it("requires Matrix replies to match the exact marker body", () => { + expect( + scenarioTesting.buildMatrixReplyArtifact( + { + roomId: "!room:matrix-qa.test", + eventId: "$event", + sender: "@sut:matrix-qa.test", + type: "m.room.message", + body: "MATRIX_QA_TOKEN", + }, + "MATRIX_QA_TOKEN", + ).tokenMatched, + ).toBe(true); + expect( + scenarioTesting.buildMatrixReplyArtifact( + { + roomId: "!room:matrix-qa.test", + eventId: "$event-2", + sender: "@sut:matrix-qa.test", + type: "m.room.message", + body: "prefix MATRIX_QA_TOKEN suffix", + }, + "MATRIX_QA_TOKEN", + ).tokenMatched, + ).toBe(false); + }); + + it("fails when any requested Matrix scenario id is unknown", () => { + expect(() => + scenarioTesting.findMatrixQaScenarios(["matrix-thread-follow-up", "typo-scenario"]), + ).toThrow("unknown Matrix QA scenario id(s): typo-scenario"); + }); + + it("covers the baseline live transport contract plus Matrix-specific extras", () => { + expect(scenarioTesting.MATRIX_QA_STANDARD_SCENARIO_IDS).toEqual([ + "canary", + "thread-follow-up", + "thread-isolation", + "top-level-reply-shape", + "reaction-observation", + "restart-resume", + "mention-gating", + "allowlist-block", + ]); + expect( + findMissingLiveTransportStandardScenarios({ + coveredStandardScenarioIds: scenarioTesting.MATRIX_QA_STANDARD_SCENARIO_IDS, + expectedStandardScenarioIds: LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS, + }), + ).toEqual([]); + }); + + it("primes the observer sync cursor instead of reusing the driver's cursor", async () => { + const primeRoom = vi.fn().mockResolvedValue("observer-sync-start"); + const sendTextMessage = vi.fn().mockResolvedValue("$observer-trigger"); + const waitForOptionalRoomEvent = vi.fn().mockImplementation(async (params) => { + expect(params.since).toBe("observer-sync-start"); + return { + matched: false, + since: "observer-sync-next", + }; + }); + + createMatrixQaClient.mockReturnValue({ + primeRoom, + sendTextMessage, + waitForOptionalRoomEvent, + }); + + const scenario = MATRIX_QA_SCENARIOS.find((entry) => entry.id === "matrix-allowlist-block"); + expect(scenario).toBeDefined(); + + const syncState = { + driver: "driver-sync-next", + }; + + await expect( + runMatrixQaScenario(scenario!, { + baseUrl: "http://127.0.0.1:28008/", + canary: undefined, + driverAccessToken: "driver-token", + driverUserId: "@driver:matrix-qa.test", + observedEvents: [], + observerAccessToken: "observer-token", + observerUserId: "@observer:matrix-qa.test", + roomId: "!room:matrix-qa.test", + restartGateway: undefined, + syncState, + sutUserId: "@sut:matrix-qa.test", + timeoutMs: 8_000, + }), + ).resolves.toMatchObject({ + artifacts: { + actorUserId: "@observer:matrix-qa.test", + expectedNoReplyWindowMs: 8_000, + }, + }); + + expect(createMatrixQaClient).toHaveBeenCalledWith({ + accessToken: "observer-token", + baseUrl: "http://127.0.0.1:28008/", + }); + expect(primeRoom).toHaveBeenCalledTimes(1); + expect(sendTextMessage).toHaveBeenCalledTimes(1); + expect(waitForOptionalRoomEvent).toHaveBeenCalledTimes(1); + expect(syncState).toEqual({ + driver: "driver-sync-next", + observer: "observer-sync-next", + }); + }); +}); diff --git a/extensions/qa-lab/src/live-transports/matrix/matrix-live-scenarios.ts b/extensions/qa-lab/src/live-transports/matrix/matrix-live-scenarios.ts new file mode 100644 index 00000000000..fe4979e9eeb --- /dev/null +++ b/extensions/qa-lab/src/live-transports/matrix/matrix-live-scenarios.ts @@ -0,0 +1,670 @@ +import { randomUUID } from "node:crypto"; +import { + collectLiveTransportStandardScenarioCoverage, + selectLiveTransportScenarios, + type LiveTransportScenarioDefinition, +} from "../shared/live-transport-scenarios.js"; +import { createMatrixQaClient, type MatrixQaObservedEvent } from "./matrix-driver-client.js"; + +export type MatrixQaScenarioId = + | "matrix-thread-follow-up" + | "matrix-thread-isolation" + | "matrix-top-level-reply-shape" + | "matrix-reaction-notification" + | "matrix-restart-resume" + | "matrix-mention-gating" + | "matrix-allowlist-block"; + +export type MatrixQaScenarioDefinition = LiveTransportScenarioDefinition; + +export type MatrixQaReplyArtifact = { + bodyPreview?: string; + eventId: string; + mentions?: MatrixQaObservedEvent["mentions"]; + relatesTo?: MatrixQaObservedEvent["relatesTo"]; + sender?: string; + tokenMatched?: boolean; +}; + +export type MatrixQaCanaryArtifact = { + driverEventId: string; + reply: MatrixQaReplyArtifact; + token: string; +}; + +export type MatrixQaScenarioArtifacts = { + actorUserId?: string; + driverEventId?: string; + expectedNoReplyWindowMs?: number; + reactionEmoji?: string; + reactionEventId?: string; + reactionTargetEventId?: string; + reply?: MatrixQaReplyArtifact; + restartSignal?: string; + rootEventId?: string; + threadDriverEventId?: string; + threadReply?: MatrixQaReplyArtifact; + threadRootEventId?: string; + threadToken?: string; + token?: string; + topLevelDriverEventId?: string; + topLevelReply?: MatrixQaReplyArtifact; + topLevelToken?: string; + triggerBody?: string; +}; + +export type MatrixQaScenarioExecution = { + artifacts?: MatrixQaScenarioArtifacts; + details: string; +}; + +type MatrixQaActorId = "driver" | "observer"; + +type MatrixQaSyncState = Partial>; + +type MatrixQaScenarioContext = { + baseUrl: string; + canary?: MatrixQaCanaryArtifact; + driverAccessToken: string; + driverUserId: string; + observedEvents: MatrixQaObservedEvent[]; + observerAccessToken: string; + observerUserId: string; + restartGateway?: () => Promise; + roomId: string; + syncState: MatrixQaSyncState; + sutUserId: string; + timeoutMs: number; +}; + +const NO_REPLY_WINDOW_MS = 8_000; + +export const MATRIX_QA_SCENARIOS: MatrixQaScenarioDefinition[] = [ + { + id: "matrix-thread-follow-up", + standardId: "thread-follow-up", + timeoutMs: 60_000, + title: "Matrix thread follow-up reply", + }, + { + id: "matrix-thread-isolation", + standardId: "thread-isolation", + timeoutMs: 75_000, + title: "Matrix top-level reply stays out of prior thread", + }, + { + id: "matrix-top-level-reply-shape", + standardId: "top-level-reply-shape", + timeoutMs: 45_000, + title: "Matrix top-level reply keeps replyToMode off", + }, + { + id: "matrix-reaction-notification", + standardId: "reaction-observation", + timeoutMs: 45_000, + title: "Matrix reactions on bot replies are observed", + }, + { + id: "matrix-restart-resume", + standardId: "restart-resume", + timeoutMs: 60_000, + title: "Matrix lane resumes cleanly after gateway restart", + }, + { + id: "matrix-mention-gating", + standardId: "mention-gating", + timeoutMs: NO_REPLY_WINDOW_MS, + title: "Matrix room message without mention does not trigger", + }, + { + id: "matrix-allowlist-block", + standardId: "allowlist-block", + timeoutMs: NO_REPLY_WINDOW_MS, + title: "Matrix allowlist blocks non-driver replies", + }, +]; + +export const MATRIX_QA_STANDARD_SCENARIO_IDS = collectLiveTransportStandardScenarioCoverage({ + alwaysOnStandardScenarioIds: ["canary"], + scenarios: MATRIX_QA_SCENARIOS, +}); + +export function findMatrixQaScenarios(ids?: string[]) { + return selectLiveTransportScenarios({ + ids, + laneLabel: "Matrix", + scenarios: MATRIX_QA_SCENARIOS, + }); +} + +export function buildMentionPrompt(sutUserId: string, token: string) { + return `${sutUserId} reply with only this exact marker: ${token}`; +} + +function buildExactMarkerPrompt(token: string) { + return `reply with only this exact marker: ${token}`; +} + +function buildMatrixReplyArtifact( + event: MatrixQaObservedEvent, + token?: string, +): MatrixQaReplyArtifact { + const replyBody = event.body?.trim(); + return { + bodyPreview: replyBody?.slice(0, 200), + eventId: event.eventId, + mentions: event.mentions, + relatesTo: event.relatesTo, + sender: event.sender, + ...(token ? { tokenMatched: replyBody === token } : {}), + }; +} + +export function buildMatrixReplyDetails(label: string, artifact: MatrixQaReplyArtifact) { + return [ + `${label} event: ${artifact.eventId}`, + `${label} token matched: ${ + artifact.tokenMatched === undefined ? "n/a" : artifact.tokenMatched ? "yes" : "no" + }`, + `${label} rel_type: ${artifact.relatesTo?.relType ?? ""}`, + `${label} in_reply_to: ${artifact.relatesTo?.inReplyToId ?? ""}`, + `${label} is_falling_back: ${artifact.relatesTo?.isFallingBack === true ? "true" : "false"}`, + ]; +} + +function assertTopLevelReplyArtifact(label: string, artifact: MatrixQaReplyArtifact) { + if (!artifact.tokenMatched) { + throw new Error(`${label} did not contain the expected token`); + } + if (artifact.relatesTo !== undefined) { + throw new Error(`${label} unexpectedly included relation metadata`); + } +} + +function assertThreadReplyArtifact( + artifact: MatrixQaReplyArtifact, + params: { + expectedRootEventId: string; + label: string; + }, +) { + if (!artifact.tokenMatched) { + throw new Error(`${params.label} did not contain the expected token`); + } + if (artifact.relatesTo?.relType !== "m.thread") { + throw new Error(`${params.label} did not use m.thread`); + } + if (artifact.relatesTo.eventId !== params.expectedRootEventId) { + throw new Error( + `${params.label} targeted ${artifact.relatesTo.eventId ?? ""} instead of ${params.expectedRootEventId}`, + ); + } + if (artifact.relatesTo.isFallingBack !== true) { + throw new Error(`${params.label} did not set is_falling_back`); + } + if (!artifact.relatesTo.inReplyToId) { + throw new Error(`${params.label} did not set m.in_reply_to`); + } +} + +function readMatrixQaSyncCursor(syncState: MatrixQaSyncState, actorId: MatrixQaActorId) { + return syncState[actorId]; +} + +function writeMatrixQaSyncCursor( + syncState: MatrixQaSyncState, + actorId: MatrixQaActorId, + since?: string, +) { + if (since) { + syncState[actorId] = since; + } +} + +async function primeMatrixQaActorCursor(params: { + accessToken: string; + actorId: MatrixQaActorId; + baseUrl: string; + syncState: MatrixQaSyncState; +}) { + const client = createMatrixQaClient({ + accessToken: params.accessToken, + baseUrl: params.baseUrl, + }); + const existingSince = readMatrixQaSyncCursor(params.syncState, params.actorId); + if (existingSince) { + return { client, startSince: existingSince }; + } + const startSince = await client.primeRoom(); + if (!startSince) { + throw new Error(`Matrix ${params.actorId} /sync prime did not return a next_batch cursor`); + } + return { client, startSince }; +} + +function advanceMatrixQaActorCursor(params: { + actorId: MatrixQaActorId; + syncState: MatrixQaSyncState; + nextSince?: string; + startSince: string; +}) { + writeMatrixQaSyncCursor(params.syncState, params.actorId, params.nextSince ?? params.startSince); +} + +async function runTopLevelMentionScenario(params: { + accessToken: string; + actorId: MatrixQaActorId; + baseUrl: string; + observedEvents: MatrixQaObservedEvent[]; + roomId: string; + syncState: MatrixQaSyncState; + sutUserId: string; + timeoutMs: number; + tokenPrefix: string; + withMention?: boolean; +}) { + const { client, startSince } = await primeMatrixQaActorCursor({ + accessToken: params.accessToken, + actorId: params.actorId, + baseUrl: params.baseUrl, + syncState: params.syncState, + }); + const token = `${params.tokenPrefix}_${randomUUID().slice(0, 8).toUpperCase()}`; + const body = + params.withMention === false + ? buildExactMarkerPrompt(token) + : buildMentionPrompt(params.sutUserId, token); + const driverEventId = await client.sendTextMessage({ + body, + ...(params.withMention === false ? {} : { mentionUserIds: [params.sutUserId] }), + roomId: params.roomId, + }); + const matched = await client.waitForRoomEvent({ + observedEvents: params.observedEvents, + predicate: (event) => + event.roomId === params.roomId && + event.sender === params.sutUserId && + event.type === "m.room.message" && + (event.body ?? "").includes(token) && + event.relatesTo === undefined, + roomId: params.roomId, + since: startSince, + timeoutMs: params.timeoutMs, + }); + advanceMatrixQaActorCursor({ + actorId: params.actorId, + syncState: params.syncState, + nextSince: matched.since, + startSince, + }); + return { + body, + driverEventId, + reply: buildMatrixReplyArtifact(matched.event, token), + since: matched.since, + token, + }; +} + +async function runThreadScenario(params: MatrixQaScenarioContext) { + const { client, startSince } = await primeMatrixQaActorCursor({ + accessToken: params.driverAccessToken, + actorId: "driver", + baseUrl: params.baseUrl, + syncState: params.syncState, + }); + const rootBody = `thread root ${randomUUID().slice(0, 8)}`; + const rootEventId = await client.sendTextMessage({ + body: rootBody, + roomId: params.roomId, + }); + const token = `MATRIX_QA_THREAD_${randomUUID().slice(0, 8).toUpperCase()}`; + const driverEventId = await client.sendTextMessage({ + body: buildMentionPrompt(params.sutUserId, token), + mentionUserIds: [params.sutUserId], + replyToEventId: rootEventId, + roomId: params.roomId, + threadRootEventId: rootEventId, + }); + const matched = await client.waitForRoomEvent({ + observedEvents: params.observedEvents, + predicate: (event) => + event.roomId === params.roomId && + event.sender === params.sutUserId && + event.type === "m.room.message" && + (event.body ?? "").includes(token) && + event.relatesTo?.relType === "m.thread" && + event.relatesTo.eventId === rootEventId, + roomId: params.roomId, + since: startSince, + timeoutMs: params.timeoutMs, + }); + advanceMatrixQaActorCursor({ + actorId: "driver", + syncState: params.syncState, + nextSince: matched.since, + startSince, + }); + return { + driverEventId, + reply: buildMatrixReplyArtifact(matched.event, token), + rootEventId, + since: matched.since, + token, + }; +} + +async function runNoReplyExpectedScenario(params: { + accessToken: string; + actorId: MatrixQaActorId; + actorUserId: string; + baseUrl: string; + body: string; + mentionUserIds?: string[]; + observedEvents: MatrixQaObservedEvent[]; + roomId: string; + syncState: MatrixQaSyncState; + sutUserId: string; + timeoutMs: number; + token: string; +}) { + const { client, startSince } = await primeMatrixQaActorCursor({ + accessToken: params.accessToken, + actorId: params.actorId, + baseUrl: params.baseUrl, + syncState: params.syncState, + }); + const driverEventId = await client.sendTextMessage({ + body: params.body, + ...(params.mentionUserIds ? { mentionUserIds: params.mentionUserIds } : {}), + roomId: params.roomId, + }); + const result = await client.waitForOptionalRoomEvent({ + observedEvents: params.observedEvents, + predicate: (event) => + event.roomId === params.roomId && + event.sender === params.sutUserId && + event.type === "m.room.message", + roomId: params.roomId, + since: startSince, + timeoutMs: params.timeoutMs, + }); + if (result.matched) { + const unexpectedReply = buildMatrixReplyArtifact(result.event, params.token); + throw new Error( + [ + `unexpected SUT reply from ${params.sutUserId}`, + `trigger sender: ${params.actorUserId}`, + ...buildMatrixReplyDetails("unexpected reply", unexpectedReply), + ].join("\n"), + ); + } + advanceMatrixQaActorCursor({ + actorId: params.actorId, + syncState: params.syncState, + nextSince: result.since, + startSince, + }); + return { + artifacts: { + actorUserId: params.actorUserId, + driverEventId, + expectedNoReplyWindowMs: params.timeoutMs, + token: params.token, + triggerBody: params.body, + }, + details: [ + `trigger event: ${driverEventId}`, + `trigger sender: ${params.actorUserId}`, + `waited ${params.timeoutMs}ms with no SUT reply`, + ].join("\n"), + } satisfies MatrixQaScenarioExecution; +} + +async function runReactionNotificationScenario(context: MatrixQaScenarioContext) { + const reactionTargetEventId = context.canary?.reply.eventId?.trim(); + if (!reactionTargetEventId) { + throw new Error("Matrix reaction scenario requires a canary reply event id"); + } + const { client, startSince } = await primeMatrixQaActorCursor({ + accessToken: context.driverAccessToken, + actorId: "driver", + baseUrl: context.baseUrl, + syncState: context.syncState, + }); + const reactionEmoji = "👍"; + const reactionEventId = await client.sendReaction({ + emoji: reactionEmoji, + messageId: reactionTargetEventId, + roomId: context.roomId, + }); + const matched = await client.waitForRoomEvent({ + observedEvents: context.observedEvents, + predicate: (event) => + event.roomId === context.roomId && + event.sender === context.driverUserId && + event.type === "m.reaction" && + event.eventId === reactionEventId && + event.reaction?.eventId === reactionTargetEventId && + event.reaction?.key === reactionEmoji, + roomId: context.roomId, + since: startSince, + timeoutMs: context.timeoutMs, + }); + advanceMatrixQaActorCursor({ + actorId: "driver", + syncState: context.syncState, + nextSince: matched.since, + startSince, + }); + return { + artifacts: { + reactionEmoji, + reactionEventId, + reactionTargetEventId, + }, + details: [ + `reaction event: ${reactionEventId}`, + `reaction target: ${reactionTargetEventId}`, + `reaction emoji: ${reactionEmoji}`, + `observed reaction key: ${matched.event.reaction?.key ?? ""}`, + ].join("\n"), + } satisfies MatrixQaScenarioExecution; +} + +async function runRestartResumeScenario(context: MatrixQaScenarioContext) { + if (!context.restartGateway) { + throw new Error("Matrix restart scenario requires a gateway restart callback"); + } + await context.restartGateway(); + const result = await runTopLevelMentionScenario({ + accessToken: context.driverAccessToken, + actorId: "driver", + baseUrl: context.baseUrl, + observedEvents: context.observedEvents, + roomId: context.roomId, + syncState: context.syncState, + sutUserId: context.sutUserId, + timeoutMs: context.timeoutMs, + tokenPrefix: "MATRIX_QA_RESTART", + }); + assertTopLevelReplyArtifact("post-restart reply", result.reply); + return { + artifacts: { + driverEventId: result.driverEventId, + reply: result.reply, + restartSignal: "SIGUSR1", + token: result.token, + }, + details: [ + "restart signal: SIGUSR1", + `post-restart driver event: ${result.driverEventId}`, + ...buildMatrixReplyDetails("reply", result.reply), + ].join("\n"), + } satisfies MatrixQaScenarioExecution; +} + +export async function runMatrixQaCanary(params: { + baseUrl: string; + driverAccessToken: string; + observedEvents: MatrixQaObservedEvent[]; + roomId: string; + syncState: MatrixQaSyncState; + sutUserId: string; + timeoutMs: number; +}) { + const canary = await runTopLevelMentionScenario({ + accessToken: params.driverAccessToken, + actorId: "driver", + baseUrl: params.baseUrl, + observedEvents: params.observedEvents, + roomId: params.roomId, + syncState: params.syncState, + sutUserId: params.sutUserId, + timeoutMs: params.timeoutMs, + tokenPrefix: "MATRIX_QA_CANARY", + }); + assertTopLevelReplyArtifact("canary reply", canary.reply); + return canary; +} + +export async function runMatrixQaScenario( + scenario: MatrixQaScenarioDefinition, + context: MatrixQaScenarioContext, +): Promise { + switch (scenario.id) { + case "matrix-thread-follow-up": { + const result = await runThreadScenario(context); + assertThreadReplyArtifact(result.reply, { + expectedRootEventId: result.rootEventId, + label: "thread reply", + }); + return { + artifacts: { + driverEventId: result.driverEventId, + reply: result.reply, + rootEventId: result.rootEventId, + token: result.token, + }, + details: [ + `root event: ${result.rootEventId}`, + `driver thread event: ${result.driverEventId}`, + ...buildMatrixReplyDetails("reply", result.reply), + ].join("\n"), + }; + } + case "matrix-thread-isolation": { + const threadPhase = await runThreadScenario(context); + assertThreadReplyArtifact(threadPhase.reply, { + expectedRootEventId: threadPhase.rootEventId, + label: "thread isolation reply", + }); + const topLevelPhase = await runTopLevelMentionScenario({ + accessToken: context.driverAccessToken, + actorId: "driver", + baseUrl: context.baseUrl, + observedEvents: context.observedEvents, + roomId: context.roomId, + syncState: context.syncState, + sutUserId: context.sutUserId, + timeoutMs: context.timeoutMs, + tokenPrefix: "MATRIX_QA_TOPLEVEL", + }); + assertTopLevelReplyArtifact("top-level follow-up reply", topLevelPhase.reply); + return { + artifacts: { + threadDriverEventId: threadPhase.driverEventId, + threadReply: threadPhase.reply, + threadRootEventId: threadPhase.rootEventId, + threadToken: threadPhase.token, + topLevelDriverEventId: topLevelPhase.driverEventId, + topLevelReply: topLevelPhase.reply, + topLevelToken: topLevelPhase.token, + }, + details: [ + `thread root event: ${threadPhase.rootEventId}`, + `thread driver event: ${threadPhase.driverEventId}`, + ...buildMatrixReplyDetails("thread reply", threadPhase.reply), + `top-level driver event: ${topLevelPhase.driverEventId}`, + ...buildMatrixReplyDetails("top-level reply", topLevelPhase.reply), + ].join("\n"), + }; + } + case "matrix-top-level-reply-shape": { + const result = await runTopLevelMentionScenario({ + accessToken: context.driverAccessToken, + actorId: "driver", + baseUrl: context.baseUrl, + observedEvents: context.observedEvents, + roomId: context.roomId, + syncState: context.syncState, + sutUserId: context.sutUserId, + timeoutMs: context.timeoutMs, + tokenPrefix: "MATRIX_QA_TOPLEVEL", + }); + assertTopLevelReplyArtifact("top-level reply", result.reply); + return { + artifacts: { + driverEventId: result.driverEventId, + reply: result.reply, + token: result.token, + }, + details: [ + `driver event: ${result.driverEventId}`, + ...buildMatrixReplyDetails("reply", result.reply), + ].join("\n"), + }; + } + case "matrix-reaction-notification": + return await runReactionNotificationScenario(context); + case "matrix-restart-resume": + return await runRestartResumeScenario(context); + case "matrix-mention-gating": { + const token = `MATRIX_QA_NOMENTION_${randomUUID().slice(0, 8).toUpperCase()}`; + return await runNoReplyExpectedScenario({ + accessToken: context.driverAccessToken, + actorId: "driver", + actorUserId: context.driverUserId, + baseUrl: context.baseUrl, + body: buildExactMarkerPrompt(token), + observedEvents: context.observedEvents, + roomId: context.roomId, + syncState: context.syncState, + sutUserId: context.sutUserId, + timeoutMs: context.timeoutMs, + token, + }); + } + case "matrix-allowlist-block": { + const token = `MATRIX_QA_ALLOWLIST_${randomUUID().slice(0, 8).toUpperCase()}`; + return await runNoReplyExpectedScenario({ + accessToken: context.observerAccessToken, + actorId: "observer", + actorUserId: context.observerUserId, + baseUrl: context.baseUrl, + body: buildMentionPrompt(context.sutUserId, token), + mentionUserIds: [context.sutUserId], + observedEvents: context.observedEvents, + roomId: context.roomId, + syncState: context.syncState, + sutUserId: context.sutUserId, + timeoutMs: context.timeoutMs, + token, + }); + } + default: { + const exhaustiveScenarioId: never = scenario.id; + return exhaustiveScenarioId; + } + } +} + +export const __testing = { + MATRIX_QA_STANDARD_SCENARIO_IDS, + buildMatrixReplyDetails, + buildMatrixReplyArtifact, + buildMentionPrompt, + findMatrixQaScenarios, + readMatrixQaSyncCursor, + writeMatrixQaSyncCursor, +}; diff --git a/extensions/qa-lab/src/live-transports/matrix/matrix-live.runtime.test.ts b/extensions/qa-lab/src/live-transports/matrix/matrix-live.runtime.test.ts new file mode 100644 index 00000000000..061e552f8b3 --- /dev/null +++ b/extensions/qa-lab/src/live-transports/matrix/matrix-live.runtime.test.ts @@ -0,0 +1,272 @@ +import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; +import { afterEach, describe, expect, it, vi } from "vitest"; +import { __testing as liveTesting } from "./matrix-live.runtime.js"; + +afterEach(() => { + vi.useRealTimers(); +}); + +describe("matrix live qa runtime", () => { + it("injects a temporary Matrix account into the QA gateway config", () => { + const baseCfg: OpenClawConfig = { + plugins: { + allow: ["memory-core", "qa-channel"], + entries: { + "memory-core": { enabled: true }, + "qa-channel": { enabled: true }, + }, + }, + }; + + const next = liveTesting.buildMatrixQaConfig(baseCfg, { + driverUserId: "@driver:matrix-qa.test", + homeserver: "http://127.0.0.1:28008/", + roomId: "!room:matrix-qa.test", + sutAccessToken: "syt_sut", + sutAccountId: "sut", + sutDeviceId: "DEVICE123", + sutUserId: "@sut:matrix-qa.test", + }); + + expect(next.plugins?.allow).toContain("matrix"); + expect(next.plugins?.entries?.matrix).toEqual({ enabled: true }); + expect(next.channels?.matrix).toEqual({ + enabled: true, + defaultAccount: "sut", + accounts: { + sut: { + accessToken: "syt_sut", + deviceId: "DEVICE123", + dm: { enabled: false }, + enabled: true, + encryption: false, + groupAllowFrom: ["@driver:matrix-qa.test"], + groupPolicy: "allowlist", + groups: { + "!room:matrix-qa.test": { + enabled: true, + requireMention: true, + }, + }, + homeserver: "http://127.0.0.1:28008/", + network: { + dangerouslyAllowPrivateNetwork: true, + }, + replyToMode: "off", + threadReplies: "inbound", + userId: "@sut:matrix-qa.test", + }, + }, + }); + }); + + it("redacts Matrix observed event content by default in artifacts", () => { + expect( + liveTesting.buildObservedEventsArtifact({ + includeContent: false, + observedEvents: [ + { + roomId: "!room:matrix-qa.test", + eventId: "$event", + sender: "@sut:matrix-qa.test", + type: "m.room.message", + body: "secret", + formattedBody: "

secret

", + msgtype: "m.text", + originServerTs: 1_700_000_000_000, + relatesTo: { + relType: "m.thread", + eventId: "$root", + inReplyToId: "$driver", + isFallingBack: true, + }, + }, + ], + }), + ).toEqual([ + { + roomId: "!room:matrix-qa.test", + eventId: "$event", + sender: "@sut:matrix-qa.test", + type: "m.room.message", + msgtype: "m.text", + originServerTs: 1_700_000_000_000, + relatesTo: { + relType: "m.thread", + eventId: "$root", + inReplyToId: "$driver", + isFallingBack: true, + }, + }, + ]); + }); + + it("keeps reaction metadata in redacted Matrix observed-event artifacts", () => { + expect( + liveTesting.buildObservedEventsArtifact({ + includeContent: false, + observedEvents: [ + { + roomId: "!room:matrix-qa.test", + eventId: "$reaction", + sender: "@driver:matrix-qa.test", + type: "m.reaction", + reaction: { + eventId: "$reply", + key: "👍", + }, + relatesTo: { + relType: "m.annotation", + eventId: "$reply", + }, + }, + ], + }), + ).toEqual([ + { + roomId: "!room:matrix-qa.test", + eventId: "$reaction", + sender: "@driver:matrix-qa.test", + type: "m.reaction", + originServerTs: undefined, + msgtype: undefined, + membership: undefined, + relatesTo: { + relType: "m.annotation", + eventId: "$reply", + }, + mentions: undefined, + reaction: { + eventId: "$reply", + key: "👍", + }, + }, + ]); + }); + + it("preserves negative-scenario artifacts in the Matrix summary", () => { + expect( + liveTesting.buildMatrixQaSummary({ + artifactPaths: { + observedEvents: "/tmp/observed.json", + report: "/tmp/report.md", + summary: "/tmp/summary.json", + }, + checks: [{ name: "Matrix harness ready", status: "pass" }], + finishedAt: "2026-04-10T10:05:00.000Z", + harness: { + baseUrl: "http://127.0.0.1:28008/", + composeFile: "/tmp/docker-compose.yml", + image: "ghcr.io/matrix-construct/tuwunel:v1.5.1", + roomId: "!room:matrix-qa.test", + serverName: "matrix-qa.test", + }, + observedEventCount: 4, + scenarios: [ + { + id: "matrix-mention-gating", + title: "Matrix room message without mention does not trigger", + status: "pass", + details: "no reply", + artifacts: { + actorUserId: "@driver:matrix-qa.test", + driverEventId: "$driver", + expectedNoReplyWindowMs: 8_000, + token: "MATRIX_QA_NOMENTION_TOKEN", + triggerBody: "reply with only this exact marker: MATRIX_QA_NOMENTION_TOKEN", + }, + }, + ], + startedAt: "2026-04-10T10:00:00.000Z", + sutAccountId: "sut", + userIds: { + driver: "@driver:matrix-qa.test", + observer: "@observer:matrix-qa.test", + sut: "@sut:matrix-qa.test", + }, + }), + ).toMatchObject({ + counts: { + total: 2, + passed: 2, + failed: 0, + }, + scenarios: [ + { + id: "matrix-mention-gating", + artifacts: { + actorUserId: "@driver:matrix-qa.test", + expectedNoReplyWindowMs: 8_000, + triggerBody: "reply with only this exact marker: MATRIX_QA_NOMENTION_TOKEN", + }, + }, + ], + }); + }); + + it("treats only connected, healthy Matrix accounts as ready", () => { + expect(liveTesting.isMatrixAccountReady({ running: true, connected: true })).toBe(true); + expect(liveTesting.isMatrixAccountReady({ running: true, connected: false })).toBe(false); + expect( + liveTesting.isMatrixAccountReady({ + running: true, + connected: true, + restartPending: true, + }), + ).toBe(false); + expect( + liveTesting.isMatrixAccountReady({ + running: true, + connected: true, + healthState: "degraded", + }), + ).toBe(false); + }); + + it("waits past not-ready Matrix status snapshots until the account is really ready", async () => { + vi.useFakeTimers(); + const gateway = { + call: vi + .fn() + .mockResolvedValueOnce({ + channelAccounts: { + matrix: [{ accountId: "sut", running: true, connected: false }], + }, + }) + .mockResolvedValueOnce({ + channelAccounts: { + matrix: [{ accountId: "sut", running: true, connected: true }], + }, + }), + }; + + const waitPromise = liveTesting.waitForMatrixChannelReady(gateway as never, "sut", { + timeoutMs: 1_000, + pollMs: 100, + }); + await vi.advanceTimersByTimeAsync(100); + await expect(waitPromise).resolves.toBeUndefined(); + expect(gateway.call).toHaveBeenCalledTimes(2); + }); + + it("fails readiness when the Matrix account never reaches a healthy connected state", async () => { + vi.useFakeTimers(); + const gateway = { + call: vi.fn().mockResolvedValue({ + channelAccounts: { + matrix: [{ accountId: "sut", running: true, connected: true, healthState: "degraded" }], + }, + }), + }; + + const waitPromise = liveTesting.waitForMatrixChannelReady(gateway as never, "sut", { + timeoutMs: 250, + pollMs: 100, + }); + const expectation = expect(waitPromise).rejects.toThrow( + 'matrix account "sut" did not become ready', + ); + await vi.advanceTimersByTimeAsync(300); + await expectation; + }); +}); diff --git a/extensions/qa-lab/src/live-transports/matrix/matrix-live.runtime.ts b/extensions/qa-lab/src/live-transports/matrix/matrix-live.runtime.ts new file mode 100644 index 00000000000..e75e2ee981e --- /dev/null +++ b/extensions/qa-lab/src/live-transports/matrix/matrix-live.runtime.ts @@ -0,0 +1,555 @@ +import { randomUUID } from "node:crypto"; +import fs from "node:fs/promises"; +import path from "node:path"; +import { setTimeout as sleep } from "node:timers/promises"; +import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; +import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime"; +import { startQaGatewayChild } from "../../gateway-child.js"; +import type { QaReportCheck } from "../../report.js"; +import { renderQaMarkdownReport } from "../../report.js"; +import { + defaultQaModelForMode, + normalizeQaProviderMode, + type QaProviderModeInput, +} from "../../run-config.js"; +import { startQaLiveLaneGateway } from "../shared/live-gateway.runtime.js"; +import { appendLiveLaneIssue, buildLiveLaneArtifactsError } from "../shared/live-lane-helpers.js"; +import { + provisionMatrixQaRoom, + type MatrixQaObservedEvent, + type MatrixQaProvisionResult, +} from "./matrix-driver-client.js"; +import { startMatrixQaHarness } from "./matrix-harness.runtime.js"; +import { + MATRIX_QA_SCENARIOS, + buildMatrixReplyDetails, + findMatrixQaScenarios, + runMatrixQaCanary, + runMatrixQaScenario, + type MatrixQaCanaryArtifact, + type MatrixQaScenarioArtifacts, +} from "./matrix-live-scenarios.js"; + +type MatrixQaScenarioResult = { + artifacts?: MatrixQaScenarioArtifacts; + details: string; + id: string; + status: "fail" | "pass"; + title: string; +}; + +type MatrixQaSummary = { + checks: QaReportCheck[]; + counts: { + failed: number; + passed: number; + total: number; + }; + finishedAt: string; + harness: { + baseUrl: string; + composeFile: string; + image: string; + roomId: string; + serverName: string; + }; + canary?: MatrixQaCanaryArtifact; + observedEventCount: number; + observedEventsPath: string; + reportPath: string; + scenarios: MatrixQaScenarioResult[]; + startedAt: string; + summaryPath: string; + sutAccountId: string; + userIds: { + driver: string; + observer: string; + sut: string; + }; +}; + +type MatrixQaArtifactPaths = { + observedEvents: string; + report: string; + summary: string; +}; + +export type MatrixQaRunResult = { + observedEventsPath: string; + outputDir: string; + reportPath: string; + scenarios: MatrixQaScenarioResult[]; + summaryPath: string; +}; + +function buildMatrixQaSummary(params: { + artifactPaths: MatrixQaArtifactPaths; + canary?: MatrixQaCanaryArtifact; + checks: QaReportCheck[]; + finishedAt: string; + harness: MatrixQaSummary["harness"]; + observedEventCount: number; + scenarios: MatrixQaScenarioResult[]; + startedAt: string; + sutAccountId: string; + userIds: MatrixQaSummary["userIds"]; +}): MatrixQaSummary { + return { + checks: params.checks, + counts: { + total: params.checks.length + params.scenarios.length, + passed: + params.checks.filter((check) => check.status === "pass").length + + params.scenarios.filter((scenario) => scenario.status === "pass").length, + failed: + params.checks.filter((check) => check.status === "fail").length + + params.scenarios.filter((scenario) => scenario.status === "fail").length, + }, + finishedAt: params.finishedAt, + harness: params.harness, + canary: params.canary, + observedEventCount: params.observedEventCount, + observedEventsPath: params.artifactPaths.observedEvents, + reportPath: params.artifactPaths.report, + scenarios: params.scenarios, + startedAt: params.startedAt, + summaryPath: params.artifactPaths.summary, + sutAccountId: params.sutAccountId, + userIds: params.userIds, + }; +} + +function buildMatrixQaConfig( + baseCfg: OpenClawConfig, + params: { + driverUserId: string; + homeserver: string; + roomId: string; + sutAccessToken: string; + sutAccountId: string; + sutDeviceId?: string; + sutUserId: string; + }, +): OpenClawConfig { + const pluginAllow = [...new Set([...(baseCfg.plugins?.allow ?? []), "matrix"])]; + return { + ...baseCfg, + plugins: { + ...baseCfg.plugins, + allow: pluginAllow, + entries: { + ...baseCfg.plugins?.entries, + matrix: { enabled: true }, + }, + }, + channels: { + ...baseCfg.channels, + matrix: { + enabled: true, + defaultAccount: params.sutAccountId, + accounts: { + [params.sutAccountId]: { + accessToken: params.sutAccessToken, + ...(params.sutDeviceId ? { deviceId: params.sutDeviceId } : {}), + dm: { enabled: false }, + enabled: true, + encryption: false, + groupAllowFrom: [params.driverUserId], + groupPolicy: "allowlist", + groups: { + [params.roomId]: { + enabled: true, + requireMention: true, + }, + }, + homeserver: params.homeserver, + network: { + dangerouslyAllowPrivateNetwork: true, + }, + replyToMode: "off", + threadReplies: "inbound", + userId: params.sutUserId, + }, + }, + }, + }, + }; +} + +function buildObservedEventsArtifact(params: { + includeContent: boolean; + observedEvents: MatrixQaObservedEvent[]; +}) { + return params.observedEvents.map((event) => + params.includeContent + ? event + : { + roomId: event.roomId, + eventId: event.eventId, + sender: event.sender, + stateKey: event.stateKey, + type: event.type, + originServerTs: event.originServerTs, + msgtype: event.msgtype, + membership: event.membership, + relatesTo: event.relatesTo, + mentions: event.mentions, + reaction: event.reaction, + }, + ); +} + +function isMatrixAccountReady(entry?: { + connected?: boolean; + healthState?: string; + restartPending?: boolean; + running?: boolean; +}): boolean { + return Boolean( + entry?.running === true && + entry.connected === true && + entry.restartPending !== true && + (entry.healthState === undefined || entry.healthState === "healthy"), + ); +} + +async function waitForMatrixChannelReady( + gateway: Awaited>, + accountId: string, + opts?: { + pollMs?: number; + timeoutMs?: number; + }, +) { + const pollMs = opts?.pollMs ?? 500; + const timeoutMs = opts?.timeoutMs ?? 60_000; + const startedAt = Date.now(); + while (Date.now() - startedAt < timeoutMs) { + try { + const payload = (await gateway.call( + "channels.status", + { probe: false, timeoutMs: 2_000 }, + { timeoutMs: 5_000 }, + )) as { + channelAccounts?: Record< + string, + Array<{ + accountId?: string; + connected?: boolean; + healthState?: string; + restartPending?: boolean; + running?: boolean; + }> + >; + }; + const accounts = payload.channelAccounts?.matrix ?? []; + const match = accounts.find((entry) => entry.accountId === accountId); + if (isMatrixAccountReady(match)) { + return; + } + } catch { + // retry + } + await sleep(pollMs); + } + throw new Error(`matrix account "${accountId}" did not become ready`); +} + +export async function runMatrixQaLive(params: { + fastMode?: boolean; + outputDir?: string; + primaryModel?: string; + providerMode?: QaProviderModeInput; + repoRoot?: string; + scenarioIds?: string[]; + sutAccountId?: string; + alternateModel?: string; +}): Promise { + const repoRoot = path.resolve(params.repoRoot ?? process.cwd()); + const outputDir = + params.outputDir ?? + path.join(repoRoot, ".artifacts", "qa-e2e", `matrix-${Date.now().toString(36)}`); + await fs.mkdir(outputDir, { recursive: true }); + + const providerMode = normalizeQaProviderMode(params.providerMode ?? "live-frontier"); + const primaryModel = params.primaryModel?.trim() || defaultQaModelForMode(providerMode); + const alternateModel = params.alternateModel?.trim() || defaultQaModelForMode(providerMode, true); + const sutAccountId = params.sutAccountId?.trim() || "sut"; + const scenarios = findMatrixQaScenarios(params.scenarioIds); + const observedEvents: MatrixQaObservedEvent[] = []; + const includeObservedEventContent = process.env.OPENCLAW_QA_MATRIX_CAPTURE_CONTENT === "1"; + const startedAtDate = new Date(); + const startedAt = startedAtDate.toISOString(); + const runSuffix = randomUUID().slice(0, 8); + + const harness = await startMatrixQaHarness({ + outputDir: path.join(outputDir, "matrix-harness"), + repoRoot, + }); + const provisioning: MatrixQaProvisionResult = await (async () => { + try { + return await provisionMatrixQaRoom({ + baseUrl: harness.baseUrl, + driverLocalpart: `qa-driver-${runSuffix}`, + observerLocalpart: `qa-observer-${runSuffix}`, + registrationToken: harness.registrationToken, + roomName: `OpenClaw Matrix QA ${runSuffix}`, + sutLocalpart: `qa-sut-${runSuffix}`, + }); + } catch (error) { + await harness.stop().catch(() => {}); + throw error; + } + })(); + + const checks: QaReportCheck[] = [ + { + name: "Matrix harness ready", + status: "pass", + details: [ + `image: ${harness.image}`, + `baseUrl: ${harness.baseUrl}`, + `serverName: ${harness.serverName}`, + `roomId: ${provisioning.roomId}`, + ].join("\n"), + }, + ]; + const scenarioResults: MatrixQaScenarioResult[] = []; + const cleanupErrors: string[] = []; + let canaryArtifact: MatrixQaCanaryArtifact | undefined; + let gatewayHarness: Awaited> | null = null; + let canaryFailed = false; + const syncState: { driver?: string; observer?: string } = {}; + + try { + gatewayHarness = await startQaLiveLaneGateway({ + repoRoot, + qaBusBaseUrl: "http://127.0.0.1:43123", + providerMode, + primaryModel, + alternateModel, + fastMode: params.fastMode, + controlUiEnabled: false, + mutateConfig: (cfg) => + buildMatrixQaConfig(cfg, { + driverUserId: provisioning.driver.userId, + homeserver: harness.baseUrl, + roomId: provisioning.roomId, + sutAccessToken: provisioning.sut.accessToken, + sutAccountId, + sutDeviceId: provisioning.sut.deviceId, + sutUserId: provisioning.sut.userId, + }), + }); + await waitForMatrixChannelReady(gatewayHarness.gateway, sutAccountId); + checks.push({ + name: "Matrix channel ready", + status: "pass", + details: `accountId: ${sutAccountId}\nuserId: ${provisioning.sut.userId}`, + }); + + try { + const canary = await runMatrixQaCanary({ + baseUrl: harness.baseUrl, + driverAccessToken: provisioning.driver.accessToken, + observedEvents, + roomId: provisioning.roomId, + syncState, + sutUserId: provisioning.sut.userId, + timeoutMs: 45_000, + }); + canaryArtifact = { + driverEventId: canary.driverEventId, + reply: canary.reply, + token: canary.token, + }; + checks.push({ + name: "Matrix canary", + status: "pass", + details: buildMatrixReplyDetails("reply", canary.reply).join("\n"), + }); + } catch (error) { + canaryFailed = true; + checks.push({ + name: "Matrix canary", + status: "fail", + details: formatErrorMessage(error), + }); + } + + if (!canaryFailed) { + for (const scenario of scenarios) { + try { + const result = await runMatrixQaScenario(scenario, { + baseUrl: harness.baseUrl, + canary: canaryArtifact, + driverAccessToken: provisioning.driver.accessToken, + driverUserId: provisioning.driver.userId, + observedEvents, + observerAccessToken: provisioning.observer.accessToken, + observerUserId: provisioning.observer.userId, + restartGateway: async () => { + if (!gatewayHarness) { + throw new Error("Matrix restart scenario requires a live gateway"); + } + await gatewayHarness.gateway.restart(); + await waitForMatrixChannelReady(gatewayHarness.gateway, sutAccountId); + }, + roomId: provisioning.roomId, + syncState, + sutUserId: provisioning.sut.userId, + timeoutMs: scenario.timeoutMs, + }); + scenarioResults.push({ + artifacts: result.artifacts, + id: scenario.id, + title: scenario.title, + status: "pass", + details: result.details, + }); + } catch (error) { + scenarioResults.push({ + id: scenario.id, + title: scenario.title, + status: "fail", + details: formatErrorMessage(error), + }); + } + } + } + } finally { + if (gatewayHarness) { + try { + await gatewayHarness.stop(); + } catch (error) { + appendLiveLaneIssue(cleanupErrors, "live gateway cleanup", error); + } + } + try { + await harness.stop(); + } catch (error) { + appendLiveLaneIssue(cleanupErrors, "Matrix harness cleanup", error); + } + } + if (cleanupErrors.length > 0) { + checks.push({ + name: "Matrix cleanup", + status: "fail", + details: cleanupErrors.join("\n"), + }); + } + + const finishedAtDate = new Date(); + const finishedAt = finishedAtDate.toISOString(); + const reportPath = path.join(outputDir, "matrix-qa-report.md"); + const summaryPath = path.join(outputDir, "matrix-qa-summary.json"); + const observedEventsPath = path.join(outputDir, "matrix-qa-observed-events.json"); + const artifactPaths = { + observedEvents: observedEventsPath, + report: reportPath, + summary: summaryPath, + } satisfies MatrixQaArtifactPaths; + const report = renderQaMarkdownReport({ + title: "Matrix QA Report", + startedAt: startedAtDate, + finishedAt: finishedAtDate, + checks, + scenarios: scenarioResults.map((scenario) => ({ + details: scenario.details, + name: scenario.title, + status: scenario.status, + })), + notes: [ + `roomId: ${provisioning.roomId}`, + `driver: ${provisioning.driver.userId}`, + `observer: ${provisioning.observer.userId}`, + `sut: ${provisioning.sut.userId}`, + `homeserver: ${harness.baseUrl}`, + `image: ${harness.image}`, + ], + }); + const summary: MatrixQaSummary = buildMatrixQaSummary({ + artifactPaths, + canary: canaryArtifact, + checks, + finishedAt, + harness: { + baseUrl: harness.baseUrl, + composeFile: harness.composeFile, + image: harness.image, + roomId: provisioning.roomId, + serverName: harness.serverName, + }, + observedEventCount: observedEvents.length, + scenarios: scenarioResults, + startedAt, + sutAccountId, + userIds: { + driver: provisioning.driver.userId, + observer: provisioning.observer.userId, + sut: provisioning.sut.userId, + }, + }); + + await fs.writeFile(reportPath, `${report}\n`, { encoding: "utf8", mode: 0o600 }); + await fs.writeFile(summaryPath, `${JSON.stringify(summary, null, 2)}\n`, { + encoding: "utf8", + mode: 0o600, + }); + await fs.writeFile( + observedEventsPath, + `${JSON.stringify( + buildObservedEventsArtifact({ + includeContent: includeObservedEventContent, + observedEvents, + }), + null, + 2, + )}\n`, + { encoding: "utf8", mode: 0o600 }, + ); + + const failedChecks = checks.filter( + (check) => check.status === "fail" && check.name !== "Matrix cleanup", + ); + const failedScenarios = scenarioResults.filter((scenario) => scenario.status === "fail"); + if (failedChecks.length > 0 || failedScenarios.length > 0) { + throw new Error( + buildLiveLaneArtifactsError({ + heading: "Matrix QA failed.", + details: [ + ...failedChecks.map((check) => `check ${check.name}: ${check.details ?? "failed"}`), + ...failedScenarios.map((scenario) => `scenario ${scenario.id}: ${scenario.details}`), + ...cleanupErrors.map((error) => `cleanup: ${error}`), + ], + artifacts: artifactPaths, + }), + ); + } + if (cleanupErrors.length > 0) { + throw new Error( + buildLiveLaneArtifactsError({ + heading: "Matrix QA cleanup failed after artifacts were written.", + details: cleanupErrors, + artifacts: artifactPaths, + }), + ); + } + + return { + observedEventsPath, + outputDir, + reportPath, + scenarios: scenarioResults, + summaryPath, + }; +} + +export const __testing = { + buildMatrixQaSummary, + MATRIX_QA_SCENARIOS, + buildMatrixQaConfig, + buildObservedEventsArtifact, + isMatrixAccountReady, + waitForMatrixChannelReady, +}; diff --git a/extensions/qa-lab/src/live-transports/shared/live-gateway.runtime.test.ts b/extensions/qa-lab/src/live-transports/shared/live-gateway.runtime.test.ts new file mode 100644 index 00000000000..5cdb75106ff --- /dev/null +++ b/extensions/qa-lab/src/live-transports/shared/live-gateway.runtime.test.ts @@ -0,0 +1,125 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +const { startQaGatewayChild, startQaMockOpenAiServer } = vi.hoisted(() => ({ + startQaGatewayChild: vi.fn(), + startQaMockOpenAiServer: vi.fn(), +})); + +vi.mock("../../gateway-child.js", () => ({ + startQaGatewayChild, +})); + +vi.mock("../../mock-openai-server.js", () => ({ + startQaMockOpenAiServer, +})); + +import { startQaLiveLaneGateway } from "./live-gateway.runtime.js"; + +describe("startQaLiveLaneGateway", () => { + const gatewayStop = vi.fn(); + const mockStop = vi.fn(); + + beforeEach(() => { + gatewayStop.mockReset(); + mockStop.mockReset(); + startQaGatewayChild.mockReset(); + startQaMockOpenAiServer.mockReset(); + + startQaGatewayChild.mockResolvedValue({ + stop: gatewayStop, + }); + startQaMockOpenAiServer.mockResolvedValue({ + baseUrl: "http://127.0.0.1:44080", + stop: mockStop, + }); + }); + + afterEach(() => { + vi.clearAllMocks(); + }); + + it("threads the mock provider base url into the gateway child", async () => { + const harness = await startQaLiveLaneGateway({ + repoRoot: "/tmp/openclaw-repo", + qaBusBaseUrl: "http://127.0.0.1:43123", + providerMode: "mock-openai", + primaryModel: "mock-openai/gpt-5.4", + alternateModel: "mock-openai/gpt-5.4-alt", + controlUiEnabled: false, + }); + + expect(startQaMockOpenAiServer).toHaveBeenCalledWith({ + host: "127.0.0.1", + port: 0, + }); + expect(startQaGatewayChild).toHaveBeenCalledWith( + expect.objectContaining({ + includeQaChannel: false, + providerBaseUrl: "http://127.0.0.1:44080/v1", + providerMode: "mock-openai", + }), + ); + + await harness.stop(); + expect(gatewayStop).toHaveBeenCalledTimes(1); + expect(mockStop).toHaveBeenCalledTimes(1); + }); + + it("skips mock bootstrap for live frontier runs", async () => { + const harness = await startQaLiveLaneGateway({ + repoRoot: "/tmp/openclaw-repo", + qaBusBaseUrl: "http://127.0.0.1:43123", + providerMode: "live-frontier", + primaryModel: "openai/gpt-5.4", + alternateModel: "openai/gpt-5.4", + controlUiEnabled: false, + }); + + expect(startQaMockOpenAiServer).not.toHaveBeenCalled(); + expect(startQaGatewayChild).toHaveBeenCalledWith( + expect.objectContaining({ + includeQaChannel: false, + providerBaseUrl: undefined, + providerMode: "live-frontier", + }), + ); + + await harness.stop(); + expect(gatewayStop).toHaveBeenCalledTimes(1); + }); + + it("still stops the mock server when gateway shutdown fails", async () => { + gatewayStop.mockRejectedValueOnce(new Error("gateway down")); + const harness = await startQaLiveLaneGateway({ + repoRoot: "/tmp/openclaw-repo", + qaBusBaseUrl: "http://127.0.0.1:43123", + providerMode: "mock-openai", + primaryModel: "mock-openai/gpt-5.4", + alternateModel: "mock-openai/gpt-5.4-alt", + controlUiEnabled: false, + }); + + await expect(harness.stop()).rejects.toThrow( + "failed to stop QA live lane resources:\ngateway stop failed: gateway down", + ); + expect(gatewayStop).toHaveBeenCalledTimes(1); + expect(mockStop).toHaveBeenCalledTimes(1); + }); + + it("reports both gateway and mock shutdown failures together", async () => { + gatewayStop.mockRejectedValueOnce(new Error("gateway down")); + mockStop.mockRejectedValueOnce(new Error("mock down")); + const harness = await startQaLiveLaneGateway({ + repoRoot: "/tmp/openclaw-repo", + qaBusBaseUrl: "http://127.0.0.1:43123", + providerMode: "mock-openai", + primaryModel: "mock-openai/gpt-5.4", + alternateModel: "mock-openai/gpt-5.4-alt", + controlUiEnabled: false, + }); + + await expect(harness.stop()).rejects.toThrow( + "failed to stop QA live lane resources:\ngateway stop failed: gateway down\nmock provider stop failed: mock down", + ); + }); +}); diff --git a/extensions/qa-lab/src/live-transports/shared/live-gateway.runtime.ts b/extensions/qa-lab/src/live-transports/shared/live-gateway.runtime.ts new file mode 100644 index 00000000000..9e5c9068e02 --- /dev/null +++ b/extensions/qa-lab/src/live-transports/shared/live-gateway.runtime.ts @@ -0,0 +1,76 @@ +import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; +import { startQaGatewayChild, type QaCliBackendAuthMode } from "../../gateway-child.js"; +import { startQaMockOpenAiServer } from "../../mock-openai-server.js"; +import type { QaThinkingLevel } from "../../qa-gateway-config.js"; +import { appendLiveLaneIssue } from "./live-lane-helpers.js"; + +async function stopQaLiveLaneResources(resources: { + gateway: Awaited>; + mock: Awaited> | null; +}) { + const errors: string[] = []; + try { + await resources.gateway.stop(); + } catch (error) { + appendLiveLaneIssue(errors, "gateway stop failed", error); + } + if (resources.mock) { + try { + await resources.mock.stop(); + } catch (error) { + appendLiveLaneIssue(errors, "mock provider stop failed", error); + } + } + if (errors.length > 0) { + throw new Error(`failed to stop QA live lane resources:\n${errors.join("\n")}`); + } +} + +export async function startQaLiveLaneGateway(params: { + repoRoot: string; + qaBusBaseUrl: string; + controlUiAllowedOrigins?: string[]; + providerMode: "mock-openai" | "live-frontier"; + primaryModel: string; + alternateModel: string; + fastMode?: boolean; + thinkingDefault?: QaThinkingLevel; + claudeCliAuthMode?: QaCliBackendAuthMode; + controlUiEnabled?: boolean; + mutateConfig?: (cfg: OpenClawConfig) => OpenClawConfig; +}) { + const mock = + params.providerMode === "mock-openai" + ? await startQaMockOpenAiServer({ + host: "127.0.0.1", + port: 0, + }) + : null; + try { + const gateway = await startQaGatewayChild({ + repoRoot: params.repoRoot, + providerBaseUrl: mock ? `${mock.baseUrl}/v1` : undefined, + qaBusBaseUrl: params.qaBusBaseUrl, + includeQaChannel: false, + controlUiAllowedOrigins: params.controlUiAllowedOrigins, + providerMode: params.providerMode, + primaryModel: params.primaryModel, + alternateModel: params.alternateModel, + fastMode: params.fastMode, + thinkingDefault: params.thinkingDefault, + claudeCliAuthMode: params.claudeCliAuthMode, + controlUiEnabled: params.controlUiEnabled, + mutateConfig: params.mutateConfig, + }); + return { + gateway, + mock, + async stop() { + await stopQaLiveLaneResources({ gateway, mock }); + }, + }; + } catch (error) { + await mock?.stop().catch(() => {}); + throw error; + } +} diff --git a/extensions/qa-lab/src/live-transports/shared/live-lane-helpers.ts b/extensions/qa-lab/src/live-transports/shared/live-lane-helpers.ts new file mode 100644 index 00000000000..cf65146123e --- /dev/null +++ b/extensions/qa-lab/src/live-transports/shared/live-lane-helpers.ts @@ -0,0 +1,18 @@ +import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime"; + +export function appendLiveLaneIssue(issues: string[], label: string, error: unknown) { + issues.push(`${label}: ${formatErrorMessage(error)}`); +} + +export function buildLiveLaneArtifactsError(params: { + heading: string; + artifacts: Record; + details?: string[]; +}) { + return [ + params.heading, + ...(params.details ?? []), + "Artifacts:", + ...Object.entries(params.artifacts).map(([label, filePath]) => `- ${label}: ${filePath}`), + ].join("\n"); +} diff --git a/extensions/qa-lab/src/live-transports/shared/live-transport-cli.runtime.ts b/extensions/qa-lab/src/live-transports/shared/live-transport-cli.runtime.ts new file mode 100644 index 00000000000..cbe805d899f --- /dev/null +++ b/extensions/qa-lab/src/live-transports/shared/live-transport-cli.runtime.ts @@ -0,0 +1,38 @@ +import path from "node:path"; +import { resolveRepoRelativeOutputDir } from "../../cli-paths.js"; +import type { QaProviderMode } from "../../run-config.js"; +import { normalizeQaProviderMode } from "../../run-config.js"; +import type { LiveTransportQaCommandOptions } from "./live-transport-cli.js"; + +export function resolveLiveTransportQaRunOptions( + opts: LiveTransportQaCommandOptions, +): LiveTransportQaCommandOptions & { + repoRoot: string; + providerMode: QaProviderMode; +} { + return { + repoRoot: path.resolve(opts.repoRoot ?? process.cwd()), + outputDir: resolveRepoRelativeOutputDir( + path.resolve(opts.repoRoot ?? process.cwd()), + opts.outputDir, + ), + providerMode: + opts.providerMode === undefined + ? "live-frontier" + : normalizeQaProviderMode(opts.providerMode), + primaryModel: opts.primaryModel, + alternateModel: opts.alternateModel, + fastMode: opts.fastMode, + scenarioIds: opts.scenarioIds, + sutAccountId: opts.sutAccountId, + }; +} + +export function printLiveTransportQaArtifacts( + laneLabel: string, + artifacts: Record, +) { + for (const [label, filePath] of Object.entries(artifacts)) { + process.stdout.write(`${laneLabel} ${label}: ${filePath}\n`); + } +} diff --git a/extensions/qa-lab/src/live-transports/shared/live-transport-cli.ts b/extensions/qa-lab/src/live-transports/shared/live-transport-cli.ts new file mode 100644 index 00000000000..bc6efebc1d5 --- /dev/null +++ b/extensions/qa-lab/src/live-transports/shared/live-transport-cli.ts @@ -0,0 +1,106 @@ +import type { Command } from "commander"; +import { collectString } from "../../cli-options.js"; +import type { QaProviderModeInput } from "../../run-config.js"; + +export type LiveTransportQaCommandOptions = { + repoRoot?: string; + outputDir?: string; + providerMode?: QaProviderModeInput; + primaryModel?: string; + alternateModel?: string; + fastMode?: boolean; + scenarioIds?: string[]; + sutAccountId?: string; +}; + +type LiveTransportQaCommanderOptions = { + repoRoot?: string; + outputDir?: string; + providerMode?: QaProviderModeInput; + model?: string; + altModel?: string; + scenario?: string[]; + fast?: boolean; + sutAccount?: string; +}; + +export type LiveTransportQaCliRegistration = { + commandName: string; + register(qa: Command): void; +}; + +export function createLazyCliRuntimeLoader(load: () => Promise) { + let promise: Promise | null = null; + return async () => { + promise ??= load(); + return await promise; + }; +} + +export function mapLiveTransportQaCommanderOptions( + opts: LiveTransportQaCommanderOptions, +): LiveTransportQaCommandOptions { + return { + repoRoot: opts.repoRoot, + outputDir: opts.outputDir, + providerMode: opts.providerMode, + primaryModel: opts.model, + alternateModel: opts.altModel, + fastMode: opts.fast, + scenarioIds: opts.scenario, + sutAccountId: opts.sutAccount, + }; +} + +export function registerLiveTransportQaCli(params: { + qa: Command; + commandName: string; + description: string; + outputDirHelp: string; + scenarioHelp: string; + sutAccountHelp: string; + run: (opts: LiveTransportQaCommandOptions) => Promise; +}) { + params.qa + .command(params.commandName) + .description(params.description) + .option("--repo-root ", "Repository root to target when running from a neutral cwd") + .option("--output-dir ", params.outputDirHelp) + .option( + "--provider-mode ", + "Provider mode: mock-openai or live-frontier (legacy live-openai still works)", + "live-frontier", + ) + .option("--model ", "Primary provider/model ref") + .option("--alt-model ", "Alternate provider/model ref") + .option("--scenario ", params.scenarioHelp, collectString, []) + .option("--fast", "Enable provider fast mode where supported", false) + .option("--sut-account ", params.sutAccountHelp, "sut") + .action(async (opts: LiveTransportQaCommanderOptions) => { + await params.run(mapLiveTransportQaCommanderOptions(opts)); + }); +} + +export function createLiveTransportQaCliRegistration(params: { + commandName: string; + description: string; + outputDirHelp: string; + scenarioHelp: string; + sutAccountHelp: string; + run: (opts: LiveTransportQaCommandOptions) => Promise; +}): LiveTransportQaCliRegistration { + return { + commandName: params.commandName, + register(qa: Command) { + registerLiveTransportQaCli({ + qa, + commandName: params.commandName, + description: params.description, + outputDirHelp: params.outputDirHelp, + scenarioHelp: params.scenarioHelp, + sutAccountHelp: params.sutAccountHelp, + run: params.run, + }); + }, + }; +} diff --git a/extensions/qa-lab/src/live-transports/shared/live-transport-scenarios.test.ts b/extensions/qa-lab/src/live-transports/shared/live-transport-scenarios.test.ts new file mode 100644 index 00000000000..79da20e6017 --- /dev/null +++ b/extensions/qa-lab/src/live-transports/shared/live-transport-scenarios.test.ts @@ -0,0 +1,76 @@ +import { describe, expect, it } from "vitest"; +import { + LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS, + collectLiveTransportStandardScenarioCoverage, + findMissingLiveTransportStandardScenarios, + selectLiveTransportScenarios, +} from "./live-transport-scenarios.js"; + +describe("live transport scenario helpers", () => { + it("keeps the repo-wide baseline contract ordered", () => { + expect(LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS).toEqual([ + "canary", + "mention-gating", + "allowlist-block", + "top-level-reply-shape", + "restart-resume", + ]); + }); + + it("selects requested scenarios and reports unknown ids with the lane label", () => { + const definitions = [ + { id: "alpha", timeoutMs: 1_000, title: "alpha" }, + { id: "beta", timeoutMs: 1_000, title: "beta" }, + ] as const; + + expect( + selectLiveTransportScenarios({ + ids: ["beta"], + laneLabel: "Demo", + scenarios: definitions, + }), + ).toEqual([definitions[1]]); + + expect(() => + selectLiveTransportScenarios({ + ids: ["alpha", "missing"], + laneLabel: "Demo", + scenarios: definitions, + }), + ).toThrow("unknown Demo QA scenario id(s): missing"); + }); + + it("dedupes always-on and scenario-backed standard coverage", () => { + const covered = collectLiveTransportStandardScenarioCoverage({ + alwaysOnStandardScenarioIds: ["canary"], + scenarios: [ + { + id: "scenario-1", + standardId: "mention-gating", + timeoutMs: 1_000, + title: "mention", + }, + { + id: "scenario-2", + standardId: "mention-gating", + timeoutMs: 1_000, + title: "mention again", + }, + { + id: "scenario-3", + standardId: "restart-resume", + timeoutMs: 1_000, + title: "restart", + }, + ], + }); + + expect(covered).toEqual(["canary", "mention-gating", "restart-resume"]); + expect( + findMissingLiveTransportStandardScenarios({ + coveredStandardScenarioIds: covered, + expectedStandardScenarioIds: LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS, + }), + ).toEqual(["allowlist-block", "top-level-reply-shape"]); + }); +}); diff --git a/extensions/qa-lab/src/live-transports/shared/live-transport-scenarios.ts b/extensions/qa-lab/src/live-transports/shared/live-transport-scenarios.ts new file mode 100644 index 00000000000..535bcc3de53 --- /dev/null +++ b/extensions/qa-lab/src/live-transports/shared/live-transport-scenarios.ts @@ -0,0 +1,149 @@ +export type LiveTransportStandardScenarioId = + | "canary" + | "mention-gating" + | "allowlist-block" + | "top-level-reply-shape" + | "restart-resume" + | "thread-follow-up" + | "thread-isolation" + | "reaction-observation" + | "help-command"; + +export type LiveTransportScenarioDefinition = { + id: TId; + standardId?: LiveTransportStandardScenarioId; + timeoutMs: number; + title: string; +}; + +export type LiveTransportStandardScenarioDefinition = { + description: string; + id: LiveTransportStandardScenarioId; + title: string; +}; + +export const LIVE_TRANSPORT_STANDARD_SCENARIOS: readonly LiveTransportStandardScenarioDefinition[] = + [ + { + id: "canary", + title: "Transport canary", + description: "The lane can trigger one known-good reply on the real transport.", + }, + { + id: "mention-gating", + title: "Mention gating", + description: "Messages without the required mention do not trigger a reply.", + }, + { + id: "allowlist-block", + title: "Sender allowlist block", + description: "Non-allowlisted senders do not trigger a reply.", + }, + { + id: "top-level-reply-shape", + title: "Top-level reply shape", + description: "Top-level replies stay top-level when the lane is configured that way.", + }, + { + id: "restart-resume", + title: "Restart resume", + description: "The lane still responds after a gateway restart.", + }, + { + id: "thread-follow-up", + title: "Thread follow-up", + description: "Threaded prompts receive threaded replies with the expected relation metadata.", + }, + { + id: "thread-isolation", + title: "Thread isolation", + description: "Fresh top-level prompts stay out of prior threads.", + }, + { + id: "reaction-observation", + title: "Reaction observation", + description: "Reaction events are observed and normalized correctly.", + }, + { + id: "help-command", + title: "Help command", + description: "The transport-specific help command path replies successfully.", + }, + ] as const; + +export const LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS: readonly LiveTransportStandardScenarioId[] = + [ + "canary", + "mention-gating", + "allowlist-block", + "top-level-reply-shape", + "restart-resume", + ] as const; + +const LIVE_TRANSPORT_STANDARD_SCENARIO_ID_SET = new Set( + LIVE_TRANSPORT_STANDARD_SCENARIOS.map((scenario) => scenario.id), +); + +function assertKnownStandardScenarioIds(ids: readonly LiveTransportStandardScenarioId[]) { + for (const id of ids) { + if (!LIVE_TRANSPORT_STANDARD_SCENARIO_ID_SET.has(id)) { + throw new Error(`unknown live transport standard scenario id: ${id}`); + } + } +} + +export function selectLiveTransportScenarios(params: { + ids?: string[]; + laneLabel: string; + scenarios: readonly TDefinition[]; +}) { + if (!params.ids || params.ids.length === 0) { + return [...params.scenarios]; + } + const requested = new Set(params.ids); + const selected = params.scenarios.filter((scenario) => params.ids?.includes(scenario.id)); + const missingIds = [...requested].filter( + (id) => !selected.some((scenario) => scenario.id === id), + ); + if (missingIds.length > 0) { + throw new Error(`unknown ${params.laneLabel} QA scenario id(s): ${missingIds.join(", ")}`); + } + return selected; +} + +export function collectLiveTransportStandardScenarioCoverage(params: { + alwaysOnStandardScenarioIds?: readonly LiveTransportStandardScenarioId[]; + scenarios: readonly LiveTransportScenarioDefinition[]; +}) { + const coverage: LiveTransportStandardScenarioId[] = []; + const seen = new Set(); + const append = (id: LiveTransportStandardScenarioId | undefined) => { + if (!id || seen.has(id)) { + return; + } + seen.add(id); + coverage.push(id); + }; + + assertKnownStandardScenarioIds(params.alwaysOnStandardScenarioIds ?? []); + for (const id of params.alwaysOnStandardScenarioIds ?? []) { + append(id); + } + for (const scenario of params.scenarios) { + if (scenario.standardId) { + assertKnownStandardScenarioIds([scenario.standardId]); + } + append(scenario.standardId); + } + return coverage; +} + +export function findMissingLiveTransportStandardScenarios(params: { + coveredStandardScenarioIds: readonly LiveTransportStandardScenarioId[]; + expectedStandardScenarioIds: readonly LiveTransportStandardScenarioId[]; +}) { + assertKnownStandardScenarioIds(params.coveredStandardScenarioIds); + assertKnownStandardScenarioIds(params.expectedStandardScenarioIds); + const covered = new Set(params.coveredStandardScenarioIds); + return params.expectedStandardScenarioIds.filter((id) => !covered.has(id)); +} diff --git a/extensions/qa-lab/src/live-transports/telegram/cli.runtime.ts b/extensions/qa-lab/src/live-transports/telegram/cli.runtime.ts new file mode 100644 index 00000000000..b99e37b1a70 --- /dev/null +++ b/extensions/qa-lab/src/live-transports/telegram/cli.runtime.ts @@ -0,0 +1,15 @@ +import type { LiveTransportQaCommandOptions } from "../shared/live-transport-cli.js"; +import { + printLiveTransportQaArtifacts, + resolveLiveTransportQaRunOptions, +} from "../shared/live-transport-cli.runtime.js"; +import { runTelegramQaLive } from "./telegram-live.runtime.js"; + +export async function runQaTelegramCommand(opts: LiveTransportQaCommandOptions) { + const result = await runTelegramQaLive(resolveLiveTransportQaRunOptions(opts)); + printLiveTransportQaArtifacts("Telegram QA", { + report: result.reportPath, + summary: result.summaryPath, + "observed messages": result.observedMessagesPath, + }); +} diff --git a/extensions/qa-lab/src/live-transports/telegram/cli.ts b/extensions/qa-lab/src/live-transports/telegram/cli.ts new file mode 100644 index 00000000000..e957f229ebb --- /dev/null +++ b/extensions/qa-lab/src/live-transports/telegram/cli.ts @@ -0,0 +1,32 @@ +import type { Command } from "commander"; +import { + createLazyCliRuntimeLoader, + createLiveTransportQaCliRegistration, + type LiveTransportQaCliRegistration, + type LiveTransportQaCommandOptions, +} from "../shared/live-transport-cli.js"; + +type TelegramQaCliRuntime = typeof import("./cli.runtime.js"); + +const loadTelegramQaCliRuntime = createLazyCliRuntimeLoader( + () => import("./cli.runtime.js"), +); + +async function runQaTelegram(opts: LiveTransportQaCommandOptions) { + const runtime = await loadTelegramQaCliRuntime(); + await runtime.runQaTelegramCommand(opts); +} + +export const telegramQaCliRegistration: LiveTransportQaCliRegistration = + createLiveTransportQaCliRegistration({ + commandName: "telegram", + description: "Run the manual Telegram live QA lane against a private bot-to-bot group harness", + outputDirHelp: "Telegram QA artifact directory", + scenarioHelp: "Run only the named Telegram QA scenario (repeatable)", + sutAccountHelp: "Temporary Telegram account id inside the QA gateway config", + run: runQaTelegram, + }); + +export function registerTelegramQaCli(qa: Command) { + telegramQaCliRegistration.register(qa); +} diff --git a/extensions/qa-lab/src/telegram-live.runtime.test.ts b/extensions/qa-lab/src/live-transports/telegram/telegram-live.runtime.test.ts similarity index 93% rename from extensions/qa-lab/src/telegram-live.runtime.test.ts rename to extensions/qa-lab/src/live-transports/telegram/telegram-live.runtime.test.ts index fc4f964cc30..474e84315fa 100644 --- a/extensions/qa-lab/src/telegram-live.runtime.test.ts +++ b/extensions/qa-lab/src/live-transports/telegram/telegram-live.runtime.test.ts @@ -1,5 +1,9 @@ import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; import { afterEach, describe, expect, it, vi } from "vitest"; +import { + LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS, + findMissingLiveTransportStandardScenarios, +} from "../shared/live-transport-scenarios.js"; import { __testing } from "./telegram-live.runtime.js"; const fetchWithSsrFGuardMock = vi.hoisted(() => @@ -220,6 +224,16 @@ describe("telegram live qa runtime", () => { ); }); + it("tracks Telegram live coverage against the shared transport contract", () => { + expect(__testing.TELEGRAM_QA_STANDARD_SCENARIO_IDS).toEqual(["canary", "help-command"]); + expect( + findMissingLiveTransportStandardScenarios({ + coveredStandardScenarioIds: __testing.TELEGRAM_QA_STANDARD_SCENARIO_IDS, + expectedStandardScenarioIds: LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS, + }), + ).toEqual(["mention-gating", "allowlist-block", "top-level-reply-shape", "restart-resume"]); + }); + it("adds an abort deadline to Telegram API requests", async () => { let signal: AbortSignal | undefined; vi.stubGlobal( diff --git a/extensions/qa-lab/src/telegram-live.runtime.ts b/extensions/qa-lab/src/live-transports/telegram/telegram-live.runtime.ts similarity index 92% rename from extensions/qa-lab/src/telegram-live.runtime.ts rename to extensions/qa-lab/src/live-transports/telegram/telegram-live.runtime.ts index 3220822f4b4..c70bc80767e 100644 --- a/extensions/qa-lab/src/telegram-live.runtime.ts +++ b/extensions/qa-lab/src/live-transports/telegram/telegram-live.runtime.ts @@ -3,12 +3,19 @@ import path from "node:path"; import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime"; import { fetchWithSsrFGuard } from "openclaw/plugin-sdk/ssrf-runtime"; -import { startQaGatewayChild } from "./gateway-child.js"; +import { startQaGatewayChild } from "../../gateway-child.js"; import { defaultQaModelForMode, normalizeQaProviderMode, type QaProviderModeInput, -} from "./run-config.js"; +} from "../../run-config.js"; +import { startQaLiveLaneGateway } from "../shared/live-gateway.runtime.js"; +import { appendLiveLaneIssue, buildLiveLaneArtifactsError } from "../shared/live-lane-helpers.js"; +import { + collectLiveTransportStandardScenarioCoverage, + selectLiveTransportScenarios, + type LiveTransportScenarioDefinition, +} from "../shared/live-transport-scenarios.js"; type TelegramQaRuntimeEnv = { groupId: string; @@ -23,10 +30,7 @@ type TelegramBotIdentity = { username?: string; }; -type TelegramQaScenarioDefinition = { - id: "telegram-help-command"; - title: string; - timeoutMs: number; +type TelegramQaScenarioDefinition = LiveTransportScenarioDefinition<"telegram-help-command"> & { buildInput: (sutUsername: string) => string; }; @@ -71,6 +75,7 @@ type TelegramQaSummary = { groupId: string; startedAt: string; finishedAt: string; + cleanupIssues: string[]; counts: { total: number; passed: number; @@ -154,12 +159,18 @@ type TelegramSendMessageResult = { const TELEGRAM_QA_SCENARIOS: TelegramQaScenarioDefinition[] = [ { id: "telegram-help-command", + standardId: "help-command", title: "Telegram help command reply", timeoutMs: 45_000, buildInput: (sutUsername) => `/help@${sutUsername}`, }, ]; +export const TELEGRAM_QA_STANDARD_SCENARIO_IDS = collectLiveTransportStandardScenarioCoverage({ + alwaysOnStandardScenarioIds: ["canary"], + scenarios: TELEGRAM_QA_SCENARIOS, +}); + const TELEGRAM_QA_ENV_KEYS = [ "OPENCLAW_QA_TELEGRAM_GROUP_ID", "OPENCLAW_QA_TELEGRAM_DRIVER_BOT_TOKEN", @@ -427,6 +438,7 @@ async function waitForTelegramChannelRunning( } function renderTelegramQaMarkdown(params: { + cleanupIssues: string[]; groupId: string; startedAt: string; finishedAt: string; @@ -449,6 +461,14 @@ function renderTelegramQaMarkdown(params: { lines.push(`- Details: ${scenario.details}`); lines.push(""); } + if (params.cleanupIssues.length > 0) { + lines.push("## Cleanup"); + lines.push(""); + for (const issue of params.cleanupIssues) { + lines.push(`- ${issue}`); + } + lines.push(""); + } return lines.join("\n"); } @@ -475,18 +495,11 @@ function buildObservedMessagesArtifact(params: { } function findScenario(ids?: string[]) { - if (!ids || ids.length === 0) { - return [...TELEGRAM_QA_SCENARIOS]; - } - const requested = new Set(ids); - const selected = TELEGRAM_QA_SCENARIOS.filter((scenario) => ids.includes(scenario.id)); - const missingIds = [...requested].filter( - (id) => !selected.some((scenario) => scenario.id === id), - ); - if (missingIds.length > 0) { - throw new Error(`unknown Telegram QA scenario id(s): ${missingIds.join(", ")}`); - } - return selected; + return selectLiveTransportScenarios({ + ids, + laneLabel: "Telegram", + scenarios: TELEGRAM_QA_SCENARIOS, + }); } function classifyCanaryReply(params: { @@ -699,7 +712,7 @@ export async function runTelegramQaLive(params: { flushTelegramUpdates(runtimeEnv.sutToken), ]); - const gateway = await startQaGatewayChild({ + const gatewayHarness = await startQaLiveLaneGateway({ repoRoot, qaBusBaseUrl: "http://127.0.0.1:43123", providerMode, @@ -717,9 +730,10 @@ export async function runTelegramQaLive(params: { }); const scenarioResults: TelegramQaScenarioResult[] = []; + const cleanupIssues: string[] = []; let canaryFailure: string | null = null; try { - await waitForTelegramChannelRunning(gateway, sutAccountId); + await waitForTelegramChannelRunning(gatewayHarness.gateway, sutAccountId); try { await runCanary({ driverToken: runtimeEnv.driverToken, @@ -782,7 +796,11 @@ export async function runTelegramQaLive(params: { } } } finally { - await gateway.stop(); + try { + await gatewayHarness.stop(); + } catch (error) { + appendLiveLaneIssue(cleanupIssues, "live gateway cleanup", error); + } } const finishedAt = new Date().toISOString(); @@ -790,6 +808,7 @@ export async function runTelegramQaLive(params: { groupId: runtimeEnv.groupId, startedAt, finishedAt, + cleanupIssues, counts: { total: scenarioResults.length, passed: scenarioResults.filter((entry) => entry.status === "pass").length, @@ -803,6 +822,7 @@ export async function runTelegramQaLive(params: { await fs.writeFile( reportPath, `${renderTelegramQaMarkdown({ + cleanupIssues, groupId: runtimeEnv.groupId, startedAt, finishedAt, @@ -826,9 +846,26 @@ export async function runTelegramQaLive(params: { )}\n`, { encoding: "utf8", mode: 0o600 }, ); + const artifactPaths = { + report: reportPath, + summary: summaryPath, + observedMessages: observedMessagesPath, + }; if (canaryFailure) { throw new Error( - `${canaryFailure}\nArtifacts:\n- report: ${reportPath}\n- summary: ${summaryPath}\n- observedMessages: ${observedMessagesPath}`, + buildLiveLaneArtifactsError({ + heading: canaryFailure, + artifacts: artifactPaths, + }), + ); + } + if (cleanupIssues.length > 0) { + throw new Error( + buildLiveLaneArtifactsError({ + heading: "Telegram QA cleanup failed after artifacts were written.", + details: cleanupIssues, + artifacts: artifactPaths, + }), ); } @@ -843,6 +880,7 @@ export async function runTelegramQaLive(params: { export const __testing = { TELEGRAM_QA_SCENARIOS, + TELEGRAM_QA_STANDARD_SCENARIO_IDS, buildTelegramQaConfig, buildObservedMessagesArtifact, canaryFailureMessage, diff --git a/extensions/qa-lab/src/mock-openai-server.test.ts b/extensions/qa-lab/src/mock-openai-server.test.ts index 7684470715c..2101bcbab8b 100644 --- a/extensions/qa-lab/src/mock-openai-server.test.ts +++ b/extensions/qa-lab/src/mock-openai-server.test.ts @@ -593,6 +593,55 @@ describe("qa mock openai server", () => { }); }); + it("uses the latest exact marker directive from conversation history", async () => { + const server = await startQaMockOpenAiServer({ + host: "127.0.0.1", + port: 0, + }); + cleanups.push(async () => { + await server.stop(); + }); + + const response = await fetch(`${server.baseUrl}/v1/responses`, { + method: "POST", + headers: { + "content-type": "application/json", + }, + body: JSON.stringify({ + stream: false, + input: [ + { + role: "user", + content: [ + { + type: "input_text", + text: "Earlier turn: reply with only this exact marker: OLD_TOKEN", + }, + ], + }, + { + role: "user", + content: [ + { + type: "input_text", + text: "Current turn: reply with only this exact marker: NEW_TOKEN", + }, + ], + }, + ], + }), + }); + + expect(response.status).toBe(200); + expect(await response.json()).toMatchObject({ + output: [ + { + content: [{ text: "NEW_TOKEN" }], + }, + ], + }); + }); + it("records image inputs and describes attached images", async () => { const server = await startQaMockOpenAiServer({ host: "127.0.0.1", diff --git a/extensions/qa-lab/src/mock-openai-server.ts b/extensions/qa-lab/src/mock-openai-server.ts index ff0f5324779..b44db8424ff 100644 --- a/extensions/qa-lab/src/mock-openai-server.ts +++ b/extensions/qa-lab/src/mock-openai-server.ts @@ -320,22 +320,30 @@ function extractOrbitCode(text: string) { return /\bORBIT-\d+\b/i.exec(text)?.[0]?.toUpperCase() ?? null; } -function extractExactReplyDirective(text: string) { - const colonMatch = /reply(?: with)? exactly:\s*([^\n]+)/i.exec(text); - if (colonMatch?.[1]) { - return colonMatch[1].trim(); +function extractLastCapture(text: string, pattern: RegExp) { + let lastMatch: RegExpExecArray | null = null; + const flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`; + const globalPattern = new RegExp(pattern.source, flags); + for (let match = globalPattern.exec(text); match; match = globalPattern.exec(text)) { + lastMatch = match; } - const backtickedMatch = /reply(?: with)? exactly\s+`([^`]+)`/i.exec(text); - return backtickedMatch?.[1]?.trim() || null; + return lastMatch?.[1]?.trim() || null; +} + +function extractExactReplyDirective(text: string) { + const colonMatch = extractLastCapture(text, /reply(?: with)? exactly:\s*([^\n]+)/i); + if (colonMatch) { + return colonMatch; + } + return extractLastCapture(text, /reply(?: with)? exactly\s+`([^`]+)`/i); } function extractExactMarkerDirective(text: string) { - const backtickedMatch = /exact marker:\s*`([^`]+)`/i.exec(text); - if (backtickedMatch?.[1]) { - return backtickedMatch[1].trim(); + const backtickedMatch = extractLastCapture(text, /exact marker:\s*`([^`]+)`/i); + if (backtickedMatch) { + return backtickedMatch; } - const plainMatch = /exact marker:\s*([^\s`.,;:!?]+(?:-[^\s`.,;:!?]+)*)/i.exec(text); - return plainMatch?.[1]?.trim() || null; + return extractLastCapture(text, /exact marker:\s*([^\s`.,;:!?]+(?:-[^\s`.,;:!?]+)*)/i); } function isHeartbeatPrompt(text: string) { diff --git a/extensions/qa-lab/src/qa-gateway-config.test.ts b/extensions/qa-lab/src/qa-gateway-config.test.ts index c3b2989ff09..5c23e94f23c 100644 --- a/extensions/qa-lab/src/qa-gateway-config.test.ts +++ b/extensions/qa-lab/src/qa-gateway-config.test.ts @@ -31,10 +31,27 @@ describe("buildQaGatewayConfig", () => { expect(cfg.models?.providers?.["mock-openai"]?.baseUrl).toBe("http://127.0.0.1:44080/v1"); expect(cfg.plugins?.allow).toEqual(["memory-core", "qa-channel"]); expect(cfg.plugins?.entries?.["memory-core"]).toEqual({ enabled: true }); + expect(cfg.plugins?.entries?.["qa-channel"]).toEqual({ enabled: true }); expect(cfg.plugins?.entries?.openai).toBeUndefined(); expect(cfg.gateway?.reload?.deferralTimeoutMs).toBe(1_000); }); + it("can omit qa-channel for live transport gateway children", () => { + const cfg = buildQaGatewayConfig({ + bind: "loopback", + gatewayPort: 18789, + gatewayToken: "token", + providerBaseUrl: "http://127.0.0.1:44080/v1", + qaBusBaseUrl: "http://127.0.0.1:43124", + includeQaChannel: false, + workspaceDir: "/tmp/qa-workspace", + }); + + expect(cfg.plugins?.allow).toEqual(["memory-core"]); + expect(cfg.plugins?.entries?.["qa-channel"]).toBeUndefined(); + expect(cfg.channels?.["qa-channel"]).toBeUndefined(); + }); + it("uses built-in provider wiring in frontier live mode", () => { const cfg = buildQaGatewayConfig({ bind: "loopback", diff --git a/extensions/qa-lab/src/qa-gateway-config.ts b/extensions/qa-lab/src/qa-gateway-config.ts index d027b9a55c5..413dce79abe 100644 --- a/extensions/qa-lab/src/qa-gateway-config.ts +++ b/extensions/qa-lab/src/qa-gateway-config.ts @@ -57,6 +57,7 @@ export function buildQaGatewayConfig(params: { gatewayToken: string; providerBaseUrl?: string; qaBusBaseUrl: string; + includeQaChannel?: boolean; workspaceDir: string; controlUiRoot?: string; controlUiAllowedOrigins?: string[]; @@ -71,6 +72,7 @@ export function buildQaGatewayConfig(params: { fastMode?: boolean; thinkingDefault?: QaThinkingLevel; }): OpenClawConfig { + const includeQaChannel = params.includeQaChannel !== false; const mockProviderBaseUrl = params.providerBaseUrl ?? "http://127.0.0.1:44080/v1"; const mockOpenAiProvider: ModelProviderConfig = { baseUrl: mockProviderBaseUrl, @@ -167,8 +169,8 @@ export function buildQaGatewayConfig(params: { : {}; const allowedPlugins = providerMode === "live-frontier" - ? ["memory-core", ...selectedPluginIds, "qa-channel"] - : ["memory-core", "qa-channel"]; + ? ["memory-core", ...selectedPluginIds, ...(includeQaChannel ? ["qa-channel"] : [])] + : ["memory-core", ...(includeQaChannel ? ["qa-channel"] : [])]; const liveModelParams = providerMode === "live-frontier" ? (modelRef: string) => ({ @@ -197,6 +199,7 @@ export function buildQaGatewayConfig(params: { enabled: true, }, ...pluginEntries, + ...(includeQaChannel ? { "qa-channel": { enabled: true } } : {}), }, }, agents: { @@ -304,16 +307,20 @@ export function buildQaGatewayConfig(params: { mode: "off", }, }, - channels: { - "qa-channel": { - enabled: true, - baseUrl: params.qaBusBaseUrl, - botUserId: "openclaw", - botDisplayName: "OpenClaw QA", - allowFrom: ["*"], - pollTimeoutMs: 250, - }, - }, + ...(includeQaChannel + ? { + channels: { + "qa-channel": { + enabled: true, + baseUrl: params.qaBusBaseUrl, + botUserId: "openclaw", + botDisplayName: "OpenClaw QA", + allowFrom: ["*"], + pollTimeoutMs: 250, + }, + }, + } + : {}), messages: { groupChat: { mentionPatterns: ["\\b@?openclaw\\b"], diff --git a/src/plugin-sdk/status-helpers.test.ts b/src/plugin-sdk/status-helpers.test.ts index f38b14c946f..45698e51380 100644 --- a/src/plugin-sdk/status-helpers.test.ts +++ b/src/plugin-sdk/status-helpers.test.ts @@ -318,6 +318,34 @@ describe("buildRuntimeAccountStatusSnapshot", () => { port: 3978, }, }, + { + name: "preserves runtime connectivity metadata", + input: { + runtime: { + connected: true, + restartPending: true, + reconnectAttempts: 3, + lastConnectedAt: 11, + lastDisconnect: { at: 12, error: "boom" }, + lastEventAt: 13, + healthState: "healthy", + running: true, + }, + }, + extra: undefined, + expected: { + ...defaultRuntimeState, + running: true, + connected: true, + restartPending: true, + reconnectAttempts: 3, + lastConnectedAt: 11, + lastDisconnect: { at: 12, error: "boom" }, + lastEventAt: 13, + healthState: "healthy", + probe: undefined, + }, + }, ])("$name", ({ input, extra, expected }) => { expect(buildRuntimeAccountStatusSnapshot(input, extra)).toEqual(expected); }); diff --git a/src/plugin-sdk/status-helpers.ts b/src/plugin-sdk/status-helpers.ts index 4a9cb3ec704..71523bfffdb 100644 --- a/src/plugin-sdk/status-helpers.ts +++ b/src/plugin-sdk/status-helpers.ts @@ -16,6 +16,21 @@ export { type RuntimeLifecycleSnapshot = { running?: boolean | null; + connected?: boolean | null; + restartPending?: boolean | null; + reconnectAttempts?: number | null; + lastConnectedAt?: number | null; + lastDisconnect?: + | string + | { + at: number; + status?: number; + error?: string; + loggedOut?: boolean; + } + | null; + lastEventAt?: number | null; + healthState?: string | null; lastStartAt?: number | null; lastStopAt?: number | null; lastError?: string | null; @@ -282,6 +297,19 @@ export function buildRuntimeAccountStatusSnapshot