From d215f157c20b73be164672f92305b21bbbd1a224 Mon Sep 17 00:00:00 2001 From: Gustavo Madeira Santana Date: Tue, 14 Apr 2026 12:51:50 -0400 Subject: [PATCH] QA: split Matrix runner into optional plugin --- docs/cli/plugins.md | 6 + docs/help/testing.md | 27 +- extensions/qa-lab/runtime-api.ts | 1 + extensions/qa-lab/src/cli.runtime.test.ts | 53 ---- extensions/qa-lab/src/cli.test.ts | 68 ++--- extensions/qa-lab/src/cli.ts | 4 +- extensions/qa-lab/src/live-transports/cli.ts | 30 +- extensions/qa-matrix/cli.runtime.ts | 1 + extensions/qa-matrix/cli.ts | 1 + extensions/qa-matrix/index.ts | 8 + extensions/qa-matrix/openclaw.plugin.json | 10 + extensions/qa-matrix/package.json | 38 +++ extensions/qa-matrix/runtime-api.ts | 4 + extensions/qa-matrix/runtime.ts | 1 + extensions/qa-matrix/src/cli-options.ts | 4 + extensions/qa-matrix/src/cli-paths.ts | 16 + .../src}/cli.runtime.test.ts | 2 +- .../matrix => qa-matrix/src}/cli.runtime.ts | 6 +- .../matrix => qa-matrix/src}/cli.ts | 2 +- extensions/qa-matrix/src/docker-runtime.ts | 283 ++++++++++++++++++ extensions/qa-matrix/src/report.ts | 100 +++++++ extensions/qa-matrix/src/run-config.ts | 31 ++ .../src/runners/contract/runtime.test.ts} | 2 +- .../src/runners/contract/runtime.ts} | 55 +++- .../src/runners/contract/scenarios.test.ts} | 6 +- .../src/runners/contract/scenarios.ts} | 4 +- .../qa-matrix/src/shared/live-lane-helpers.ts | 18 ++ .../src/shared/live-transport-cli.runtime.ts | 40 +++ .../src/shared/live-transport-cli.ts | 120 ++++++++ .../src/shared/live-transport-scenarios.ts | 149 +++++++++ .../src/substrate/client.test.ts} | 2 +- .../src/substrate/client.ts} | 0 .../src/substrate/harness.runtime.test.ts} | 6 +- .../src/substrate/harness.runtime.ts} | 2 +- package.json | 11 +- scripts/lib/bundled-plugin-build-entries.mjs | 2 +- .../lib/bundled-runtime-sidecar-paths.json | 1 + scripts/lib/npm-update-compat-sidecars.mjs | 5 - scripts/lib/plugin-sdk-entrypoints.json | 2 + src/plugin-sdk/qa-lab-runtime.ts | 32 ++ 
src/plugin-sdk/qa-matrix.test.ts | 48 +++ src/plugin-sdk/qa-matrix.ts | 36 +++ src/plugins/bundled-plugin-metadata.test.ts | 5 +- src/plugins/runtime-sidecar-paths-baseline.ts | 2 +- 44 files changed, 1097 insertions(+), 147 deletions(-) create mode 100644 extensions/qa-matrix/cli.runtime.ts create mode 100644 extensions/qa-matrix/cli.ts create mode 100644 extensions/qa-matrix/index.ts create mode 100644 extensions/qa-matrix/openclaw.plugin.json create mode 100644 extensions/qa-matrix/package.json create mode 100644 extensions/qa-matrix/runtime-api.ts create mode 100644 extensions/qa-matrix/runtime.ts create mode 100644 extensions/qa-matrix/src/cli-options.ts create mode 100644 extensions/qa-matrix/src/cli-paths.ts rename extensions/{qa-lab/src/live-transports/matrix => qa-matrix/src}/cli.runtime.test.ts (96%) rename extensions/{qa-lab/src/live-transports/matrix => qa-matrix/src}/cli.runtime.ts (77%) rename extensions/{qa-lab/src/live-transports/matrix => qa-matrix/src}/cli.ts (96%) create mode 100644 extensions/qa-matrix/src/docker-runtime.ts create mode 100644 extensions/qa-matrix/src/report.ts create mode 100644 extensions/qa-matrix/src/run-config.ts rename extensions/{qa-lab/src/live-transports/matrix/matrix-live.runtime.test.ts => qa-matrix/src/runners/contract/runtime.test.ts} (99%) rename extensions/{qa-lab/src/live-transports/matrix/matrix-live.runtime.ts => qa-matrix/src/runners/contract/runtime.ts} (91%) rename extensions/{qa-lab/src/live-transports/matrix/matrix-live-scenarios.test.ts => qa-matrix/src/runners/contract/scenarios.test.ts} (97%) rename extensions/{qa-lab/src/live-transports/matrix/matrix-live-scenarios.ts => qa-matrix/src/runners/contract/scenarios.ts} (99%) create mode 100644 extensions/qa-matrix/src/shared/live-lane-helpers.ts create mode 100644 extensions/qa-matrix/src/shared/live-transport-cli.runtime.ts create mode 100644 extensions/qa-matrix/src/shared/live-transport-cli.ts create mode 100644 
extensions/qa-matrix/src/shared/live-transport-scenarios.ts rename extensions/{qa-lab/src/live-transports/matrix/matrix-driver-client.test.ts => qa-matrix/src/substrate/client.test.ts} (99%) rename extensions/{qa-lab/src/live-transports/matrix/matrix-driver-client.ts => qa-matrix/src/substrate/client.ts} (100%) rename extensions/{qa-lab/src/live-transports/matrix/matrix-harness.runtime.test.ts => qa-matrix/src/substrate/harness.runtime.test.ts} (98%) rename extensions/{qa-lab/src/live-transports/matrix/matrix-harness.runtime.ts => qa-matrix/src/substrate/harness.runtime.ts} (99%) create mode 100644 src/plugin-sdk/qa-lab-runtime.ts create mode 100644 src/plugin-sdk/qa-matrix.test.ts create mode 100644 src/plugin-sdk/qa-matrix.ts diff --git a/docs/cli/plugins.md b/docs/cli/plugins.md index 0ed8425cd3a..b6ccda80c40 100644 --- a/docs/cli/plugins.md +++ b/docs/cli/plugins.md @@ -197,6 +197,12 @@ Use `--link` to avoid copying a local directory (adds to `plugins.load.paths`): openclaw plugins install -l ./my-plugin ``` +Repo QA example: + +```bash +openclaw plugins install -l ./extensions/qa-matrix +``` + `--force` is not supported with `--link` because linked installs reuse the source path instead of copying over a managed install target. diff --git a/docs/help/testing.md b/docs/help/testing.md index 26924635889..746ad03cea4 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -67,6 +67,10 @@ These commands sit beside the main test suites when you need QA-lab realism: - Starts the Docker-backed QA site for operator-style QA work. - `pnpm openclaw qa matrix` - Runs the Matrix live QA lane against a disposable Docker-backed Tuwunel homeserver. + - Packaged OpenClaw installs require the optional Matrix runner plugin first: + `openclaw plugins install @openclaw/qa-matrix`. + - Repo checkouts can link the in-tree plugin directly: + `openclaw plugins install -l ./extensions/qa-matrix`. 
- Provisions three temporary Matrix users (`driver`, `sut`, `observer`) plus one private room, then starts a QA gateway child with the real Matrix plugin as the SUT transport. - Uses the pinned stable Tuwunel image `ghcr.io/matrix-construct/tuwunel:v1.5.1` by default. Override with `OPENCLAW_QA_MATRIX_TUWUNEL_IMAGE` when you need to test a different image. - Matrix currently supports only `--credential-source env` because the lane provisions disposable users locally. @@ -170,11 +174,12 @@ Adding a channel to the markdown QA system requires exactly two things: 1. A transport adapter for the channel. 2. A scenario pack that exercises the channel contract. -Do not add a channel-specific QA runner when the shared `qa-lab` runner can +Do not add a new top-level QA command root when the shared `qa-lab` host can own the flow. -`qa-lab` owns the shared mechanics: +`qa-lab` owns the shared host mechanics: +- the `openclaw qa` command root - suite startup and teardown - worker concurrency - artifact writing @@ -182,8 +187,9 @@ own the flow. - scenario execution - compatibility aliases for older `qa-channel` scenarios -The channel adapter owns the transport contract: +Runner plugins own the transport contract: +- how `openclaw qa <runner>` is mounted beneath the shared `qa` root - how the gateway is configured for that transport - how readiness is checked - how inbound events are injected @@ -194,17 +200,18 @@ The channel adapter owns the transport contract: The minimum adoption bar for a new channel is: -1. Implement the transport adapter on the shared `qa-lab` seam. -2. Register the adapter in the transport registry. -3. Keep transport-specific mechanics inside the adapter or the channel harness. -4. Author or adapt markdown scenarios under `qa/scenarios/`. -5. Use the generic scenario helpers for new scenarios. -6. Keep existing compatibility aliases working unless the repo is doing an intentional migration. +1. Keep `qa-lab` as the owner of the shared `qa` root. +2. 
Implement the transport runner on the shared `qa-lab` host seam. +3. Keep transport-specific mechanics inside the runner plugin or channel harness. +4. Mount the runner as `openclaw qa <runner>` instead of registering a competing root command. +5. Author or adapt markdown scenarios under `qa/scenarios/`. +6. Use the generic scenario helpers for new scenarios. +7. Keep existing compatibility aliases working unless the repo is doing an intentional migration. The decision rule is strict: - If behavior can be expressed once in `qa-lab`, put it in `qa-lab`. -- If behavior depends on one channel transport, keep it in that adapter or plugin harness. +- If behavior depends on one channel transport, keep it in that runner plugin or plugin harness. - If a scenario needs a new capability that more than one channel can use, add a generic helper instead of a channel-specific branch in `suite.ts`. - If a behavior is only meaningful for one transport, keep the scenario transport-specific and make that explicit in the scenario contract. 
diff --git a/extensions/qa-lab/runtime-api.ts b/extensions/qa-lab/runtime-api.ts index 801051438fb..0d61dc0b7b9 100644 --- a/extensions/qa-lab/runtime-api.ts +++ b/extensions/qa-lab/runtime-api.ts @@ -1 +1,2 @@ export * from "./src/runtime-api.js"; +export { startQaLiveLaneGateway } from "./src/live-transports/shared/live-gateway.runtime.js"; diff --git a/extensions/qa-lab/src/cli.runtime.test.ts b/extensions/qa-lab/src/cli.runtime.test.ts index 29732d7dea3..984d6b0d401 100644 --- a/extensions/qa-lab/src/cli.runtime.test.ts +++ b/extensions/qa-lab/src/cli.runtime.test.ts @@ -8,7 +8,6 @@ const { runQaSuiteFromRuntime, runQaCharacterEval, runQaMultipass, - runMatrixQaLive, runTelegramQaLive, startQaLabServer, writeQaDockerHarnessFiles, @@ -20,7 +19,6 @@ const { runQaSuiteFromRuntime: vi.fn(), runQaCharacterEval: vi.fn(), runQaMultipass: vi.fn(), - runMatrixQaLive: vi.fn(), runTelegramQaLive: vi.fn(), startQaLabServer: vi.fn(), writeQaDockerHarnessFiles: vi.fn(), @@ -52,10 +50,6 @@ vi.mock("./multipass.runtime.js", () => ({ runQaMultipass, })); -vi.mock("./live-transports/matrix/matrix-live.runtime.js", () => ({ - runMatrixQaLive, -})); - vi.mock("./live-transports/telegram/telegram-live.runtime.js", () => ({ runTelegramQaLive, })); @@ -88,7 +82,6 @@ import { runQaParityReportCommand, runQaSuiteCommand, } from "./cli.runtime.js"; -import { runQaMatrixCommand } from "./live-transports/matrix/cli.runtime.js"; import { runQaTelegramCommand } from "./live-transports/telegram/cli.runtime.js"; describe("qa cli runtime", () => { @@ -100,7 +93,6 @@ describe("qa cli runtime", () => { runQaCharacterEval.mockReset(); runQaManualLane.mockReset(); runQaMultipass.mockReset(); - runMatrixQaLive.mockReset(); runTelegramQaLive.mockReset(); startQaLabServer.mockReset(); writeQaDockerHarnessFiles.mockReset(); @@ -139,13 +131,6 @@ describe("qa cli runtime", () => { vmName: "openclaw-qa-test", scenarioIds: ["channel-chat-baseline"], }); - runMatrixQaLive.mockResolvedValue({ - outputDir: 
"/tmp/matrix", - reportPath: "/tmp/matrix/report.md", - summaryPath: "/tmp/matrix/summary.json", - observedEventsPath: "/tmp/matrix/observed.json", - scenarios: [], - }); runTelegramQaLive.mockResolvedValue({ outputDir: "/tmp/telegram", reportPath: "/tmp/telegram/report.md", @@ -226,30 +211,6 @@ describe("qa cli runtime", () => { }); }); - it("resolves matrix qa repo-root-relative paths before dispatching", async () => { - await runQaMatrixCommand({ - repoRoot: "/tmp/openclaw-repo", - outputDir: ".artifacts/qa/matrix", - providerMode: "live-frontier", - primaryModel: "openai/gpt-5.4", - alternateModel: "openai/gpt-5.4", - fastMode: true, - scenarioIds: ["matrix-thread-follow-up"], - sutAccountId: "sut-live", - }); - - expect(runMatrixQaLive).toHaveBeenCalledWith({ - repoRoot: path.resolve("/tmp/openclaw-repo"), - outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa/matrix"), - providerMode: "live-frontier", - primaryModel: "openai/gpt-5.4", - alternateModel: "openai/gpt-5.4", - fastMode: true, - scenarioIds: ["matrix-thread-follow-up"], - sutAccountId: "sut-live", - }); - }); - it("rejects output dirs that escape the repo root", () => { expect(() => resolveRepoRelativeOutputDir("/tmp/openclaw-repo", "../outside")).toThrow( "--output-dir must stay within the repo root.", @@ -273,20 +234,6 @@ describe("qa cli runtime", () => { ); }); - it("defaults matrix qa runs onto the live provider lane", async () => { - await runQaMatrixCommand({ - repoRoot: "/tmp/openclaw-repo", - scenarioIds: ["matrix-thread-follow-up"], - }); - - expect(runMatrixQaLive).toHaveBeenCalledWith( - expect.objectContaining({ - repoRoot: path.resolve("/tmp/openclaw-repo"), - providerMode: "live-frontier", - }), - ); - }); - it("normalizes legacy live-openai suite runs onto the frontier provider mode", async () => { await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", diff --git a/extensions/qa-lab/src/cli.test.ts b/extensions/qa-lab/src/cli.test.ts index 5fc231ce621..e9d31a22429 100644 
--- a/extensions/qa-lab/src/cli.test.ts +++ b/extensions/qa-lab/src/cli.test.ts @@ -5,18 +5,24 @@ const { runQaCredentialsAddCommand, runQaCredentialsListCommand, runQaCredentialsRemoveCommand, - runQaMatrixCommand, runQaTelegramCommand, } = vi.hoisted(() => ({ runQaCredentialsAddCommand: vi.fn(), runQaCredentialsListCommand: vi.fn(), runQaCredentialsRemoveCommand: vi.fn(), - runQaMatrixCommand: vi.fn(), runQaTelegramCommand: vi.fn(), })); -vi.mock("./live-transports/matrix/cli.runtime.js", () => ({ - runQaMatrixCommand, +const { isMatrixQaCliAvailable, registerMatrixQaCli } = vi.hoisted(() => ({ + isMatrixQaCliAvailable: vi.fn(() => true), + registerMatrixQaCli: vi.fn((qa: Command) => { + qa.command("matrix").action(() => undefined); + }), +})); + +vi.mock("openclaw/plugin-sdk/qa-matrix", () => ({ + isMatrixQaCliAvailable, + registerMatrixQaCli, })); vi.mock("./live-transports/telegram/cli.runtime.js", () => ({ @@ -36,12 +42,13 @@ describe("qa cli registration", () => { beforeEach(() => { program = new Command(); - registerQaLabCli(program); runQaCredentialsAddCommand.mockReset(); runQaCredentialsListCommand.mockReset(); runQaCredentialsRemoveCommand.mockReset(); - runQaMatrixCommand.mockReset(); runQaTelegramCommand.mockReset(); + isMatrixQaCliAvailable.mockClear().mockReturnValue(true); + registerMatrixQaCli.mockClear(); + registerQaLabCli(program); }); afterEach(() => { @@ -56,43 +63,20 @@ describe("qa cli registration", () => { ); }); - it("routes matrix CLI flags into the lane runtime", async () => { - await program.parseAsync([ - "node", - "openclaw", - "qa", - "matrix", - "--repo-root", - "/tmp/openclaw-repo", - "--output-dir", - ".artifacts/qa/matrix", - "--provider-mode", - "mock-openai", - "--model", - "mock-openai/gpt-5.4", - "--alt-model", - "mock-openai/gpt-5.4-alt", - "--scenario", - "matrix-thread-follow-up", - "--scenario", - "matrix-thread-isolation", - "--fast", - "--sut-account", - "sut-live", - ]); + it("delegates matrix command registration to 
the qa-matrix facade", () => { + expect(registerMatrixQaCli).toHaveBeenCalledTimes(1); + }); - expect(runQaMatrixCommand).toHaveBeenCalledWith({ - repoRoot: "/tmp/openclaw-repo", - outputDir: ".artifacts/qa/matrix", - providerMode: "mock-openai", - primaryModel: "mock-openai/gpt-5.4", - alternateModel: "mock-openai/gpt-5.4-alt", - fastMode: true, - scenarioIds: ["matrix-thread-follow-up", "matrix-thread-isolation"], - sutAccountId: "sut-live", - credentialSource: undefined, - credentialRole: undefined, - }); + it("shows an install hint when the matrix runner plugin is unavailable", async () => { + isMatrixQaCliAvailable.mockReset().mockReturnValue(false); + registerMatrixQaCli.mockReset(); + const missingProgram = new Command(); + registerQaLabCli(missingProgram); + + await expect(missingProgram.parseAsync(["node", "openclaw", "qa", "matrix"])).rejects.toThrow( + "openclaw plugins install @openclaw/qa-matrix", + ); + expect(registerMatrixQaCli).not.toHaveBeenCalled(); }); it("routes telegram CLI defaults into the lane runtime", async () => { diff --git a/extensions/qa-lab/src/cli.ts b/extensions/qa-lab/src/cli.ts index 3eb0b6f8a0f..0bce3eefe8f 100644 --- a/extensions/qa-lab/src/cli.ts +++ b/extensions/qa-lab/src/cli.ts @@ -1,6 +1,6 @@ import type { Command } from "commander"; import { collectString } from "./cli-options.js"; -import { LIVE_TRANSPORT_QA_CLI_REGISTRATIONS } from "./live-transports/cli.js"; +import { listLiveTransportQaCliRegistrations } from "./live-transports/cli.js"; import type { QaProviderModeInput } from "./run-config.js"; import { hasQaScenarioPack } from "./scenario-catalog.js"; @@ -284,7 +284,7 @@ export function registerQaLabCli(program: Command) { }, ); - for (const lane of LIVE_TRANSPORT_QA_CLI_REGISTRATIONS) { + for (const lane of listLiveTransportQaCliRegistrations()) { lane.register(qa); } diff --git a/extensions/qa-lab/src/live-transports/cli.ts b/extensions/qa-lab/src/live-transports/cli.ts index 2038ff80a8c..940b20d236d 100644 --- 
/**
 * Builds the stand-in `matrix` lane used when the optional Matrix runner
 * plugin cannot be resolved. The subcommand is still registered on the `qa`
 * command, but its action only throws an error that carries the install
 * command.
 */
function createUnavailableMatrixQaCliRegistration(): LiveTransportQaCliRegistration {
  const placeholder: LiveTransportQaCliRegistration = {
    commandName: "matrix",
    register(qa) {
      const matrixCommand = qa.command("matrix");
      matrixCommand.description("Run the Matrix live QA lane (install @openclaw/qa-matrix first)");
      matrixCommand.action(() => {
        throw new Error(
          'Matrix QA runner not installed. Install it with "openclaw plugins install @openclaw/qa-matrix".',
        );
      });
    },
  };
  return placeholder;
}

/** Lanes that are always registered, independent of optional runner plugins. */
export const LIVE_TRANSPORT_QA_CLI_REGISTRATIONS: readonly LiveTransportQaCliRegistration[] = [
  telegramQaCliRegistration,
];

/**
 * Lists every live-transport lane to mount under `qa`: the static lanes first,
 * followed by the Matrix lane.
 *
 * The Matrix entry delegates to `registerMatrixQaCli` when
 * `isMatrixQaCliAvailable()` reports true; otherwise it is the install-hint
 * placeholder, so a `matrix` subcommand is registered either way.
 */
export function listLiveTransportQaCliRegistrations(): readonly LiveTransportQaCliRegistration[] {
  const matrixLane: LiveTransportQaCliRegistration = isMatrixQaCliAvailable()
    ? { commandName: "matrix", register: registerMatrixQaCli }
    : createUnavailableMatrixQaCliRegistration();
  return LIVE_TRANSPORT_QA_CLI_REGISTRATIONS.concat(matrixLane);
}
"openclaw": { + "optional": true + } + }, + "openclaw": { + "extensions": [ + "./index.ts" + ], + "install": { + "npmSpec": "@openclaw/qa-matrix", + "defaultChoice": "npm", + "minHostVersion": ">=2026.4.12" + }, + "compat": { + "pluginApi": ">=2026.4.12" + }, + "build": { + "openclawVersion": "2026.4.12" + }, + "release": { + "publishToClawHub": true, + "publishToNpm": true + } + } +} diff --git a/extensions/qa-matrix/runtime-api.ts b/extensions/qa-matrix/runtime-api.ts new file mode 100644 index 00000000000..57f2ea533d5 --- /dev/null +++ b/extensions/qa-matrix/runtime-api.ts @@ -0,0 +1,4 @@ +export { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry"; +export { registerMatrixQaCli } from "./cli.js"; +export { runQaMatrixCommand } from "./cli.runtime.js"; +export { runMatrixQaLive } from "./runtime.js"; diff --git a/extensions/qa-matrix/runtime.ts b/extensions/qa-matrix/runtime.ts new file mode 100644 index 00000000000..6e56bad1416 --- /dev/null +++ b/extensions/qa-matrix/runtime.ts @@ -0,0 +1 @@ +export { runMatrixQaLive } from "./src/runners/contract/runtime.js"; diff --git a/extensions/qa-matrix/src/cli-options.ts b/extensions/qa-matrix/src/cli-options.ts new file mode 100644 index 00000000000..1576d43291f --- /dev/null +++ b/extensions/qa-matrix/src/cli-options.ts @@ -0,0 +1,4 @@ +export function collectString(value: string, previous: string[]) { + const trimmed = value.trim(); + return trimmed ? 
/**
 * Accumulator for a repeatable CLI string option.
 *
 * @param value - Raw value of the current occurrence.
 * @param previous - Values collected so far.
 * @returns A new array with the trimmed value appended, or `previous` itself
 *   (same reference) when the value is blank after trimming.
 */
export function collectString(value: string, previous: string[]) {
  const trimmed = value.trim();
  return trimmed ? [...previous, trimmed] : previous;
}

/**
 * Resolves a repo-relative `--output-dir` to an absolute path and rejects
 * anything that would land outside `repoRoot`.
 *
 * @param repoRoot - Directory the output dir is resolved against.
 * @param outputDir - Relative path supplied by the caller; empty/undefined
 *   means "not set".
 * @returns The absolute output directory, or `undefined` when `outputDir` is
 *   not set.
 * @throws Error when `outputDir` is absolute or escapes `repoRoot`.
 */
export function resolveRepoRelativeOutputDir(repoRoot: string, outputDir?: string) {
  if (!outputDir) {
    return undefined;
  }
  if (path.isAbsolute(outputDir)) {
    throw new Error("--output-dir must be a relative path inside the repo root.");
  }
  const resolved = path.resolve(repoRoot, outputDir);
  const relative = path.relative(repoRoot, resolved);
  // Compare whole path segments: a plain `startsWith("..")` would also reject
  // legitimate in-repo directories whose name merely begins with two dots
  // (for example "..artifacts").
  const escapesRepoRoot =
    relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
  if (escapesRepoRoot) {
    throw new Error("--output-dir must stay within the repo root.");
  }
  return resolved;
}
/** Runs `command` with `args` in `cwd` and resolves with the captured output. */
export type RunCommand = (
  command: string,
  args: string[],
  cwd: string,
) => Promise<{ stdout: string; stderr: string }>;

/** Minimal probe signature: only the `ok` flag of the response is consumed. */
export type FetchLike = (input: string) => Promise<{ ok: boolean }>;

/**
 * Probes `url` once through `fetchWithSsrFGuard` and reports whether the
 * response was OK.
 *
 * The request is aborted after two seconds and explicitly allows private
 * network targets. The guard's `release` callback is always awaited, even when
 * reading the response throws.
 *
 * @param url - Health endpoint to probe.
 * @returns `{ ok }` mirroring `response.ok`.
 */
export async function fetchHealthUrl(url: string): Promise<{ ok: boolean }> {
  const requestTimeoutMs = 2_000;
  const { response, release } = await fetchWithSsrFGuard({
    url,
    init: {
      signal: AbortSignal.timeout(requestTimeoutMs),
    },
    policy: { allowPrivateNetwork: true },
    auditContext: "qa-lab-docker-health-check",
  });
  try {
    const { ok } = response;
    return { ok };
  } finally {
    await release();
  }
}
/**
 * Renders an arbitrary thrown value as a human-readable string.
 *
 * `Error` instances yield their message and strings are returned unchanged.
 * Anything else is JSON-serialized; if serialization throws (circular
 * structure, BigInt) or produces no string (`undefined`, functions, symbols)
 * the value falls back to `String(error)` so this helper never throws from
 * inside an error path and always returns a string.
 */
export function describeError(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  if (typeof error === "string") {
    return error;
  }
  try {
    const serialized: unknown = JSON.stringify(error);
    if (typeof serialized === "string") {
      return serialized;
    }
  } catch {
    // Not serializable; fall through to the generic string conversion below.
  }
  return String(error);
}

/** Resolves `true` when a TCP server can bind `port` on 127.0.0.1, `false` on any listen error. */
async function isPortFree(port: number): Promise<boolean> {
  return await new Promise<boolean>((resolve) => {
    const server = createServer();
    server.once("error", () => resolve(false));
    server.listen(port, "127.0.0.1", () => {
      server.close(() => resolve(true));
    });
  });
}

/**
 * Asks the OS for an unused port by listening on port 0, then closes the
 * probe server and resolves with the port it was given.
 */
async function findFreePort(): Promise<number> {
  return await new Promise<number>((resolve, reject) => {
    const server = createServer();
    server.once("error", reject);
    server.listen(0, () => {
      const address = server.address();
      if (!address || typeof address === "string") {
        server.close();
        reject(new Error("failed to find free port"));
        return;
      }
      server.close((error) => {
        if (error) {
          reject(error);
          return;
        }
        resolve(address.port);
      });
    });
  });
}

/**
 * Picks the host port to publish a service on.
 *
 * @param preferredPort - Port to use when possible.
 * @param pinned - When true the preferred port is returned without probing.
 * @returns `preferredPort` if pinned or currently bindable on 127.0.0.1,
 *   otherwise an OS-assigned free port.
 */
export async function resolveHostPort(preferredPort: number, pinned: boolean) {
  if (pinned || (await isPortFree(preferredPort))) {
    return preferredPort;
  }
  return await findFreePort();
}

/** Trims command output and keeps only its last 120 lines so error messages stay readable. */
function trimCommandOutput(output: string) {
  const maxLines = 120;
  const trimmed = output.trim();
  if (!trimmed) {
    return "";
  }
  const lines = trimmed.split("\n");
  return lines.length <= maxLines ? trimmed : lines.slice(-maxLines).join("\n");
}

/**
 * Runs `command` with `args` in `cwd` via `execFile` (no shell) and resolves
 * with the captured stdout/stderr.
 *
 * @throws Error whose message contains the command line plus the trimmed
 *   stderr/stdout sections when the process fails.
 */
export async function execCommand(command: string, args: string[], cwd: string) {
  return await new Promise<{ stdout: string; stderr: string }>((resolve, reject) => {
    execFile(
      command,
      args,
      { cwd, encoding: "utf8", maxBuffer: 10 * 1024 * 1024 },
      (error, stdout, stderr) => {
        if (!error) {
          resolve({ stdout, stderr });
          return;
        }
        const renderedStdout = trimCommandOutput(stdout);
        const renderedStderr = trimCommandOutput(stderr);
        const sections = [
          `Command failed: ${[command, ...args].join(" ")}`,
          renderedStderr ? `stderr:\n${renderedStderr}` : "",
          renderedStdout ? `stdout:\n${renderedStdout}` : "",
        ];
        reject(new Error(sections.filter(Boolean).join("\n\n")));
      },
    );
  });
}
/**
 * Polls `url` until the probe reports OK or the timeout elapses.
 *
 * @param url - Health endpoint handed to `deps.fetchImpl`.
 * @param deps - Injected collaborators and tuning:
 *   `fetchImpl` performs one probe, `sleepImpl` waits between probes,
 *   `timeoutMs` (default 360000) bounds the whole wait, `pollMs` (default
 *   1000) is the pause between probes, `label` and `composeFile` only affect
 *   the error text.
 * @throws Error describing the elapsed time, the last probe failure (if any)
 *   and a log-inspection hint when the service never became healthy.
 */
export async function waitForHealth(
  url: string,
  deps: {
    label?: string;
    composeFile?: string;
    fetchImpl: FetchLike;
    sleepImpl: (ms: number) => Promise<void>;
    timeoutMs?: number;
    pollMs?: number;
  },
): Promise<void> {
  const timeoutMs = deps.timeoutMs ?? 360_000;
  const pollMs = deps.pollMs ?? 1_000;
  const startMs = Date.now();
  const deadline = startMs + timeoutMs;
  let lastError: unknown = null;

  while (Date.now() < deadline) {
    try {
      const response = await deps.fetchImpl(url);
      if (response.ok) {
        return;
      }
      lastError = new Error(`Health check returned non-OK for ${url}`);
    } catch (error) {
      // Probe failures are expected while the service boots; remember the
      // latest one for the final error message and keep polling.
      lastError = error;
    }
    await deps.sleepImpl(pollMs);
  }

  const elapsedSec = Math.round((Date.now() - startMs) / 1000);
  const service = deps.label ?? url;
  // Fall back to a visible placeholder so the hint never renders as a
  // malformed `docker compose -f  logs` command.
  const composeFile = deps.composeFile ?? "<compose-file>";
  const lines = [
    `${service} did not become healthy within ${elapsedSec}s (limit ${Math.round(timeoutMs / 1000)}s).`,
    lastError ? `Last error: ${describeError(lastError)}` : "",
    `Hint: check container logs with \`docker compose -f ${composeFile} logs\` and verify the port is not already in use.`,
  ];
  throw new Error(lines.filter(Boolean).join("\n"));
}

/** Single health probe that maps any thrown error to `false`. */
async function isHealthy(url: string, fetchImpl: FetchLike): Promise<boolean> {
  try {
    const response = await fetchImpl(url);
    return response.ok;
  } catch {
    return false;
  }
}

/** Subset of a `docker compose ps --format json` row that this module reads. */
type DockerComposePsRow = { Health?: string; State?: string };

/**
 * Collapses a compose `ps` row to one status word: the trimmed `Health` when
 * non-empty, else the trimmed `State`, else `"unknown"`.
 */
function normalizeDockerServiceStatus(row?: DockerComposePsRow) {
  const health = row?.Health?.trim();
  if (health) {
    return health;
  }
  const state = row?.State?.trim();
  if (state) {
    return state;
  }
  return "unknown";
}

/**
 * Parses `docker compose ps --format json` output.
 *
 * Accepts a JSON array, a single JSON object, or (when the whole text is not
 * valid JSON) one JSON object per non-empty line. Blank output yields `[]`.
 *
 * @throws SyntaxError when a line in the per-line fallback is not valid JSON.
 */
function parseDockerComposePsRows(stdout: string): DockerComposePsRow[] {
  const trimmed = stdout.trim();
  if (!trimmed) {
    return [];
  }

  try {
    const parsed = JSON.parse(trimmed) as DockerComposePsRow[] | DockerComposePsRow;
    return Array.isArray(parsed) ? parsed : [parsed];
  } catch {
    // Not a single JSON document: treat the output as newline-delimited JSON.
    return trimmed
      .split("\n")
      .map((line) => line.trim())
      .filter(Boolean)
      .map((line) => JSON.parse(line) as DockerComposePsRow);
  }
}

/**
 * Polls `docker compose ps` for `service` until its status is `healthy` or
 * `running`, or the timeout elapses.
 *
 * @param service - Compose service name.
 * @param composeFile - Compose file passed to `-f`.
 * @param repoRoot - Working directory for the docker command.
 * @param runCommand - Command runner used to invoke docker.
 * @param sleepImpl - Waits between polls.
 * @param timeoutMs - Overall limit in milliseconds.
 * @param pollMs - Pause between polls in milliseconds.
 * @throws Error with the last observed status (or command failure text) and a
 *   log-inspection hint when the service never became healthy.
 */
export async function waitForDockerServiceHealth(
  service: string,
  composeFile: string,
  repoRoot: string,
  runCommand: RunCommand,
  sleepImpl: (ms: number) => Promise<void>,
  timeoutMs = 360_000,
  pollMs = 1_000,
): Promise<void> {
  const startMs = Date.now();
  const deadline = startMs + timeoutMs;
  let lastStatus = "unknown";

  while (Date.now() < deadline) {
    try {
      const { stdout } = await runCommand(
        "docker",
        ["compose", "-f", composeFile, "ps", "--format", "json", service],
        repoRoot,
      );
      // Only the first row is inspected.
      const [row] = parseDockerComposePsRows(stdout);
      lastStatus = normalizeDockerServiceStatus(row);
      if (lastStatus === "healthy" || lastStatus === "running") {
        return;
      }
    } catch (error) {
      // Command or parse failures become the reported status and polling continues.
      lastStatus = describeError(error);
    }
    await sleepImpl(pollMs);
  }

  const elapsedSec = Math.round((Date.now() - startMs) / 1000);
  throw new Error(
    [
      `${service} did not become healthy within ${elapsedSec}s (limit ${Math.round(timeoutMs / 1000)}s).`,
      `Last status: ${lastStatus}`,
      `Hint: check container logs with \`docker compose -f ${composeFile} logs ${service}\`.`,
    ].join("\n"),
  );
}
/**
 * Resolves a base URL (`http://<container-ip>:<port>/`) for a compose service
 * by asking docker for the container id and then for its network address.
 *
 * @param service - Compose service name.
 * @param port - Port appended to the container address.
 * @param composeFile - Compose file passed to `-f`.
 * @param repoRoot - Working directory for the docker commands.
 * @param runCommand - Command runner used to invoke docker.
 * @param fetchImpl - Optional probe; when given, `<baseUrl>healthz` must
 *   report OK for the URL to be returned.
 * @returns The base URL, or `null` when no container id or address is
 *   reported, or when the optional health probe fails.
 */
export async function resolveComposeServiceUrl(
  service: string,
  port: number,
  composeFile: string,
  repoRoot: string,
  runCommand: RunCommand,
  fetchImpl?: FetchLike,
) {
  const psResult = await runCommand(
    "docker",
    ["compose", "-f", composeFile, "ps", "-q", service],
    repoRoot,
  );
  const containerId = psResult.stdout.trim();
  if (!containerId) {
    return null;
  }

  // NOTE(review): this template prints the address of every attached network
  // with no separator, so a container on more than one network would
  // presumably yield a concatenated, unusable host — confirm single-network use.
  const inspectArgs = [
    "inspect",
    "--format",
    "{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}",
    containerId,
  ];
  const inspectResult = await runCommand("docker", inspectArgs, repoRoot);
  const ip = inspectResult.stdout.trim();
  if (!ip) {
    return null;
  }

  const baseUrl = `http://${ip}:${port}/`;
  if (!fetchImpl) {
    return baseUrl;
  }
  const reachable = await isHealthy(`${baseUrl}healthz`, fetchImpl);
  return reachable ? baseUrl : null;
}
/** Outcome of a single check or scenario step. */
export type QaReportCheck = {
  name: string;
  status: "pass" | "fail" | "skip";
  details?: string;
};

/** Outcome of a scenario, optionally broken down into steps. */
export type QaReportScenario = {
  name: string;
  status: "pass" | "fail" | "skip";
  details?: string;
  steps?: QaReportCheck[];
};

/**
 * Appends a labelled details entry to `lines`. Single-line details are
 * rendered inline; multi-line details go into a fenced `text` block after the
 * label line.
 */
function pushDetailsBlock(lines: string[], label: string, details: string, indent = "") {
  const heading = `${indent}- ${label}`;
  if (details.includes("\n")) {
    lines.push(`${heading}:`, "", "```text", details, "```");
    return;
  }
  lines.push(`${heading}: ${details}`);
}

/**
 * Renders a QA run as a Markdown document.
 *
 * The output starts with a title and a summary (timestamps, duration in
 * milliseconds, pass/fail counts over checks and scenarios combined), followed
 * by the "Checks", "Scenarios", "Timeline" and "Notes" sections — each emitted
 * only when it has entries — and ends with a trailing newline.
 *
 * @param params - Report title, run window and the optional section contents.
 * @returns The Markdown text.
 */
export function renderQaMarkdownReport(params: {
  title: string;
  startedAt: Date;
  finishedAt: Date;
  checks?: QaReportCheck[];
  scenarios?: QaReportScenario[];
  timeline?: string[];
  notes?: string[];
}) {
  const checks = params.checks ?? [];
  const scenarios = params.scenarios ?? [];
  const timeline = params.timeline ?? [];
  const notes = params.notes ?? [];

  // Checks and scenarios are tallied together; "skip" counts towards neither total.
  const outcomes = [...checks, ...scenarios].map((entry) => entry.status);
  const passCount = outcomes.filter((status) => status === "pass").length;
  const failCount = outcomes.filter((status) => status === "fail").length;
  const durationMs = params.finishedAt.getTime() - params.startedAt.getTime();
  const checkbox = (status: QaReportCheck["status"]) => (status === "pass" ? "x" : " ");

  const lines: string[] = [
    `# ${params.title}`,
    "",
    `- Started: ${params.startedAt.toISOString()}`,
    `- Finished: ${params.finishedAt.toISOString()}`,
    `- Duration ms: ${durationMs}`,
    `- Passed: ${passCount}`,
    `- Failed: ${failCount}`,
    "",
  ];

  if (checks.length > 0) {
    lines.push("## Checks", "");
    checks.forEach((check) => {
      lines.push(`- [${checkbox(check.status)}] ${check.name}`);
      if (check.details) {
        pushDetailsBlock(lines, "Details", check.details, " ");
      }
    });
  }

  if (scenarios.length > 0) {
    lines.push("", "## Scenarios", "");
    scenarios.forEach((scenario) => {
      lines.push(`### ${scenario.name}`, "", `- Status: ${scenario.status}`);
      if (scenario.details) {
        pushDetailsBlock(lines, "Details", scenario.details);
      }
      const steps = scenario.steps ?? [];
      if (steps.length > 0) {
        lines.push("- Steps:");
        steps.forEach((step) => {
          lines.push(` - [${checkbox(step.status)}] ${step.name}`);
          if (step.details) {
            pushDetailsBlock(lines, "Details", step.details, " ");
          }
        });
      }
      lines.push("");
    });
  }

  if (timeline.length > 0) {
    lines.push("## Timeline", "", ...timeline.map((item) => `- ${item}`));
  }

  if (notes.length > 0) {
    lines.push("", "## Notes", "", ...notes.map((note) => `- ${note}`));
  }

  lines.push("");
  return lines.join("\n");
}
false) { + const preset = DEFAULT_QA_MODELS[normalizeQaProviderMode(mode)]; + return alternate ? preset.alternate : preset.primary; +} diff --git a/extensions/qa-lab/src/live-transports/matrix/matrix-live.runtime.test.ts b/extensions/qa-matrix/src/runners/contract/runtime.test.ts similarity index 99% rename from extensions/qa-lab/src/live-transports/matrix/matrix-live.runtime.test.ts rename to extensions/qa-matrix/src/runners/contract/runtime.test.ts index 061e552f8b3..f7e68d47809 100644 --- a/extensions/qa-lab/src/live-transports/matrix/matrix-live.runtime.test.ts +++ b/extensions/qa-matrix/src/runners/contract/runtime.test.ts @@ -1,6 +1,6 @@ import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; import { afterEach, describe, expect, it, vi } from "vitest"; -import { __testing as liveTesting } from "./matrix-live.runtime.js"; +import { __testing as liveTesting } from "./runtime.js"; afterEach(() => { vi.useRealTimers(); diff --git a/extensions/qa-lab/src/live-transports/matrix/matrix-live.runtime.ts b/extensions/qa-matrix/src/runners/contract/runtime.ts similarity index 91% rename from extensions/qa-lab/src/live-transports/matrix/matrix-live.runtime.ts rename to extensions/qa-matrix/src/runners/contract/runtime.ts index 0b480c6ac66..87904fd6f1e 100644 --- a/extensions/qa-lab/src/live-transports/matrix/matrix-live.runtime.ts +++ b/extensions/qa-matrix/src/runners/contract/runtime.ts @@ -4,7 +4,7 @@ import path from "node:path"; import { setTimeout as sleep } from "node:timers/promises"; import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime"; -import { startQaGatewayChild } from "../../gateway-child.js"; +import { loadQaLabRuntimeModule } from "openclaw/plugin-sdk/qa-lab-runtime"; import type { QaReportCheck } from "../../report.js"; import { renderQaMarkdownReport } from "../../report.js"; import { @@ -12,14 +12,16 @@ import { normalizeQaProviderMode, type 
QaProviderModeInput, } from "../../run-config.js"; -import { startQaLiveLaneGateway } from "../shared/live-gateway.runtime.js"; -import { appendLiveLaneIssue, buildLiveLaneArtifactsError } from "../shared/live-lane-helpers.js"; +import { + appendLiveLaneIssue, + buildLiveLaneArtifactsError, +} from "../../shared/live-lane-helpers.js"; import { provisionMatrixQaRoom, type MatrixQaObservedEvent, type MatrixQaProvisionResult, -} from "./matrix-driver-client.js"; -import { startMatrixQaHarness } from "./matrix-harness.runtime.js"; +} from "../../substrate/client.js"; +import { startMatrixQaHarness } from "../../substrate/harness.runtime.js"; import { MATRIX_QA_SCENARIOS, buildMatrixReplyDetails, @@ -28,7 +30,21 @@ import { runMatrixQaScenario, type MatrixQaCanaryArtifact, type MatrixQaScenarioArtifacts, -} from "./matrix-live-scenarios.js"; +} from "./scenarios.js"; + +type MatrixQaGatewayChild = { + call( + method: string, + params: Record, + options?: { timeoutMs?: number }, + ): Promise; + restart(): Promise; +}; + +type MatrixQaLiveLaneGatewayHarness = { + gateway: MatrixQaGatewayChild; + stop(): Promise; +}; type MatrixQaScenarioResult = { artifacts?: MatrixQaScenarioArtifacts; @@ -214,7 +230,7 @@ function isMatrixAccountReady(entry?: { } async function waitForMatrixChannelReady( - gateway: Awaited>, + gateway: MatrixQaGatewayChild, accountId: string, opts?: { pollMs?: number; @@ -255,6 +271,27 @@ async function waitForMatrixChannelReady( throw new Error(`matrix account "${accountId}" did not become ready`); } +async function startMatrixQaLiveLaneGateway(params: { + repoRoot: string; + transport: { + requiredPluginIds: readonly string[]; + createGatewayConfig: (params: { + baseUrl: string; + }) => Pick; + }; + transportBaseUrl: string; + providerMode: "mock-openai" | "live-frontier"; + primaryModel: string; + alternateModel: string; + fastMode?: boolean; + controlUiEnabled?: boolean; + mutateConfig?: (cfg: OpenClawConfig) => OpenClawConfig; +}): Promise { + return 
(await loadQaLabRuntimeModule().startQaLiveLaneGateway( + params, + )) as MatrixQaLiveLaneGatewayHarness; +} + export async function runMatrixQaLive(params: { fastMode?: boolean; outputDir?: string; @@ -317,12 +354,12 @@ export async function runMatrixQaLive(params: { const scenarioResults: MatrixQaScenarioResult[] = []; const cleanupErrors: string[] = []; let canaryArtifact: MatrixQaCanaryArtifact | undefined; - let gatewayHarness: Awaited> | null = null; + let gatewayHarness: MatrixQaLiveLaneGatewayHarness | null = null; let canaryFailed = false; const syncState: { driver?: string; observer?: string } = {}; try { - gatewayHarness = await startQaLiveLaneGateway({ + gatewayHarness = await startMatrixQaLiveLaneGateway({ repoRoot, transport: { requiredPluginIds: [], diff --git a/extensions/qa-lab/src/live-transports/matrix/matrix-live-scenarios.test.ts b/extensions/qa-matrix/src/runners/contract/scenarios.test.ts similarity index 97% rename from extensions/qa-lab/src/live-transports/matrix/matrix-live-scenarios.test.ts rename to extensions/qa-matrix/src/runners/contract/scenarios.test.ts index 27ae4fab677..41f670957bb 100644 --- a/extensions/qa-lab/src/live-transports/matrix/matrix-live-scenarios.test.ts +++ b/extensions/qa-matrix/src/runners/contract/scenarios.test.ts @@ -3,19 +3,19 @@ const { createMatrixQaClient } = vi.hoisted(() => ({ createMatrixQaClient: vi.fn(), })); -vi.mock("./matrix-driver-client.js", () => ({ +vi.mock("../../substrate/client.js", () => ({ createMatrixQaClient, })); import { LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS, findMissingLiveTransportStandardScenarios, -} from "../shared/live-transport-scenarios.js"; +} from "../../shared/live-transport-scenarios.js"; import { __testing as scenarioTesting, MATRIX_QA_SCENARIOS, runMatrixQaScenario, -} from "./matrix-live-scenarios.js"; +} from "./scenarios.js"; describe("matrix live qa scenarios", () => { beforeEach(() => { diff --git 
a/extensions/qa-lab/src/live-transports/matrix/matrix-live-scenarios.ts b/extensions/qa-matrix/src/runners/contract/scenarios.ts similarity index 99% rename from extensions/qa-lab/src/live-transports/matrix/matrix-live-scenarios.ts rename to extensions/qa-matrix/src/runners/contract/scenarios.ts index fe4979e9eeb..7c8638d845c 100644 --- a/extensions/qa-lab/src/live-transports/matrix/matrix-live-scenarios.ts +++ b/extensions/qa-matrix/src/runners/contract/scenarios.ts @@ -3,8 +3,8 @@ import { collectLiveTransportStandardScenarioCoverage, selectLiveTransportScenarios, type LiveTransportScenarioDefinition, -} from "../shared/live-transport-scenarios.js"; -import { createMatrixQaClient, type MatrixQaObservedEvent } from "./matrix-driver-client.js"; +} from "../../shared/live-transport-scenarios.js"; +import { createMatrixQaClient, type MatrixQaObservedEvent } from "../../substrate/client.js"; export type MatrixQaScenarioId = | "matrix-thread-follow-up" diff --git a/extensions/qa-matrix/src/shared/live-lane-helpers.ts b/extensions/qa-matrix/src/shared/live-lane-helpers.ts new file mode 100644 index 00000000000..cf65146123e --- /dev/null +++ b/extensions/qa-matrix/src/shared/live-lane-helpers.ts @@ -0,0 +1,18 @@ +import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime"; + +export function appendLiveLaneIssue(issues: string[], label: string, error: unknown) { + issues.push(`${label}: ${formatErrorMessage(error)}`); +} + +export function buildLiveLaneArtifactsError(params: { + heading: string; + artifacts: Record; + details?: string[]; +}) { + return [ + params.heading, + ...(params.details ?? 
[]), + "Artifacts:", + ...Object.entries(params.artifacts).map(([label, filePath]) => `- ${label}: ${filePath}`), + ].join("\n"); +} diff --git a/extensions/qa-matrix/src/shared/live-transport-cli.runtime.ts b/extensions/qa-matrix/src/shared/live-transport-cli.runtime.ts new file mode 100644 index 00000000000..b840b2d6712 --- /dev/null +++ b/extensions/qa-matrix/src/shared/live-transport-cli.runtime.ts @@ -0,0 +1,40 @@ +import path from "node:path"; +import { resolveRepoRelativeOutputDir } from "../cli-paths.js"; +import type { QaProviderMode } from "../run-config.js"; +import { normalizeQaProviderMode } from "../run-config.js"; +import type { LiveTransportQaCommandOptions } from "./live-transport-cli.js"; + +export function resolveLiveTransportQaRunOptions( + opts: LiveTransportQaCommandOptions, +): LiveTransportQaCommandOptions & { + repoRoot: string; + providerMode: QaProviderMode; +} { + return { + repoRoot: path.resolve(opts.repoRoot ?? process.cwd()), + outputDir: resolveRepoRelativeOutputDir( + path.resolve(opts.repoRoot ?? process.cwd()), + opts.outputDir, + ), + providerMode: + opts.providerMode === undefined + ? 
"live-frontier" + : normalizeQaProviderMode(opts.providerMode), + primaryModel: opts.primaryModel, + alternateModel: opts.alternateModel, + fastMode: opts.fastMode, + scenarioIds: opts.scenarioIds, + sutAccountId: opts.sutAccountId, + credentialSource: opts.credentialSource?.trim(), + credentialRole: opts.credentialRole?.trim(), + }; +} + +export function printLiveTransportQaArtifacts( + laneLabel: string, + artifacts: Record, +) { + for (const [label, filePath] of Object.entries(artifacts)) { + process.stdout.write(`${laneLabel} ${label}: ${filePath}\n`); + } +} diff --git a/extensions/qa-matrix/src/shared/live-transport-cli.ts b/extensions/qa-matrix/src/shared/live-transport-cli.ts new file mode 100644 index 00000000000..f8a91a7ee25 --- /dev/null +++ b/extensions/qa-matrix/src/shared/live-transport-cli.ts @@ -0,0 +1,120 @@ +import type { Command } from "commander"; +import { collectString } from "../cli-options.js"; +import type { QaProviderModeInput } from "../run-config.js"; + +export type LiveTransportQaCommandOptions = { + repoRoot?: string; + outputDir?: string; + providerMode?: QaProviderModeInput; + primaryModel?: string; + alternateModel?: string; + fastMode?: boolean; + scenarioIds?: string[]; + sutAccountId?: string; + credentialSource?: string; + credentialRole?: string; +}; + +type LiveTransportQaCommanderOptions = { + repoRoot?: string; + outputDir?: string; + providerMode?: QaProviderModeInput; + model?: string; + altModel?: string; + scenario?: string[]; + fast?: boolean; + sutAccount?: string; + credentialSource?: string; + credentialRole?: string; +}; + +export type LiveTransportQaCliRegistration = { + commandName: string; + register(qa: Command): void; +}; + +export function createLazyCliRuntimeLoader(load: () => Promise) { + let promise: Promise | null = null; + return async () => { + promise ??= load(); + return await promise; + }; +} + +export function mapLiveTransportQaCommanderOptions( + opts: LiveTransportQaCommanderOptions, +): 
LiveTransportQaCommandOptions { + return { + repoRoot: opts.repoRoot, + outputDir: opts.outputDir, + providerMode: opts.providerMode, + primaryModel: opts.model, + alternateModel: opts.altModel, + fastMode: opts.fast, + scenarioIds: opts.scenario, + sutAccountId: opts.sutAccount, + credentialSource: opts.credentialSource, + credentialRole: opts.credentialRole, + }; +} + +export function registerLiveTransportQaCli(params: { + qa: Command; + commandName: string; + description: string; + outputDirHelp: string; + scenarioHelp: string; + sutAccountHelp: string; + run: (opts: LiveTransportQaCommandOptions) => Promise; +}) { + params.qa + .command(params.commandName) + .description(params.description) + .option("--repo-root ", "Repository root to target when running from a neutral cwd") + .option("--output-dir ", params.outputDirHelp) + .option( + "--provider-mode ", + "Provider mode: mock-openai or live-frontier (legacy live-openai still works)", + "live-frontier", + ) + .option("--model ", "Primary provider/model ref") + .option("--alt-model ", "Alternate provider/model ref") + .option("--scenario ", params.scenarioHelp, collectString, []) + .option("--fast", "Enable provider fast mode where supported", false) + .option("--sut-account ", params.sutAccountHelp, "sut") + .option( + "--credential-source ", + "Credential source for live lanes: env or convex (default: env)", + ) + .option( + "--credential-role ", + "Credential role for convex auth: maintainer or ci (default: maintainer)", + ) + .action(async (opts: LiveTransportQaCommanderOptions) => { + await params.run(mapLiveTransportQaCommanderOptions(opts)); + }); +} + +export function createLiveTransportQaCliRegistration(params: { + commandName: string; + description: string; + outputDirHelp: string; + scenarioHelp: string; + sutAccountHelp: string; + run: (opts: LiveTransportQaCommandOptions) => Promise; +}): LiveTransportQaCliRegistration { + return { + commandName: params.commandName, + register(qa: Command) { + 
registerLiveTransportQaCli({ + qa, + commandName: params.commandName, + description: params.description, + outputDirHelp: params.outputDirHelp, + scenarioHelp: params.scenarioHelp, + sutAccountHelp: params.sutAccountHelp, + run: params.run, + }); + }, + }; +} diff --git a/extensions/qa-matrix/src/shared/live-transport-scenarios.ts b/extensions/qa-matrix/src/shared/live-transport-scenarios.ts new file mode 100644 index 00000000000..535bcc3de53 --- /dev/null +++ b/extensions/qa-matrix/src/shared/live-transport-scenarios.ts @@ -0,0 +1,149 @@ +export type LiveTransportStandardScenarioId = + | "canary" + | "mention-gating" + | "allowlist-block" + | "top-level-reply-shape" + | "restart-resume" + | "thread-follow-up" + | "thread-isolation" + | "reaction-observation" + | "help-command"; + +export type LiveTransportScenarioDefinition = { + id: TId; + standardId?: LiveTransportStandardScenarioId; + timeoutMs: number; + title: string; +}; + +export type LiveTransportStandardScenarioDefinition = { + description: string; + id: LiveTransportStandardScenarioId; + title: string; +}; + +export const LIVE_TRANSPORT_STANDARD_SCENARIOS: readonly LiveTransportStandardScenarioDefinition[] = + [ + { + id: "canary", + title: "Transport canary", + description: "The lane can trigger one known-good reply on the real transport.", + }, + { + id: "mention-gating", + title: "Mention gating", + description: "Messages without the required mention do not trigger a reply.", + }, + { + id: "allowlist-block", + title: "Sender allowlist block", + description: "Non-allowlisted senders do not trigger a reply.", + }, + { + id: "top-level-reply-shape", + title: "Top-level reply shape", + description: "Top-level replies stay top-level when the lane is configured that way.", + }, + { + id: "restart-resume", + title: "Restart resume", + description: "The lane still responds after a gateway restart.", + }, + { + id: "thread-follow-up", + title: "Thread follow-up", + description: "Threaded prompts receive 
threaded replies with the expected relation metadata.", + }, + { + id: "thread-isolation", + title: "Thread isolation", + description: "Fresh top-level prompts stay out of prior threads.", + }, + { + id: "reaction-observation", + title: "Reaction observation", + description: "Reaction events are observed and normalized correctly.", + }, + { + id: "help-command", + title: "Help command", + description: "The transport-specific help command path replies successfully.", + }, + ] as const; + +export const LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS: readonly LiveTransportStandardScenarioId[] = + [ + "canary", + "mention-gating", + "allowlist-block", + "top-level-reply-shape", + "restart-resume", + ] as const; + +const LIVE_TRANSPORT_STANDARD_SCENARIO_ID_SET = new Set( + LIVE_TRANSPORT_STANDARD_SCENARIOS.map((scenario) => scenario.id), +); + +function assertKnownStandardScenarioIds(ids: readonly LiveTransportStandardScenarioId[]) { + for (const id of ids) { + if (!LIVE_TRANSPORT_STANDARD_SCENARIO_ID_SET.has(id)) { + throw new Error(`unknown live transport standard scenario id: ${id}`); + } + } +} + +export function selectLiveTransportScenarios(params: { + ids?: string[]; + laneLabel: string; + scenarios: readonly TDefinition[]; +}) { + if (!params.ids || params.ids.length === 0) { + return [...params.scenarios]; + } + const requested = new Set(params.ids); + const selected = params.scenarios.filter((scenario) => params.ids?.includes(scenario.id)); + const missingIds = [...requested].filter( + (id) => !selected.some((scenario) => scenario.id === id), + ); + if (missingIds.length > 0) { + throw new Error(`unknown ${params.laneLabel} QA scenario id(s): ${missingIds.join(", ")}`); + } + return selected; +} + +export function collectLiveTransportStandardScenarioCoverage(params: { + alwaysOnStandardScenarioIds?: readonly LiveTransportStandardScenarioId[]; + scenarios: readonly LiveTransportScenarioDefinition[]; +}) { + const coverage: LiveTransportStandardScenarioId[] = []; 
+ const seen = new Set(); + const append = (id: LiveTransportStandardScenarioId | undefined) => { + if (!id || seen.has(id)) { + return; + } + seen.add(id); + coverage.push(id); + }; + + assertKnownStandardScenarioIds(params.alwaysOnStandardScenarioIds ?? []); + for (const id of params.alwaysOnStandardScenarioIds ?? []) { + append(id); + } + for (const scenario of params.scenarios) { + if (scenario.standardId) { + assertKnownStandardScenarioIds([scenario.standardId]); + } + append(scenario.standardId); + } + return coverage; +} + +export function findMissingLiveTransportStandardScenarios(params: { + coveredStandardScenarioIds: readonly LiveTransportStandardScenarioId[]; + expectedStandardScenarioIds: readonly LiveTransportStandardScenarioId[]; +}) { + assertKnownStandardScenarioIds(params.coveredStandardScenarioIds); + assertKnownStandardScenarioIds(params.expectedStandardScenarioIds); + const covered = new Set(params.coveredStandardScenarioIds); + return params.expectedStandardScenarioIds.filter((id) => !covered.has(id)); +} diff --git a/extensions/qa-lab/src/live-transports/matrix/matrix-driver-client.test.ts b/extensions/qa-matrix/src/substrate/client.test.ts similarity index 99% rename from extensions/qa-lab/src/live-transports/matrix/matrix-driver-client.test.ts rename to extensions/qa-matrix/src/substrate/client.test.ts index 59a3e4ae97c..b3152da8b4e 100644 --- a/extensions/qa-lab/src/live-transports/matrix/matrix-driver-client.test.ts +++ b/extensions/qa-matrix/src/substrate/client.test.ts @@ -4,7 +4,7 @@ import { createMatrixQaClient, provisionMatrixQaRoom, type MatrixQaObservedEvent, -} from "./matrix-driver-client.js"; +} from "./client.js"; function resolveRequestUrl(input: RequestInfo | URL) { if (typeof input === "string") { diff --git a/extensions/qa-lab/src/live-transports/matrix/matrix-driver-client.ts b/extensions/qa-matrix/src/substrate/client.ts similarity index 100% rename from extensions/qa-lab/src/live-transports/matrix/matrix-driver-client.ts 
rename to extensions/qa-matrix/src/substrate/client.ts diff --git a/extensions/qa-lab/src/live-transports/matrix/matrix-harness.runtime.test.ts b/extensions/qa-matrix/src/substrate/harness.runtime.test.ts similarity index 98% rename from extensions/qa-lab/src/live-transports/matrix/matrix-harness.runtime.test.ts rename to extensions/qa-matrix/src/substrate/harness.runtime.test.ts index e1451fe2b39..1a907f63872 100644 --- a/extensions/qa-lab/src/live-transports/matrix/matrix-harness.runtime.test.ts +++ b/extensions/qa-matrix/src/substrate/harness.runtime.test.ts @@ -2,11 +2,7 @@ import { mkdtemp, readFile, rm } from "node:fs/promises"; import os from "node:os"; import path from "node:path"; import { describe, expect, it, vi } from "vitest"; -import { - __testing, - startMatrixQaHarness, - writeMatrixQaHarnessFiles, -} from "./matrix-harness.runtime.js"; +import { __testing, startMatrixQaHarness, writeMatrixQaHarnessFiles } from "./harness.runtime.js"; describe("matrix harness runtime", () => { it("writes a pinned Tuwunel compose file and redacted manifest", async () => { diff --git a/extensions/qa-lab/src/live-transports/matrix/matrix-harness.runtime.ts b/extensions/qa-matrix/src/substrate/harness.runtime.ts similarity index 99% rename from extensions/qa-lab/src/live-transports/matrix/matrix-harness.runtime.ts rename to extensions/qa-matrix/src/substrate/harness.runtime.ts index d10b6df32b4..5b21e34d823 100644 --- a/extensions/qa-lab/src/live-transports/matrix/matrix-harness.runtime.ts +++ b/extensions/qa-matrix/src/substrate/harness.runtime.ts @@ -11,7 +11,7 @@ import { waitForHealth, type FetchLike, type RunCommand, -} from "../../docker-runtime.js"; +} from "../docker-runtime.js"; const MATRIX_QA_DEFAULT_IMAGE = "ghcr.io/matrix-construct/tuwunel:v1.5.1"; const MATRIX_QA_DEFAULT_SERVER_NAME = "matrix-qa.test"; diff --git a/package.json b/package.json index 6d141d8e48b..4b03af65077 100644 --- a/package.json +++ b/package.json @@ -31,8 +31,7 @@ 
"!dist/plugin-sdk/.tsbuildinfo", "!dist/extensions/qa-channel/**", "dist/extensions/qa-channel/runtime-api.js", - "!dist/extensions/qa-lab/**", - "dist/extensions/qa-lab/runtime-api.js", + "!dist/extensions/qa-matrix/**", "docs/", "!docs/.generated/**", "!docs/.i18n/zh-CN.tm.jsonl", @@ -766,6 +765,14 @@ "types": "./dist/plugin-sdk/matrix-thread-bindings.d.ts", "default": "./dist/plugin-sdk/matrix-thread-bindings.js" }, + "./plugin-sdk/qa-lab-runtime": { + "types": "./dist/plugin-sdk/qa-lab-runtime.d.ts", + "default": "./dist/plugin-sdk/qa-lab-runtime.js" + }, + "./plugin-sdk/qa-matrix": { + "types": "./dist/plugin-sdk/qa-matrix.d.ts", + "default": "./dist/plugin-sdk/qa-matrix.js" + }, "./plugin-sdk/mattermost": { "types": "./dist/plugin-sdk/mattermost.d.ts", "default": "./dist/plugin-sdk/mattermost.js" diff --git a/scripts/lib/bundled-plugin-build-entries.mjs b/scripts/lib/bundled-plugin-build-entries.mjs index ded906f4a33..5d931b88954 100644 --- a/scripts/lib/bundled-plugin-build-entries.mjs +++ b/scripts/lib/bundled-plugin-build-entries.mjs @@ -8,7 +8,7 @@ import { import { shouldBuildBundledCluster } from "./optional-bundled-clusters.mjs"; const TOP_LEVEL_PUBLIC_SURFACE_EXTENSIONS = new Set([".ts", ".js", ".mts", ".cts", ".mjs", ".cjs"]); -const NON_PACKAGED_BUNDLED_PLUGIN_DIRS = new Set(["qa-channel", "qa-lab"]); +const NON_PACKAGED_BUNDLED_PLUGIN_DIRS = new Set(["qa-channel", "qa-matrix"]); const toPosixPath = (value) => value.replaceAll("\\", "/"); function readBundledPluginPackageJson(packageJsonPath) { diff --git a/scripts/lib/bundled-runtime-sidecar-paths.json b/scripts/lib/bundled-runtime-sidecar-paths.json index 9d4e3264a5a..be9a497ad8e 100644 --- a/scripts/lib/bundled-runtime-sidecar-paths.json +++ b/scripts/lib/bundled-runtime-sidecar-paths.json @@ -23,6 +23,7 @@ "dist/extensions/nostr/runtime-api.js", "dist/extensions/ollama/runtime-api.js", "dist/extensions/open-prose/runtime-api.js", + "dist/extensions/qa-lab/runtime-api.js", 
"dist/extensions/qqbot/runtime-api.js", "dist/extensions/signal/runtime-api.js", "dist/extensions/slack/runtime-api.js", diff --git a/scripts/lib/npm-update-compat-sidecars.mjs b/scripts/lib/npm-update-compat-sidecars.mjs index 228b120dec3..66211821b77 100644 --- a/scripts/lib/npm-update-compat-sidecars.mjs +++ b/scripts/lib/npm-update-compat-sidecars.mjs @@ -4,11 +4,6 @@ export const NPM_UPDATE_COMPAT_SIDECARS = [ content: "// Compatibility stub for older OpenClaw updaters. The QA channel implementation is not packaged.\nexport {};\n", }, - { - path: "dist/extensions/qa-lab/runtime-api.js", - content: - "// Compatibility stub for older OpenClaw updaters. The QA lab implementation is not packaged.\nexport {};\n", - }, ]; export const NPM_UPDATE_COMPAT_SIDECAR_PATHS = new Set( diff --git a/scripts/lib/plugin-sdk-entrypoints.json b/scripts/lib/plugin-sdk-entrypoints.json index b55448ef5f2..c37027265f0 100644 --- a/scripts/lib/plugin-sdk-entrypoints.json +++ b/scripts/lib/plugin-sdk-entrypoints.json @@ -179,6 +179,8 @@ "matrix-runtime-surface", "matrix-surface", "matrix-thread-bindings", + "qa-lab-runtime", + "qa-matrix", "mattermost", "mattermost-policy", "memory-core", diff --git a/src/plugin-sdk/qa-lab-runtime.ts b/src/plugin-sdk/qa-lab-runtime.ts new file mode 100644 index 00000000000..93dc7c51834 --- /dev/null +++ b/src/plugin-sdk/qa-lab-runtime.ts @@ -0,0 +1,32 @@ +import { loadBundledPluginPublicSurfaceModuleSync } from "./facade-runtime.js"; + +type QaLabRuntimeSurface = { + startQaLiveLaneGateway: (...args: unknown[]) => Promise; +}; + +function isMissingQaLabRuntimeError(error: unknown) { + return ( + error instanceof Error && + (error.message === "Unable to resolve bundled plugin public surface qa-lab/runtime-api.js" || + error.message.startsWith("Unable to open bundled plugin public surface ")) + ); +} + +export function loadQaLabRuntimeModule(): QaLabRuntimeSurface { + return loadBundledPluginPublicSurfaceModuleSync({ + dirName: "qa-lab", + 
artifactBasename: "runtime-api.js", + }); +} + +export function isQaLabRuntimeAvailable(): boolean { + try { + loadQaLabRuntimeModule(); + return true; + } catch (error) { + if (isMissingQaLabRuntimeError(error)) { + return false; + } + throw error; + } +} diff --git a/src/plugin-sdk/qa-matrix.test.ts b/src/plugin-sdk/qa-matrix.test.ts new file mode 100644 index 00000000000..7bca0e90423 --- /dev/null +++ b/src/plugin-sdk/qa-matrix.test.ts @@ -0,0 +1,48 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + +const loadBundledPluginPublicSurfaceModuleSync = vi.hoisted(() => vi.fn()); +const registerMatrixQaCliImpl = vi.hoisted(() => vi.fn()); + +vi.mock("./facade-runtime.js", async () => { + const actual = await vi.importActual("./facade-runtime.js"); + return { + ...actual, + loadBundledPluginPublicSurfaceModuleSync, + }; +}); + +describe("plugin-sdk qa-matrix", () => { + beforeEach(() => { + registerMatrixQaCliImpl.mockReset(); + loadBundledPluginPublicSurfaceModuleSync.mockReset().mockReturnValue({ + registerMatrixQaCli: registerMatrixQaCliImpl, + }); + }); + + it("keeps the qa-matrix facade cold until used", async () => { + const module = await import("./qa-matrix.js"); + + expect(loadBundledPluginPublicSurfaceModuleSync).not.toHaveBeenCalled(); + module.registerMatrixQaCli({} as never); + expect(loadBundledPluginPublicSurfaceModuleSync).toHaveBeenCalledWith({ + dirName: "qa-matrix", + artifactBasename: "cli.js", + }); + }); + + it("delegates matrix qa cli registration through the public surface", async () => { + const module = await import("./qa-matrix.js"); + + module.registerMatrixQaCli({} as never); + expect(registerMatrixQaCliImpl).toHaveBeenCalledWith({} as never); + }); + + it("reports qa-matrix unavailable when the public facade is missing", async () => { + loadBundledPluginPublicSurfaceModuleSync.mockImplementation(() => { + throw new Error("Unable to resolve bundled plugin public surface qa-matrix/cli.js"); + }); + const module = await 
import("./qa-matrix.js"); + + expect(module.isMatrixQaCliAvailable()).toBe(false); + }); +}); diff --git a/src/plugin-sdk/qa-matrix.ts b/src/plugin-sdk/qa-matrix.ts new file mode 100644 index 00000000000..aa7218fcdea --- /dev/null +++ b/src/plugin-sdk/qa-matrix.ts @@ -0,0 +1,36 @@ +import type { Command } from "commander"; +import { loadBundledPluginPublicSurfaceModuleSync } from "./facade-runtime.js"; + +type MatrixQaCliSurface = { + registerMatrixQaCli: (qa: Command) => void; +}; + +function isMissingMatrixQaFacadeError(error: unknown) { + return ( + error instanceof Error && + (error.message === "Unable to resolve bundled plugin public surface qa-matrix/cli.js" || + error.message.startsWith("Unable to open bundled plugin public surface ")) + ); +} + +function loadFacadeModule(): MatrixQaCliSurface { + return loadBundledPluginPublicSurfaceModuleSync({ + dirName: "qa-matrix", + artifactBasename: "cli.js", + }); +} + +export const registerMatrixQaCli: MatrixQaCliSurface["registerMatrixQaCli"] = ((...args) => + loadFacadeModule().registerMatrixQaCli(...args)) as MatrixQaCliSurface["registerMatrixQaCli"]; + +export function isMatrixQaCliAvailable(): boolean { + try { + loadFacadeModule(); + return true; + } catch (error) { + if (isMissingMatrixQaFacadeError(error)) { + return false; + } + throw error; + } +} diff --git a/src/plugins/bundled-plugin-metadata.test.ts b/src/plugins/bundled-plugin-metadata.test.ts index 939dac4c3d1..6a936ea0256 100644 --- a/src/plugins/bundled-plugin-metadata.test.ts +++ b/src/plugins/bundled-plugin-metadata.test.ts @@ -131,11 +131,12 @@ describe("bundled plugin metadata", () => { }, ); - it("excludes private QA sidecars from the packaged runtime sidecar baseline", () => { + it("excludes non-packaged QA sidecars from the packaged runtime sidecar baseline", () => { expect(BUNDLED_RUNTIME_SIDECAR_PATHS).not.toContain( "dist/extensions/qa-channel/runtime-api.js", ); - 
expect(BUNDLED_RUNTIME_SIDECAR_PATHS).not.toContain("dist/extensions/qa-lab/runtime-api.js"); + expect(BUNDLED_RUNTIME_SIDECAR_PATHS).not.toContain("dist/extensions/qa-matrix/runtime-api.js"); + expect(BUNDLED_RUNTIME_SIDECAR_PATHS).toContain("dist/extensions/qa-lab/runtime-api.js"); }); it("captures setup-entry metadata for bundled channel plugins", () => { diff --git a/src/plugins/runtime-sidecar-paths-baseline.ts b/src/plugins/runtime-sidecar-paths-baseline.ts index 57872afe90c..140fc027baf 100644 --- a/src/plugins/runtime-sidecar-paths-baseline.ts +++ b/src/plugins/runtime-sidecar-paths-baseline.ts @@ -2,7 +2,7 @@ import fs from "node:fs"; import path from "node:path"; import { listBundledPluginMetadata } from "./bundled-plugin-metadata.js"; -const NON_PACKAGED_RUNTIME_SIDECAR_PLUGIN_DIRS = new Set(["qa-channel", "qa-lab"]); +const NON_PACKAGED_RUNTIME_SIDECAR_PLUGIN_DIRS = new Set(["qa-channel", "qa-matrix"]); function buildBundledDistArtifactPath(dirName: string, artifact: string): string { return ["dist", "extensions", dirName, artifact].join("/");