refactor(qa): split Matrix QA into optional plugin (#66723)

Merged via squash.

Prepared head SHA: 27241bd089
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras
This commit is contained in:
Gustavo Madeira Santana
2026-04-14 16:28:57 -04:00
committed by GitHub
parent 3425823dfb
commit 82a2db71e8
69 changed files with 2026 additions and 229 deletions

View File

@@ -1 +1,2 @@
export * from "./src/runtime-api.js";
export { startQaLiveLaneGateway } from "./src/live-transports/shared/live-gateway.runtime.js";

View File

@@ -8,7 +8,6 @@ const {
runQaSuiteFromRuntime,
runQaCharacterEval,
runQaMultipass,
runMatrixQaLive,
runTelegramQaLive,
startQaLabServer,
writeQaDockerHarnessFiles,
@@ -20,7 +19,6 @@ const {
runQaSuiteFromRuntime: vi.fn(),
runQaCharacterEval: vi.fn(),
runQaMultipass: vi.fn(),
runMatrixQaLive: vi.fn(),
runTelegramQaLive: vi.fn(),
startQaLabServer: vi.fn(),
writeQaDockerHarnessFiles: vi.fn(),
@@ -52,10 +50,6 @@ vi.mock("./multipass.runtime.js", () => ({
runQaMultipass,
}));
vi.mock("./live-transports/matrix/matrix-live.runtime.js", () => ({
runMatrixQaLive,
}));
vi.mock("./live-transports/telegram/telegram-live.runtime.js", () => ({
runTelegramQaLive,
}));
@@ -88,7 +82,6 @@ import {
runQaParityReportCommand,
runQaSuiteCommand,
} from "./cli.runtime.js";
import { runQaMatrixCommand } from "./live-transports/matrix/cli.runtime.js";
import { runQaTelegramCommand } from "./live-transports/telegram/cli.runtime.js";
describe("qa cli runtime", () => {
@@ -100,7 +93,6 @@ describe("qa cli runtime", () => {
runQaCharacterEval.mockReset();
runQaManualLane.mockReset();
runQaMultipass.mockReset();
runMatrixQaLive.mockReset();
runTelegramQaLive.mockReset();
startQaLabServer.mockReset();
writeQaDockerHarnessFiles.mockReset();
@@ -139,13 +131,6 @@ describe("qa cli runtime", () => {
vmName: "openclaw-qa-test",
scenarioIds: ["channel-chat-baseline"],
});
runMatrixQaLive.mockResolvedValue({
outputDir: "/tmp/matrix",
reportPath: "/tmp/matrix/report.md",
summaryPath: "/tmp/matrix/summary.json",
observedEventsPath: "/tmp/matrix/observed.json",
scenarios: [],
});
runTelegramQaLive.mockResolvedValue({
outputDir: "/tmp/telegram",
reportPath: "/tmp/telegram/report.md",
@@ -226,30 +211,6 @@ describe("qa cli runtime", () => {
});
});
it("resolves matrix qa repo-root-relative paths before dispatching", async () => {
await runQaMatrixCommand({
repoRoot: "/tmp/openclaw-repo",
outputDir: ".artifacts/qa/matrix",
providerMode: "live-frontier",
primaryModel: "openai/gpt-5.4",
alternateModel: "openai/gpt-5.4",
fastMode: true,
scenarioIds: ["matrix-thread-follow-up"],
sutAccountId: "sut-live",
});
expect(runMatrixQaLive).toHaveBeenCalledWith({
repoRoot: path.resolve("/tmp/openclaw-repo"),
outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa/matrix"),
providerMode: "live-frontier",
primaryModel: "openai/gpt-5.4",
alternateModel: "openai/gpt-5.4",
fastMode: true,
scenarioIds: ["matrix-thread-follow-up"],
sutAccountId: "sut-live",
});
});
it("rejects output dirs that escape the repo root", () => {
expect(() => resolveRepoRelativeOutputDir("/tmp/openclaw-repo", "../outside")).toThrow(
"--output-dir must stay within the repo root.",
@@ -273,20 +234,6 @@ describe("qa cli runtime", () => {
);
});
it("defaults matrix qa runs onto the live provider lane", async () => {
await runQaMatrixCommand({
repoRoot: "/tmp/openclaw-repo",
scenarioIds: ["matrix-thread-follow-up"],
});
expect(runMatrixQaLive).toHaveBeenCalledWith(
expect.objectContaining({
repoRoot: path.resolve("/tmp/openclaw-repo"),
providerMode: "live-frontier",
}),
);
});
it("normalizes legacy live-openai suite runs onto the frontier provider mode", async () => {
await runQaSuiteCommand({
repoRoot: "/tmp/openclaw-repo",

View File

@@ -1,22 +1,76 @@
import { Command } from "commander";
import type { QaRunnerCliContribution } from "openclaw/plugin-sdk/qa-runner-runtime";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
// Identity of a fictional QA runner plugin, shared by the contribution fixtures in this test file.
const TEST_QA_RUNNER = {
pluginId: "qa-runner-test",
commandName: "runner-test",
description: "Run the test live QA lane",
npmSpec: "@openclaw/qa-runner-test",
} as const;
/**
 * Builds a runner contribution with status "available".
 *
 * Its `registration.register` is a vitest mock that adds a no-op subcommand
 * (named after the fixture's command name) to the `qa` command it receives,
 * so tests can assert both the call count and the resulting subcommand.
 */
function createAvailableQaRunnerContribution() {
return {
pluginId: TEST_QA_RUNNER.pluginId,
commandName: TEST_QA_RUNNER.commandName,
status: "available" as const,
registration: {
commandName: TEST_QA_RUNNER.commandName,
register: vi.fn((qa: Command) => {
qa.command(TEST_QA_RUNNER.commandName).action(() => undefined);
}),
},
} satisfies QaRunnerCliContribution;
}
/**
 * Builds a runner contribution with status "missing", carrying the fixture's
 * `npmSpec` so tests can check for it in the resulting error message.
 */
function createMissingQaRunnerContribution(): QaRunnerCliContribution {
return {
pluginId: TEST_QA_RUNNER.pluginId,
commandName: TEST_QA_RUNNER.commandName,
description: TEST_QA_RUNNER.description,
status: "missing",
npmSpec: TEST_QA_RUNNER.npmSpec,
};
}
/** Builds a runner contribution with status "blocked" under the fixture's own command name. */
function createBlockedQaRunnerContribution(): QaRunnerCliContribution {
return {
pluginId: TEST_QA_RUNNER.pluginId,
commandName: TEST_QA_RUNNER.commandName,
description: TEST_QA_RUNNER.description,
status: "blocked",
};
}
/**
 * Builds a "blocked" runner contribution that claims an arbitrary command name.
 *
 * @param commandName - Subcommand name the contribution should claim; callers
 *   pass a name that is already taken to exercise collision handling.
 */
function createConflictingQaRunnerContribution(commandName: string): QaRunnerCliContribution {
return {
pluginId: TEST_QA_RUNNER.pluginId,
commandName,
description: TEST_QA_RUNNER.description,
status: "blocked",
};
}
const {
runQaCredentialsAddCommand,
runQaCredentialsListCommand,
runQaCredentialsRemoveCommand,
runQaMatrixCommand,
runQaTelegramCommand,
} = vi.hoisted(() => ({
runQaCredentialsAddCommand: vi.fn(),
runQaCredentialsListCommand: vi.fn(),
runQaCredentialsRemoveCommand: vi.fn(),
runQaMatrixCommand: vi.fn(),
runQaTelegramCommand: vi.fn(),
}));
vi.mock("./live-transports/matrix/cli.runtime.js", () => ({
runQaMatrixCommand,
const { listQaRunnerCliContributions } = vi.hoisted(() => ({
listQaRunnerCliContributions: vi.fn<() => QaRunnerCliContribution[]>(() => [
createAvailableQaRunnerContribution(),
]),
}));
vi.mock("openclaw/plugin-sdk/qa-runner-runtime", () => ({
listQaRunnerCliContributions,
}));
vi.mock("./live-transports/telegram/cli.runtime.js", () => ({
@@ -36,63 +90,71 @@ describe("qa cli registration", () => {
beforeEach(() => {
program = new Command();
registerQaLabCli(program);
runQaCredentialsAddCommand.mockReset();
runQaCredentialsListCommand.mockReset();
runQaCredentialsRemoveCommand.mockReset();
runQaMatrixCommand.mockReset();
runQaTelegramCommand.mockReset();
listQaRunnerCliContributions
.mockReset()
.mockReturnValue([createAvailableQaRunnerContribution()]);
registerQaLabCli(program);
});
afterEach(() => {
vi.clearAllMocks();
});
it("registers the matrix and telegram live transport subcommands", () => {
it("registers discovered and built-in live transport subcommands", () => {
const qa = program.commands.find((command) => command.name() === "qa");
expect(qa).toBeDefined();
expect(qa?.commands.map((command) => command.name())).toEqual(
expect.arrayContaining(["matrix", "telegram", "credentials"]),
expect.arrayContaining([TEST_QA_RUNNER.commandName, "telegram", "credentials"]),
);
});
it("routes matrix CLI flags into the lane runtime", async () => {
await program.parseAsync([
"node",
"openclaw",
"qa",
"matrix",
"--repo-root",
"/tmp/openclaw-repo",
"--output-dir",
".artifacts/qa/matrix",
"--provider-mode",
"mock-openai",
"--model",
"mock-openai/gpt-5.4",
"--alt-model",
"mock-openai/gpt-5.4-alt",
"--scenario",
"matrix-thread-follow-up",
"--scenario",
"matrix-thread-isolation",
"--fast",
"--sut-account",
"sut-live",
]);
it("delegates discovered qa runner registration through the generic host seam", () => {
const [{ registration }] = listQaRunnerCliContributions.mock.results[0]?.value;
expect(registration.register).toHaveBeenCalledTimes(1);
});
expect(runQaMatrixCommand).toHaveBeenCalledWith({
repoRoot: "/tmp/openclaw-repo",
outputDir: ".artifacts/qa/matrix",
providerMode: "mock-openai",
primaryModel: "mock-openai/gpt-5.4",
alternateModel: "mock-openai/gpt-5.4-alt",
fastMode: true,
scenarioIds: ["matrix-thread-follow-up", "matrix-thread-isolation"],
sutAccountId: "sut-live",
credentialSource: undefined,
credentialRole: undefined,
});
it("keeps Telegram credential flags on the shared host CLI", () => {
const qa = program.commands.find((command) => command.name() === "qa");
const telegram = qa?.commands.find((command) => command.name() === "telegram");
const optionNames = telegram?.options.map((option) => option.long) ?? [];
expect(optionNames).toEqual(
expect.arrayContaining(["--credential-source", "--credential-role"]),
);
});
it("shows an install hint when a discovered runner plugin is unavailable", async () => {
listQaRunnerCliContributions.mockReset().mockReturnValue([createMissingQaRunnerContribution()]);
const missingProgram = new Command();
registerQaLabCli(missingProgram);
await expect(
missingProgram.parseAsync(["node", "openclaw", "qa", TEST_QA_RUNNER.commandName]),
).rejects.toThrow(`openclaw plugins install ${TEST_QA_RUNNER.npmSpec}`);
});
it("shows an enable hint when a discovered runner plugin is installed but blocked", async () => {
listQaRunnerCliContributions.mockReset().mockReturnValue([createBlockedQaRunnerContribution()]);
const blockedProgram = new Command();
registerQaLabCli(blockedProgram);
await expect(
blockedProgram.parseAsync(["node", "openclaw", "qa", TEST_QA_RUNNER.commandName]),
).rejects.toThrow(`Enable or allow plugin "${TEST_QA_RUNNER.pluginId}"`);
});
it("rejects discovered runners that collide with built-in qa subcommands", () => {
listQaRunnerCliContributions
.mockReset()
.mockReturnValue([createConflictingQaRunnerContribution("manual")]);
expect(() => registerQaLabCli(new Command())).toThrow(
'QA runner command "manual" conflicts with an existing qa subcommand',
);
});
it("routes telegram CLI defaults into the lane runtime", async () => {

View File

@@ -1,6 +1,6 @@
import type { Command } from "commander";
import { collectString } from "./cli-options.js";
import { LIVE_TRANSPORT_QA_CLI_REGISTRATIONS } from "./live-transports/cli.js";
import { listLiveTransportQaCliRegistrations } from "./live-transports/cli.js";
import type { QaProviderModeInput } from "./run-config.js";
import { hasQaScenarioPack } from "./scenario-catalog.js";
@@ -183,6 +183,12 @@ export function isQaLabCliAvailable(): boolean {
return hasQaScenarioPack();
}
/**
 * Refuses to register a QA runner under a subcommand name that is already taken.
 *
 * @param qa - The `qa` parent command whose currently registered subcommands are inspected.
 * @param commandName - Subcommand name the runner is about to register.
 * @throws Error when `qa` already has a subcommand with exactly that name.
 */
function assertNoQaSubcommandCollision(qa: Command, commandName: string) {
  for (const existing of qa.commands) {
    if (existing.name() !== commandName) {
      continue;
    }
    throw new Error(`QA runner command "${commandName}" conflicts with an existing qa subcommand`);
  }
}
export function registerQaLabCli(program: Command) {
const qa = program
.command("qa")
@@ -284,10 +290,6 @@ export function registerQaLabCli(program: Command) {
},
);
for (const lane of LIVE_TRANSPORT_QA_CLI_REGISTRATIONS) {
lane.register(qa);
}
qa.command("character-eval")
.description("Run the character QA scenario across live models and write a judged report")
.option("--repo-root <path>", "Repository root to target when running from a neutral cwd")
@@ -579,4 +581,9 @@ export function registerQaLabCli(program: Command) {
.action(async (opts: { host?: string; port?: number }) => {
await runQaMockOpenAi(opts);
});
for (const lane of listLiveTransportQaCliRegistrations()) {
assertNoQaSubcommandCollision(qa, lane.commandName);
lane.register(qa);
}
}

View File

@@ -1,8 +1,78 @@
import { matrixQaCliRegistration } from "./matrix/cli.js";
import { listQaRunnerCliContributions } from "openclaw/plugin-sdk/qa-runner-runtime";
import type { LiveTransportQaCliRegistration } from "./shared/live-transport-cli.js";
import { telegramQaCliRegistration } from "./telegram/cli.js";
/**
 * Creates a placeholder registration for a runner whose plugin is not installed.
 *
 * The registered subcommand carries the given description and, when invoked,
 * always throws an error telling the user how to install the plugin.
 *
 * @param params.commandName - Subcommand name to register under `qa`.
 * @param params.description - Help text shown for the placeholder subcommand.
 * @param params.npmSpec - Package spec embedded in the install hint.
 */
function createMissingQaRunnerCliRegistration(params: {
  commandName: string;
  description: string;
  npmSpec: string;
}): LiveTransportQaCliRegistration {
  const { commandName, description, npmSpec } = params;
  // Action handler that never succeeds: it only surfaces the install hint.
  const failNotInstalled = () => {
    throw new Error(
      `QA runner "${commandName}" not installed. Install it with "openclaw plugins install ${npmSpec}".`,
    );
  };
  return {
    commandName,
    register(qa) {
      qa.command(commandName).description(description).action(failNotInstalled);
    },
  };
}
/**
 * Creates a placeholder registration for a runner whose plugin is installed but not active.
 *
 * The registered subcommand always throws an error asking the user to enable
 * or allow the plugin in their OpenClaw config.
 *
 * @param params.commandName - Subcommand name to register under `qa`.
 * @param params.description - Optional help text; a generic one is derived from the command name when omitted.
 * @param params.pluginId - Plugin id named in the enable hint.
 */
function createBlockedQaRunnerCliRegistration(params: {
  commandName: string;
  description?: string;
  pluginId: string;
}): LiveTransportQaCliRegistration {
  const { commandName, pluginId } = params;
  const helpText = params.description ?? `Run the ${commandName} live QA lane`;
  // Action handler that never succeeds: it only surfaces the enable hint.
  const failNotActive = () => {
    throw new Error(
      `QA runner "${commandName}" is installed but not active. Enable or allow plugin "${pluginId}" in your OpenClaw config, then try again.`,
    );
  };
  return {
    commandName,
    register(qa) {
      qa.command(commandName).description(helpText).action(failNotActive);
    },
  };
}
/**
 * Maps one discovered runner contribution to the CLI registration used for it.
 *
 * - "available": the runner's own registration is used as-is.
 * - "blocked": a placeholder that tells the user to enable the plugin.
 * - anything else (the "missing" case): a placeholder that tells the user to install it.
 *
 * @param runner - One entry returned by `listQaRunnerCliContributions()`.
 */
function createQaRunnerCliRegistration(
  runner: ReturnType<typeof listQaRunnerCliContributions>[number],
): LiveTransportQaCliRegistration {
  switch (runner.status) {
    case "available":
      return runner.registration;
    case "blocked":
      return createBlockedQaRunnerCliRegistration({
        commandName: runner.commandName,
        description: runner.description,
        pluginId: runner.pluginId,
      });
    default: {
      // Not installed: fall back to a description that mentions the package to install.
      const helpText =
        runner.description ??
        `Run the ${runner.commandName} live QA lane (install ${runner.npmSpec} first)`;
      return createMissingQaRunnerCliRegistration({
        commandName: runner.commandName,
        description: helpText,
        npmSpec: runner.npmSpec,
      });
    }
  }
}
export const LIVE_TRANSPORT_QA_CLI_REGISTRATIONS: readonly LiveTransportQaCliRegistration[] = [
telegramQaCliRegistration,
matrixQaCliRegistration,
];
/**
 * Lists every live-transport QA registration: the built-in ones first,
 * followed by one registration per discovered runner contribution
 * (in the order `listQaRunnerCliContributions()` returns them).
 *
 * A fresh array is returned on every call.
 */
export function listLiveTransportQaCliRegistrations(): readonly LiveTransportQaCliRegistration[] {
  const builtIn = [...LIVE_TRANSPORT_QA_CLI_REGISTRATIONS];
  const discovered = listQaRunnerCliContributions().map((runner) =>
    createQaRunnerCliRegistration(runner),
  );
  return builtIn.concat(discovered);
}

View File

@@ -33,6 +33,11 @@ export type LiveTransportQaCliRegistration = {
register(qa: Command): void;
};
/** Optional help-text overrides for a live lane's credential-related CLI flags. */
export type LiveTransportQaCredentialCliOptions = {
// Help text for `--credential-source`.
sourceDescription?: string;
// Help text for `--credential-role`.
roleDescription?: string;
};
export function createLazyCliRuntimeLoader<T>(load: () => Promise<T>) {
let promise: Promise<T> | null = null;
return async () => {
@@ -61,13 +66,14 @@ export function mapLiveTransportQaCommanderOptions(
export function registerLiveTransportQaCli(params: {
qa: Command;
commandName: string;
credentialOptions?: LiveTransportQaCredentialCliOptions;
description: string;
outputDirHelp: string;
scenarioHelp: string;
sutAccountHelp: string;
run: (opts: LiveTransportQaCommandOptions) => Promise<void>;
}) {
params.qa
const command = params.qa
.command(params.commandName)
.description(params.description)
.option("--repo-root <path>", "Repository root to target when running from a neutral cwd")
@@ -81,22 +87,27 @@ export function registerLiveTransportQaCli(params: {
.option("--alt-model <ref>", "Alternate provider/model ref")
.option("--scenario <id>", params.scenarioHelp, collectString, [])
.option("--fast", "Enable provider fast mode where supported", false)
.option("--sut-account <id>", params.sutAccountHelp, "sut")
.option(
.option("--sut-account <id>", params.sutAccountHelp, "sut");
if (params.credentialOptions) {
command.option(
"--credential-source <source>",
"Credential source for live lanes: env or convex (default: env)",
)
.option(
"--credential-role <role>",
"Credential role for convex auth: maintainer or ci (default: maintainer)",
)
.action(async (opts: LiveTransportQaCommanderOptions) => {
await params.run(mapLiveTransportQaCommanderOptions(opts));
});
params.credentialOptions.sourceDescription ??
"Credential source for live lanes: env or convex (default: env)",
);
if (params.credentialOptions.roleDescription) {
command.option("--credential-role <role>", params.credentialOptions.roleDescription);
}
}
command.action(async (opts: LiveTransportQaCommanderOptions) => {
await params.run(mapLiveTransportQaCommanderOptions(opts));
});
}
export function createLiveTransportQaCliRegistration(params: {
commandName: string;
credentialOptions?: LiveTransportQaCredentialCliOptions;
description: string;
outputDirHelp: string;
scenarioHelp: string;
@@ -109,6 +120,7 @@ export function createLiveTransportQaCliRegistration(params: {
registerLiveTransportQaCli({
qa,
commandName: params.commandName,
credentialOptions: params.credentialOptions,
description: params.description,
outputDirHelp: params.outputDirHelp,
scenarioHelp: params.scenarioHelp,

View File

@@ -20,6 +20,10 @@ async function runQaTelegram(opts: LiveTransportQaCommandOptions) {
export const telegramQaCliRegistration: LiveTransportQaCliRegistration =
createLiveTransportQaCliRegistration({
commandName: "telegram",
credentialOptions: {
sourceDescription: "Credential source for Telegram QA: env or convex (default: env)",
roleDescription: "Credential role for convex auth: maintainer or ci (default: maintainer)",
},
description: "Run the manual Telegram live QA lane against a private bot-to-bot group harness",
outputDirHelp: "Telegram QA artifact directory",
scenarioHelp: "Run only the named Telegram QA scenario (repeatable)",

View File

@@ -3,6 +3,7 @@ export type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
export { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
export { callGatewayFromCli } from "openclaw/plugin-sdk/browser-node-runtime";
export type { PluginRuntime } from "openclaw/plugin-sdk/runtime-store";
export { defaultQaRuntimeModelForMode } from "./model-selection.runtime.js";
export {
buildQaTarget,
createQaBusThread,

View File

@@ -81,7 +81,7 @@ export async function runQaSelfCheckAgainstState(params: {
timeline,
notes: params.notes ?? [
"Vertical slice: qa-channel + qa-lab bus + private debugger surface.",
"Docker orchestration, matrix runs, and auto-fix loops remain follow-up work.",
"Docker orchestration, additional QA runners, and auto-fix loops remain follow-up work.",
],
});

View File

@@ -0,0 +1 @@
export { runQaMatrixCommand } from "./src/cli.runtime.js";

View File

@@ -0,0 +1 @@
export { qaRunnerCliRegistrations, registerMatrixQaCli } from "./src/cli.js";

View File

@@ -0,0 +1,8 @@
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
// Plugin entry for the Matrix QA runner. `register` is intentionally empty here;
// NOTE(review): the runner appears to be discovered via the manifest's `qaRunners` — confirm.
export default definePluginEntry({
id: "qa-matrix",
name: "QA Matrix",
description: "Matrix QA transport runner and substrate",
register() {},
});

View File

@@ -0,0 +1,16 @@
{
"id": "qa-matrix",
"name": "QA Matrix",
"description": "Matrix QA transport runner and substrate",
"qaRunners": [
{
"commandName": "matrix",
"description": "Run the Docker-backed Matrix live QA lane against a disposable homeserver"
}
],
"configSchema": {
"type": "object",
"additionalProperties": false,
"properties": {}
}
}

View File

@@ -0,0 +1,34 @@
{
"name": "@openclaw/qa-matrix",
"version": "2026.4.12",
"description": "OpenClaw Matrix QA runner plugin",
"type": "module",
"devDependencies": {
"@openclaw/plugin-sdk": "workspace:*",
"openclaw": "workspace:*"
},
"peerDependencies": {
"openclaw": ">=2026.4.12"
},
"peerDependenciesMeta": {
"openclaw": {
"optional": true
}
},
"openclaw": {
"extensions": [
"./index.ts"
],
"install": {
"npmSpec": "@openclaw/qa-matrix",
"defaultChoice": "npm",
"minHostVersion": ">=2026.4.12"
},
"compat": {
"pluginApi": ">=2026.4.12"
},
"build": {
"openclawVersion": "2026.4.12"
}
}
}

View File

@@ -0,0 +1 @@
export { qaRunnerCliRegistrations } from "./cli.js";

View File

@@ -0,0 +1 @@
export { runMatrixQaLive } from "./src/runners/contract/runtime.js";

View File

@@ -0,0 +1,4 @@
/**
 * Accumulator for repeatable string CLI options.
 *
 * @param value - Raw value of the current occurrence; surrounding whitespace is stripped.
 * @param previous - Values collected so far (never mutated).
 * @returns A new array with the trimmed value appended, or `previous` itself
 *   when the value is empty after trimming.
 */
export function collectString(value: string, previous: string[]) {
  const entry = value.trim();
  if (entry.length === 0) {
    return previous;
  }
  return previous.concat(entry);
}

View File

@@ -0,0 +1,16 @@
import path from "node:path";
/**
 * Resolves a user-supplied `--output-dir` against the repo root and rejects
 * anything that would land outside of it.
 *
 * @param repoRoot - Repository root the output directory must stay inside.
 * @param outputDir - Relative output directory; when empty or omitted, nothing is resolved.
 * @returns The absolute output directory, or `undefined` when `outputDir` is not given.
 * @throws Error when `outputDir` is absolute, or when it resolves outside `repoRoot`.
 */
export function resolveRepoRelativeOutputDir(repoRoot: string, outputDir?: string) {
  if (!outputDir) {
    return undefined;
  }
  if (path.isAbsolute(outputDir)) {
    throw new Error("--output-dir must be a relative path inside the repo root.");
  }
  const resolved = path.resolve(repoRoot, outputDir);
  const relative = path.relative(repoRoot, resolved);
  // Compare whole path segments: a bare `startsWith("..")` would also reject
  // legitimate in-repo directories such as "..cache/out".
  const escapesRepoRoot =
    relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
  if (escapesRepoRoot) {
    throw new Error("--output-dir must stay within the repo root.");
  }
  return resolved;
}

View File

@@ -2,7 +2,7 @@ import { describe, expect, it, vi } from "vitest";
const runMatrixQaLive = vi.hoisted(() => vi.fn());
vi.mock("./matrix-live.runtime.js", () => ({
vi.mock("./runners/contract/runtime.js", () => ({
runMatrixQaLive,
}));

View File

@@ -1,9 +1,9 @@
import type { LiveTransportQaCommandOptions } from "../shared/live-transport-cli.js";
import { runMatrixQaLive } from "./runners/contract/runtime.js";
import type { LiveTransportQaCommandOptions } from "./shared/live-transport-cli.js";
import {
printLiveTransportQaArtifacts,
resolveLiveTransportQaRunOptions,
} from "../shared/live-transport-cli.runtime.js";
import { runMatrixQaLive } from "./matrix-live.runtime.js";
} from "./shared/live-transport-cli.runtime.js";
export async function runQaMatrixCommand(opts: LiveTransportQaCommandOptions) {
const runOptions = resolveLiveTransportQaRunOptions(opts);

View File

@@ -0,0 +1,29 @@
import { Command } from "commander";
import { describe, expect, it } from "vitest";
import { matrixQaCliRegistration } from "./cli.js";
// Verifies the option surface of the `matrix` subcommand registered by matrixQaCliRegistration.
describe("matrix qa cli registration", () => {
it("keeps disposable Matrix lane flags focused", () => {
const qa = new Command();
matrixQaCliRegistration.register(qa);
const matrix = qa.commands.find((command) => command.name() === "matrix");
// Falls back to an empty list when the subcommand is absent, so the assertions below fail clearly.
const optionNames = matrix?.options.map((option) => option.long) ?? [];
// The shared lane flags must all be present...
expect(optionNames).toEqual(
expect.arrayContaining([
"--repo-root",
"--output-dir",
"--provider-mode",
"--model",
"--alt-model",
"--scenario",
"--fast",
"--sut-account",
]),
);
// ...while the credential flags must not be exposed on this lane.
expect(optionNames).not.toContain("--credential-source");
expect(optionNames).not.toContain("--credential-role");
});
});

View File

@@ -4,7 +4,7 @@ import {
createLiveTransportQaCliRegistration,
type LiveTransportQaCliRegistration,
type LiveTransportQaCommandOptions,
} from "../shared/live-transport-cli.js";
} from "./shared/live-transport-cli.js";
type MatrixQaCliRuntime = typeof import("./cli.runtime.js");
@@ -27,6 +27,8 @@ export const matrixQaCliRegistration: LiveTransportQaCliRegistration =
run: runQaMatrix,
});
export const qaRunnerCliRegistrations = [matrixQaCliRegistration] as const;
/** Registers the Matrix QA subcommand on the given `qa` command via `matrixQaCliRegistration`. */
export function registerMatrixQaCli(qa: Command) {
matrixQaCliRegistration.register(qa);
}

View File

@@ -0,0 +1,274 @@
import { createServer } from "node:net";
import { runExec } from "openclaw/plugin-sdk/process-runtime";
import { fetchWithSsrFGuard } from "openclaw/plugin-sdk/ssrf-runtime";
/**
 * Injectable command runner: executes `command` with `args` in working
 * directory `cwd` and resolves with the captured stdout/stderr.
 */
export type RunCommand = (
command: string,
args: string[],
cwd: string,
) => Promise<{ stdout: string; stderr: string }>;
/** Minimal injectable fetch: takes a URL string and resolves with only the `ok` flag. */
export type FetchLike = (input: string) => Promise<{ ok: boolean }>;
/**
 * Performs a single health probe against `url` through the SSRF-guarded fetch.
 *
 * The request is aborted after 2 seconds and private-network targets are
 * explicitly allowed. The guard's `release` is always awaited, even when
 * reading the response fails.
 *
 * @param url - Health endpoint to probe.
 * @returns `{ ok }` mirroring the response's `ok` flag.
 */
export async function fetchHealthUrl(url: string): Promise<{ ok: boolean }> {
  const { response: healthResponse, release: releaseGuard } = await fetchWithSsrFGuard({
    url,
    init: { signal: AbortSignal.timeout(2_000) },
    policy: { allowPrivateNetwork: true },
    auditContext: "qa-matrix-docker-health-check",
  });
  try {
    const ok = healthResponse.ok;
    return { ok };
  } finally {
    await releaseGuard();
  }
}
/**
 * Renders an arbitrary thrown value as a human-readable string.
 *
 * - `Error` instances yield their `message`.
 * - Strings are returned unchanged.
 * - Everything else is JSON-serialized when possible.
 *
 * `JSON.stringify` returns `undefined` for values such as `undefined`,
 * functions and symbols, and throws for circular structures and BigInt; in
 * those cases the value is rendered with `String(...)` instead, so this
 * function always returns a string and never throws while describing an error.
 *
 * @param error - The caught value.
 * @returns A string description of `error`.
 */
export function describeError(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  if (typeof error === "string") {
    return error;
  }
  try {
    const serialized: unknown = JSON.stringify(error);
    if (typeof serialized === "string") {
      return serialized;
    }
  } catch {
    // Not serializable (circular reference, BigInt, ...): use the fallback below.
  }
  return String(error);
}
/**
 * Checks whether `port` can currently be bound on 127.0.0.1.
 *
 * A throwaway server is started on the port; any server error resolves to
 * `false`, while a successful listen closes the server and resolves to `true`.
 */
async function isPortFree(port: number) {
  const probe = createServer();
  return await new Promise<boolean>((resolve) => {
    probe.once("error", () => resolve(false));
    probe.listen(port, "127.0.0.1", () => {
      probe.close(() => resolve(true));
    });
  });
}
/**
 * Asks the OS for an unused port by listening on port 0, reading the assigned
 * port, and closing the server again before resolving.
 *
 * Rejects when the server errors, when no usable address info is available,
 * or when closing the server fails.
 */
async function findFreePort() {
  const probe = createServer();
  return await new Promise<number>((resolve, reject) => {
    probe.once("error", reject);
    probe.listen(0, () => {
      const bound = probe.address();
      if (typeof bound === "string" || !bound) {
        probe.close();
        reject(new Error("failed to find free port"));
        return;
      }
      const assignedPort = bound.port;
      // Only hand out the port once the probe server has fully released it.
      probe.close((closeError) => {
        if (closeError) {
          reject(closeError);
          return;
        }
        resolve(assignedPort);
      });
    });
  });
}
/**
 * Chooses the host port to use.
 *
 * @param preferredPort - Port to use when possible.
 * @param pinned - When true, `preferredPort` is returned without any availability check.
 * @returns `preferredPort` if pinned or currently free, otherwise an OS-assigned free port.
 */
export async function resolveHostPort(preferredPort: number, pinned: boolean) {
  if (pinned) {
    return preferredPort;
  }
  const preferredIsFree = await isPortFree(preferredPort);
  return preferredIsFree ? preferredPort : await findFreePort();
}
/**
 * Trims surrounding whitespace from command output and keeps at most the
 * last 120 lines of it.
 *
 * @param output - Raw stdout or stderr text.
 * @returns The trimmed text, its last 120 lines when longer, or "" when blank.
 */
function trimCommandOutput(output: string) {
  const maxLines = 120;
  const text = output.trim();
  if (text.length === 0) {
    return "";
  }
  const allLines = text.split("\n");
  if (allLines.length > maxLines) {
    return allLines.slice(-maxLines).join("\n");
  }
  return text;
}
/**
 * Runs `command` with `args` in `cwd` (10 MiB output buffer) and returns the
 * result of `runExec`.
 *
 * On failure the error is rethrown as a new `Error` whose message contains the
 * full command line followed by the trimmed stderr and stdout (each section
 * only when non-empty); the original error is attached as `cause`.
 *
 * @throws Error describing the failed command and its captured output.
 */
export async function execCommand(command: string, args: string[], cwd: string) {
  try {
    return await runExec(command, args, { cwd, maxBuffer: 10 * 1024 * 1024 });
  } catch (error) {
    const failure = error as Error & { stdout?: string; stderr?: string };
    const stdoutTail = trimCommandOutput(failure.stdout ?? "");
    const stderrTail = trimCommandOutput(failure.stderr ?? "");
    const sections = [`Command failed: ${[command, ...args].join(" ")}`];
    // stderr is reported before stdout; empty streams are left out entirely.
    if (stderrTail) {
      sections.push(`stderr:\n${stderrTail}`);
    }
    if (stdoutTail) {
      sections.push(`stdout:\n${stdoutTail}`);
    }
    throw new Error(sections.join("\n\n"), { cause: error });
  }
}
/**
 * Polls `url` until it reports healthy or the timeout elapses.
 *
 * Each attempt calls `deps.fetchImpl(url)`; an `ok` response ends the wait.
 * A non-OK response or a thrown error is remembered as the last error, and the
 * next attempt happens after `deps.sleepImpl(pollMs)`.
 *
 * @param url - Health endpoint to poll.
 * @param deps.label - Name used in the timeout message (defaults to `url`).
 * @param deps.composeFile - Compose file shown in the troubleshooting hint.
 * @param deps.fetchImpl - Fetch implementation used for each probe.
 * @param deps.sleepImpl - Sleep implementation used between probes.
 * @param deps.timeoutMs - Overall limit in milliseconds (default 360000).
 * @param deps.pollMs - Delay between probes in milliseconds (default 1000).
 * @throws Error with elapsed time, last error and a hint when the limit is reached.
 */
export async function waitForHealth(
  url: string,
  deps: {
    label?: string;
    composeFile?: string;
    fetchImpl: FetchLike;
    sleepImpl: (ms: number) => Promise<unknown>;
    timeoutMs?: number;
    pollMs?: number;
  },
) {
  const timeoutMs = deps.timeoutMs ?? 360_000;
  const pollMs = deps.pollMs ?? 1_000;
  const startedAt = Date.now();
  const deadline = startedAt + timeoutMs;
  let lastError: unknown = null;

  while (Date.now() < deadline) {
    let healthy = false;
    try {
      const probe = await deps.fetchImpl(url);
      if (probe.ok) {
        healthy = true;
      } else {
        lastError = new Error(`Health check returned non-OK for ${url}`);
      }
    } catch (error) {
      lastError = error;
    }
    if (healthy) {
      return;
    }
    await deps.sleepImpl(pollMs);
  }

  const elapsedSec = Math.round((Date.now() - startedAt) / 1000);
  const limitSec = Math.round(timeoutMs / 1000);
  const report = [
    `${deps.label ?? url} did not become healthy within ${elapsedSec}s (limit ${limitSec}s).`,
  ];
  // The "Last error" line only appears when at least one probe actually failed.
  if (lastError) {
    report.push(`Last error: ${describeError(lastError)}`);
  }
  report.push(
    `Hint: check container logs with \`docker compose -f ${deps.composeFile ?? "<compose-file>"} logs\` and verify the port is not already in use.`,
  );
  throw new Error(report.join("\n"));
}
/**
 * Single best-effort health probe.
 *
 * @returns The response's `ok` flag, or `false` when the fetch throws or rejects.
 */
async function isHealthy(url: string, fetchImpl: FetchLike) {
  let ok = false;
  try {
    ({ ok } = await fetchImpl(url));
  } catch {
    // Any failure counts as "not healthy".
    ok = false;
  }
  return ok;
}
/**
 * Reduces a `docker compose ps` row to a single status string.
 *
 * The trimmed `Health` value wins when non-empty, then the trimmed `State`
 * value; "unknown" is returned when neither is available.
 */
function normalizeDockerServiceStatus(row?: { Health?: string; State?: string }) {
  return row?.Health?.trim() || row?.State?.trim() || "unknown";
}
/**
 * Parses the stdout of `docker compose ps --format json` into status rows.
 *
 * Three shapes are accepted: a JSON array of rows, a single JSON object
 * (wrapped into a one-element array), and newline-delimited JSON with one row
 * per non-blank line. Blank output yields an empty array.
 *
 * @throws SyntaxError when the output is neither valid JSON nor valid
 *   newline-delimited JSON.
 */
function parseDockerComposePsRows(stdout: string) {
  const text = stdout.trim();
  if (text.length === 0) {
    return [] as Array<{ Health?: string; State?: string }>;
  }
  let parsed: Array<{ Health?: string; State?: string }> | { Health?: string; State?: string };
  try {
    parsed = JSON.parse(text) as
      | Array<{ Health?: string; State?: string }>
      | { Health?: string; State?: string };
  } catch {
    // Not a single JSON document: treat every non-blank line as its own row.
    const rows: Array<{ Health?: string; State?: string }> = [];
    for (const rawLine of text.split("\n")) {
      const line = rawLine.trim();
      if (line) {
        rows.push(JSON.parse(line) as { Health?: string; State?: string });
      }
    }
    return rows;
  }
  return Array.isArray(parsed) ? parsed : [parsed];
}
/**
 * Polls `docker compose ps` for `service` until its status is "healthy" or
 * "running", or the timeout elapses.
 *
 * Only the first row of the `ps` output is considered. A failing command does
 * not abort the wait; its description becomes the last known status instead.
 *
 * @param service - Compose service name to check.
 * @param composeFile - Compose file passed via `-f`.
 * @param repoRoot - Working directory for the docker command.
 * @param runCommand - Command runner used to invoke docker.
 * @param sleepImpl - Sleep implementation used between polls.
 * @param timeoutMs - Overall limit in milliseconds.
 * @param pollMs - Delay between polls in milliseconds.
 * @throws Error with elapsed time, last status and a log hint when the limit is reached.
 */
export async function waitForDockerServiceHealth(
  service: string,
  composeFile: string,
  repoRoot: string,
  runCommand: RunCommand,
  sleepImpl: (ms: number) => Promise<unknown>,
  timeoutMs = 360_000,
  pollMs = 1_000,
) {
  const readyStatuses = ["healthy", "running"];
  const startedAt = Date.now();
  const deadline = startedAt + timeoutMs;
  let lastStatus = "unknown";

  while (Date.now() < deadline) {
    try {
      const psResult = await runCommand(
        "docker",
        ["compose", "-f", composeFile, "ps", "--format", "json", service],
        repoRoot,
      );
      const [firstRow] = parseDockerComposePsRows(psResult.stdout);
      lastStatus = normalizeDockerServiceStatus(firstRow);
      if (readyStatuses.includes(lastStatus)) {
        return;
      }
    } catch (error) {
      lastStatus = describeError(error);
    }
    await sleepImpl(pollMs);
  }

  const elapsedSec = Math.round((Date.now() - startedAt) / 1000);
  const limitSec = Math.round(timeoutMs / 1000);
  const report = [
    `${service} did not become healthy within ${elapsedSec}s (limit ${limitSec}s).`,
    `Last status: ${lastStatus}`,
    `Hint: check container logs with \`docker compose -f ${composeFile} logs ${service}\`.`,
  ];
  throw new Error(report.join("\n"));
}
/**
 * Builds an `http://<container-ip>:<port>/` URL for a compose service.
 *
 * The container id comes from `docker compose ps -q`, the IP from
 * `docker inspect`. When `fetchImpl` is given, the URL is only returned if
 * `<baseUrl>healthz` answers OK.
 *
 * NOTE(review): the inspect template emits every network's IP address with no
 * separator, so a container attached to several networks would presumably
 * yield a concatenated value — confirm whether that can occur here.
 *
 * @returns The base URL, or `null` when no container id or IP is found or the
 *   optional health probe fails.
 */
export async function resolveComposeServiceUrl(
  service: string,
  port: number,
  composeFile: string,
  repoRoot: string,
  runCommand: RunCommand,
  fetchImpl?: FetchLike,
) {
  const psResult = await runCommand(
    "docker",
    ["compose", "-f", composeFile, "ps", "-q", service],
    repoRoot,
  );
  const containerId = psResult.stdout.trim();
  if (!containerId) {
    return null;
  }

  const inspectArgs = [
    "inspect",
    "--format",
    "{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}",
    containerId,
  ];
  const inspectResult = await runCommand("docker", inspectArgs, repoRoot);
  const ip = inspectResult.stdout.trim();
  if (!ip) {
    return null;
  }

  const baseUrl = `http://${ip}:${port}/`;
  // Without a fetch implementation the URL is returned unverified.
  if (fetchImpl && !(await isHealthy(`${baseUrl}healthz`, fetchImpl))) {
    return null;
  }
  return baseUrl;
}
// Test-only handle bundling selected helpers from this module, including the
// otherwise unexported normalizeDockerServiceStatus.
export const __testing = {
fetchHealthUrl,
normalizeDockerServiceStatus,
};

View File

@@ -0,0 +1,100 @@
/** One named check (or scenario step) in a QA report, with its outcome. */
export type QaReportCheck = {
name: string;
status: "pass" | "fail" | "skip";
// Optional free-form details; multi-line text is rendered as a fenced block.
details?: string;
};
/** One scenario in a QA report: an overall outcome plus optional per-step checks. */
export type QaReportScenario = {
name: string;
status: "pass" | "fail" | "skip";
// Optional free-form details; multi-line text is rendered as a fenced block.
details?: string;
// Optional ordered steps, rendered as a checkbox list under the scenario.
steps?: QaReportCheck[];
};
/**
 * Appends a labelled details entry to `lines`.
 *
 * Single-line details go on the bullet itself (`- Label: details`).
 * Multi-line details get a bare `- Label:` bullet followed by a blank line
 * and a fenced `text` block; the fence lines are not indented.
 *
 * @param lines - Output buffer, mutated in place.
 * @param label - Bullet label.
 * @param details - Detail text.
 * @param indent - Prefix placed before the bullet.
 */
function pushDetailsBlock(lines: string[], label: string, details: string, indent = "") {
  const bullet = `${indent}- ${label}:`;
  if (details.includes("\n")) {
    lines.push(bullet, "", "```text", details, "```");
  } else {
    lines.push(`${bullet} ${details}`);
  }
}
/**
 * Renders a QA run as a Markdown report.
 *
 * The report starts with a title and a summary (start/finish timestamps,
 * duration in ms, pass and fail counts across checks and scenarios), followed
 * by the optional sections "Checks", "Scenarios", "Timeline" and "Notes".
 * Each section is emitted only when it has entries. Checkboxes are ticked for
 * "pass" only; "fail" and "skip" both render unticked.
 *
 * @param params.title - Report heading.
 * @param params.startedAt - Run start time.
 * @param params.finishedAt - Run end time.
 * @param params.checks - Standalone checks.
 * @param params.scenarios - Scenarios with optional steps.
 * @param params.timeline - Timeline entries, one bullet each.
 * @param params.notes - Notes, one bullet each.
 * @returns The Markdown document, ending with a newline.
 */
export function renderQaMarkdownReport(params: {
  title: string;
  startedAt: Date;
  finishedAt: Date;
  checks?: QaReportCheck[];
  scenarios?: QaReportScenario[];
  timeline?: string[];
  notes?: string[];
}) {
  const checks = params.checks ?? [];
  const scenarios = params.scenarios ?? [];
  const timeline = params.timeline ?? [];
  const notes = params.notes ?? [];

  const outcomes = [...checks, ...scenarios].map((entry) => entry.status);
  const countOutcome = (wanted: "pass" | "fail") =>
    outcomes.filter((status) => status === wanted).length;
  const checkbox = (status: "pass" | "fail" | "skip") => (status === "pass" ? "x" : " ");

  const lines = [
    `# ${params.title}`,
    "",
    `- Started: ${params.startedAt.toISOString()}`,
    `- Finished: ${params.finishedAt.toISOString()}`,
    `- Duration ms: ${params.finishedAt.getTime() - params.startedAt.getTime()}`,
    `- Passed: ${countOutcome("pass")}`,
    `- Failed: ${countOutcome("fail")}`,
    "",
  ];

  if (checks.length > 0) {
    lines.push("## Checks", "");
    for (const check of checks) {
      lines.push(`- [${checkbox(check.status)}] ${check.name}`);
      if (check.details) {
        pushDetailsBlock(lines, "Details", check.details, " ");
      }
    }
  }

  if (scenarios.length > 0) {
    lines.push("", "## Scenarios", "");
    for (const scenario of scenarios) {
      lines.push(`### ${scenario.name}`, "", `- Status: ${scenario.status}`);
      if (scenario.details) {
        pushDetailsBlock(lines, "Details", scenario.details);
      }
      const steps = scenario.steps ?? [];
      if (steps.length > 0) {
        lines.push("- Steps:");
        for (const step of steps) {
          lines.push(` - [${checkbox(step.status)}] ${step.name}`);
          if (step.details) {
            pushDetailsBlock(lines, "Details", step.details, " ");
          }
        }
      }
      // Blank line separating this scenario from whatever follows.
      lines.push("");
    }
  }

  if (timeline.length > 0) {
    lines.push("## Timeline", "", ...timeline.map((item) => `- ${item}`));
  }

  if (notes.length > 0) {
    lines.push("", "## Notes", "", ...notes.map((note) => `- ${note}`));
  }

  lines.push("");
  return lines.join("\n");
}

View File

@@ -0,0 +1,9 @@
/** Provider modes a QA run can actually execute in. */
export type QaProviderMode = "mock-openai" | "live-frontier";
/** Accepted provider-mode inputs: the real modes plus the "live-openai" spelling. */
export type QaProviderModeInput = QaProviderMode | "live-openai";

/**
 * Maps any provider-mode input onto a supported mode.
 *
 * @param input - Raw provider-mode value.
 * @returns "mock-openai" only for that exact value; every other input
 *   (including "live-openai" and unknown values) yields "live-frontier".
 */
export function normalizeQaProviderMode(input: unknown): QaProviderMode {
  return input === "mock-openai" ? "mock-openai" : "live-frontier";
}

View File

@@ -0,0 +1,51 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
// Mock functions are created with vi.hoisted so they can be referenced from
// the vi.mock factory below.
const loadQaLabRuntimeModule = vi.hoisted(() => vi.fn());
const defaultQaRuntimeModelForMode = vi.hoisted(() => vi.fn());
// Replace the qa-lab runtime SDK entry point with the mock loader.
vi.mock("openclaw/plugin-sdk/qa-lab-runtime", () => ({
loadQaLabRuntimeModule,
}));
// Unit tests for resolveMatrixQaModels from ./model-selection.js.
describe("matrix qa model selection", () => {
beforeEach(() => {
// Fake default-model lookup: encodes the mode and whether the alternate was
// requested into the returned string so assertions can tell the calls apart.
defaultQaRuntimeModelForMode.mockReset().mockImplementation((mode, options) =>
options?.alternate ? `${mode}:alt` : `${mode}:primary`,
);
// The mocked runtime module exposes only the default-model lookup.
loadQaLabRuntimeModule.mockReset().mockReturnValue({
defaultQaRuntimeModelForMode,
});
});
it("delegates default model selection through qa-lab runtime defaults", async () => {
// Imported inside the test, after the mocks above have been configured.
const { resolveMatrixQaModels } = await import("./model-selection.js");
// "live-openai" input is expected to come back as "live-frontier", with both
// models supplied by the mocked runtime defaults.
expect(resolveMatrixQaModels({ providerMode: "live-openai" })).toEqual({
providerMode: "live-frontier",
primaryModel: "live-frontier:primary",
alternateModel: "live-frontier:alt",
});
// Primary default is requested first (mode only), alternate second.
expect(defaultQaRuntimeModelForMode).toHaveBeenNthCalledWith(1, "live-frontier");
expect(defaultQaRuntimeModelForMode).toHaveBeenNthCalledWith(2, "live-frontier", {
alternate: true,
});
});
it("preserves explicit model overrides", async () => {
const { resolveMatrixQaModels } = await import("./model-selection.js");
expect(
resolveMatrixQaModels({
providerMode: "mock-openai",
primaryModel: "custom-primary",
alternateModel: "custom-alt",
}),
).toEqual({
providerMode: "mock-openai",
primaryModel: "custom-primary",
alternateModel: "custom-alt",
});
// With both models given explicitly, the qa-lab runtime must not be loaded at all.
expect(loadQaLabRuntimeModule).not.toHaveBeenCalled();
expect(defaultQaRuntimeModelForMode).not.toHaveBeenCalled();
});
});

View File

@@ -0,0 +1,33 @@
import { loadQaLabRuntimeModule } from "openclaw/plugin-sdk/qa-lab-runtime";
import { normalizeQaProviderMode, type QaProviderModeInput } from "../../run-config.js";
/** Result of resolveMatrixQaModels: the normalized provider mode plus the two model refs to use. */
export type ResolvedMatrixQaModels = {
// Whatever normalizeQaProviderMode returns.
providerMode: ReturnType<typeof normalizeQaProviderMode>;
primaryModel: string;
alternateModel: string;
};
/**
 * Resolves the provider mode and the primary/alternate model refs for a Matrix QA run.
 *
 * Explicit model refs are trimmed and win over defaults. The qa-lab runtime
 * module is loaded only when at least one of the two refs is missing or blank,
 * in which case its `defaultQaRuntimeModelForMode` supplies the missing ref(s).
 *
 * @param params.providerMode - Requested provider mode; defaults to `"live-frontier"`.
 * @param params.primaryModel - Optional explicit primary model ref.
 * @param params.alternateModel - Optional explicit alternate model ref.
 * @returns The normalized provider mode with both model refs filled in.
 */
export function resolveMatrixQaModels(params: {
  providerMode?: QaProviderModeInput;
  primaryModel?: string;
  alternateModel?: string;
}): ResolvedMatrixQaModels {
  const providerMode = normalizeQaProviderMode(params.providerMode ?? "live-frontier");
  const explicitPrimary = params.primaryModel?.trim();
  const explicitAlternate = params.alternateModel?.trim();

  // At least one ref is missing or blank: ask the qa-lab runtime for defaults.
  if (!explicitPrimary || !explicitAlternate) {
    const runtime = loadQaLabRuntimeModule();
    return {
      providerMode,
      primaryModel: explicitPrimary || runtime.defaultQaRuntimeModelForMode(providerMode),
      alternateModel:
        explicitAlternate || runtime.defaultQaRuntimeModelForMode(providerMode, { alternate: true }),
    };
  }

  // Both refs were given explicitly, so the runtime module is never loaded.
  return {
    providerMode,
    primaryModel: explicitPrimary,
    alternateModel: explicitAlternate,
  };
}

View File

@@ -1,6 +1,6 @@
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
import { afterEach, describe, expect, it, vi } from "vitest";
import { __testing as liveTesting } from "./matrix-live.runtime.js";
import { __testing as liveTesting } from "./runtime.js";
afterEach(() => {
vi.useRealTimers();

View File

@@ -4,22 +4,20 @@ import path from "node:path";
import { setTimeout as sleep } from "node:timers/promises";
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
import { startQaGatewayChild } from "../../gateway-child.js";
import { loadQaLabRuntimeModule } from "openclaw/plugin-sdk/qa-lab-runtime";
import type { QaReportCheck } from "../../report.js";
import { renderQaMarkdownReport } from "../../report.js";
import { type QaProviderModeInput } from "../../run-config.js";
import {
defaultQaModelForMode,
normalizeQaProviderMode,
type QaProviderModeInput,
} from "../../run-config.js";
import { startQaLiveLaneGateway } from "../shared/live-gateway.runtime.js";
import { appendLiveLaneIssue, buildLiveLaneArtifactsError } from "../shared/live-lane-helpers.js";
appendLiveLaneIssue,
buildLiveLaneArtifactsError,
} from "../../shared/live-lane-helpers.js";
import {
provisionMatrixQaRoom,
type MatrixQaObservedEvent,
type MatrixQaProvisionResult,
} from "./matrix-driver-client.js";
import { startMatrixQaHarness } from "./matrix-harness.runtime.js";
} from "../../substrate/client.js";
import { startMatrixQaHarness } from "../../substrate/harness.runtime.js";
import {
MATRIX_QA_SCENARIOS,
buildMatrixReplyDetails,
@@ -28,7 +26,22 @@ import {
runMatrixQaScenario,
type MatrixQaCanaryArtifact,
type MatrixQaScenarioArtifacts,
} from "./matrix-live-scenarios.js";
} from "./scenarios.js";
import { resolveMatrixQaModels } from "./model-selection.js";
/** Structural type for the gateway child used by this module: a `call` method and a `restart` method. */
type MatrixQaGatewayChild = {
// Invokes a named gateway method with a params object; an optional timeout (ms) may be passed.
call(
method: string,
params: Record<string, unknown>,
options?: { timeoutMs?: number },
): Promise<unknown>;
restart(): Promise<void>;
};
/** Structural type for the live-lane gateway harness: the gateway child plus a `stop` method. */
type MatrixQaLiveLaneGatewayHarness = {
gateway: MatrixQaGatewayChild;
stop(): Promise<void>;
};
type MatrixQaScenarioResult = {
artifacts?: MatrixQaScenarioArtifacts;
@@ -214,7 +227,7 @@ function isMatrixAccountReady(entry?: {
}
async function waitForMatrixChannelReady(
gateway: Awaited<ReturnType<typeof startQaGatewayChild>>,
gateway: MatrixQaGatewayChild,
accountId: string,
opts?: {
pollMs?: number;
@@ -255,6 +268,27 @@ async function waitForMatrixChannelReady(
throw new Error(`matrix account "${accountId}" did not become ready`);
}
/**
 * Starts the live-lane gateway through the qa-lab runtime module.
 *
 * The runtime module is obtained via `loadQaLabRuntimeModule()` and `params`
 * is forwarded unchanged to its `startQaLiveLaneGateway`. The awaited result
 * is cast to the local structural harness type.
 *
 * @param params - Gateway start options, passed through as-is.
 * @returns The started harness, typed as `MatrixQaLiveLaneGatewayHarness`.
 */
async function startMatrixQaLiveLaneGateway(params: {
  repoRoot: string;
  transport: {
    requiredPluginIds: readonly string[];
    createGatewayConfig: (params: {
      baseUrl: string;
    }) => Pick<OpenClawConfig, "channels" | "messages">;
  };
  transportBaseUrl: string;
  providerMode: "mock-openai" | "live-frontier";
  primaryModel: string;
  alternateModel: string;
  fastMode?: boolean;
  controlUiEnabled?: boolean;
  mutateConfig?: (cfg: OpenClawConfig) => OpenClawConfig;
}): Promise<MatrixQaLiveLaneGatewayHarness> {
  const qaLabRuntime = loadQaLabRuntimeModule();
  const harness = await qaLabRuntime.startQaLiveLaneGateway(params);
  return harness as MatrixQaLiveLaneGatewayHarness;
}
export async function runMatrixQaLive(params: {
fastMode?: boolean;
outputDir?: string;
@@ -271,9 +305,11 @@ export async function runMatrixQaLive(params: {
path.join(repoRoot, ".artifacts", "qa-e2e", `matrix-${Date.now().toString(36)}`);
await fs.mkdir(outputDir, { recursive: true });
const providerMode = normalizeQaProviderMode(params.providerMode ?? "live-frontier");
const primaryModel = params.primaryModel?.trim() || defaultQaModelForMode(providerMode);
const alternateModel = params.alternateModel?.trim() || defaultQaModelForMode(providerMode, true);
const { providerMode, primaryModel, alternateModel } = resolveMatrixQaModels({
providerMode: params.providerMode,
primaryModel: params.primaryModel,
alternateModel: params.alternateModel,
});
const sutAccountId = params.sutAccountId?.trim() || "sut";
const scenarios = findMatrixQaScenarios(params.scenarioIds);
const observedEvents: MatrixQaObservedEvent[] = [];
@@ -317,12 +353,12 @@ export async function runMatrixQaLive(params: {
const scenarioResults: MatrixQaScenarioResult[] = [];
const cleanupErrors: string[] = [];
let canaryArtifact: MatrixQaCanaryArtifact | undefined;
let gatewayHarness: Awaited<ReturnType<typeof startQaLiveLaneGateway>> | null = null;
let gatewayHarness: MatrixQaLiveLaneGatewayHarness | null = null;
let canaryFailed = false;
const syncState: { driver?: string; observer?: string } = {};
try {
gatewayHarness = await startQaLiveLaneGateway({
gatewayHarness = await startMatrixQaLiveLaneGateway({
repoRoot,
transport: {
requiredPluginIds: [],
@@ -555,5 +591,6 @@ export const __testing = {
buildMatrixQaConfig,
buildObservedEventsArtifact,
isMatrixAccountReady,
resolveMatrixQaModels,
waitForMatrixChannelReady,
};

View File

@@ -3,19 +3,19 @@ const { createMatrixQaClient } = vi.hoisted(() => ({
createMatrixQaClient: vi.fn(),
}));
vi.mock("./matrix-driver-client.js", () => ({
vi.mock("../../substrate/client.js", () => ({
createMatrixQaClient,
}));
import {
LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS,
findMissingLiveTransportStandardScenarios,
} from "../shared/live-transport-scenarios.js";
} from "../../shared/live-transport-scenarios.js";
import {
__testing as scenarioTesting,
MATRIX_QA_SCENARIOS,
runMatrixQaScenario,
} from "./matrix-live-scenarios.js";
} from "./scenarios.js";
describe("matrix live qa scenarios", () => {
beforeEach(() => {

View File

@@ -3,8 +3,8 @@ import {
collectLiveTransportStandardScenarioCoverage,
selectLiveTransportScenarios,
type LiveTransportScenarioDefinition,
} from "../shared/live-transport-scenarios.js";
import { createMatrixQaClient, type MatrixQaObservedEvent } from "./matrix-driver-client.js";
} from "../../shared/live-transport-scenarios.js";
import { createMatrixQaClient, type MatrixQaObservedEvent } from "../../substrate/client.js";
export type MatrixQaScenarioId =
| "matrix-thread-follow-up"

View File

@@ -0,0 +1,9 @@
import { describe, expect, it } from "vitest";
// Pins the export surface of ../runtime-api.js.
describe("matrix qa runtime api surface", () => {
it("keeps runner discovery lightweight", async () => {
const runtimeApi = await import("../runtime-api.js");
// The module must export exactly one name; any additional export fails this test.
expect(Object.keys(runtimeApi).toSorted()).toEqual(["qaRunnerCliRegistrations"]);
});
});

View File

@@ -0,0 +1,18 @@
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
/**
 * Appends one `"<label>: <formatted error>"` entry to `issues`.
 *
 * @param issues - Array to append to; mutated in place.
 * @param label - Prefix for the entry.
 * @param error - Value passed to `formatErrorMessage` to produce the message text.
 */
export function appendLiveLaneIssue(issues: string[], label: string, error: unknown) {
  const entry = `${label}: ${formatErrorMessage(error)}`;
  issues.push(entry);
}
/**
 * Builds a multi-line message: the heading, any detail lines, then an
 * `Artifacts:` section with one `- <label>: <path>` line per artifact.
 *
 * @param params.heading - First line of the message.
 * @param params.artifacts - Label-to-path map, listed in `Object.entries` order.
 * @param params.details - Optional lines placed between the heading and the artifact list.
 * @returns The lines joined with `"\n"` (no trailing newline).
 */
export function buildLiveLaneArtifactsError(params: {
  heading: string;
  artifacts: Record<string, string>;
  details?: string[];
}) {
  const lines = [params.heading];
  for (const detail of params.details ?? []) {
    lines.push(detail);
  }
  lines.push("Artifacts:");
  for (const [label, filePath] of Object.entries(params.artifacts)) {
    lines.push(`- ${label}: ${filePath}`);
  }
  return lines.join("\n");
}

View File

@@ -0,0 +1,40 @@
import path from "node:path";
import { resolveRepoRelativeOutputDir } from "../cli-paths.js";
import type { QaProviderMode } from "../run-config.js";
import { normalizeQaProviderMode } from "../run-config.js";
import type { LiveTransportQaCommandOptions } from "./live-transport-cli.js";
/**
 * Turns raw live-transport CLI options into run options.
 *
 * - `repoRoot` becomes an absolute path, defaulting to the current working directory.
 * - `outputDir` is resolved through `resolveRepoRelativeOutputDir` against that root.
 * - `providerMode` defaults to `"live-frontier"` when omitted and is otherwise normalized.
 * - `credentialSource` and `credentialRole` are trimmed when present.
 * - All remaining fields are copied through unchanged.
 *
 * @param opts - Options as collected from the command line.
 * @returns The same option set with `repoRoot` and `providerMode` always populated.
 */
export function resolveLiveTransportQaRunOptions(
  opts: LiveTransportQaCommandOptions,
): LiveTransportQaCommandOptions & {
  repoRoot: string;
  providerMode: QaProviderMode;
} {
  const repoRoot = path.resolve(opts.repoRoot ?? process.cwd());
  const outputDir = resolveRepoRelativeOutputDir(repoRoot, opts.outputDir);

  let providerMode: QaProviderMode = "live-frontier";
  if (opts.providerMode !== undefined) {
    providerMode = normalizeQaProviderMode(opts.providerMode);
  }

  return {
    repoRoot,
    outputDir,
    providerMode,
    primaryModel: opts.primaryModel,
    alternateModel: opts.alternateModel,
    fastMode: opts.fastMode,
    scenarioIds: opts.scenarioIds,
    sutAccountId: opts.sutAccountId,
    credentialSource: opts.credentialSource?.trim(),
    credentialRole: opts.credentialRole?.trim(),
  };
}
/**
 * Writes one `"<laneLabel> <label>: <path>"` line to stdout per artifact.
 *
 * @param laneLabel - Prefix placed at the start of every line.
 * @param artifacts - Label-to-path map, printed in `Object.entries` order.
 */
export function printLiveTransportQaArtifacts(
  laneLabel: string,
  artifacts: Record<string, string>,
) {
  Object.entries(artifacts).forEach(([label, filePath]) => {
    process.stdout.write(`${laneLabel} ${label}: ${filePath}\n`);
  });
}

View File

@@ -0,0 +1,132 @@
import type { Command } from "commander";
import { collectString } from "../cli-options.js";
import type { QaProviderModeInput } from "../run-config.js";
/**
 * Options passed to a live-transport QA command's `run` callback.
 * Every field is optional; this is the shape produced by
 * `mapLiveTransportQaCommanderOptions`.
 */
export type LiveTransportQaCommandOptions = {
repoRoot?: string;
outputDir?: string;
providerMode?: QaProviderModeInput;
primaryModel?: string;
alternateModel?: string;
fastMode?: boolean;
scenarioIds?: string[];
sutAccountId?: string;
credentialSource?: string;
credentialRole?: string;
};
/**
 * Options object received by the command's action handler. Field names follow
 * the CLI flags (`--model`, `--alt-model`, `--scenario`, `--fast`,
 * `--sut-account`, ...) rather than the names used by `LiveTransportQaCommandOptions`.
 */
type LiveTransportQaCommanderOptions = {
repoRoot?: string;
outputDir?: string;
providerMode?: QaProviderModeInput;
model?: string;
altModel?: string;
scenario?: string[];
fast?: boolean;
sutAccount?: string;
credentialSource?: string;
credentialRole?: string;
};
/** A named CLI registration: the subcommand name plus a function that attaches it to a parent command. */
export type LiveTransportQaCliRegistration = {
commandName: string;
register(qa: Command): void;
};
/**
 * Help-text overrides for the credential flags. In `registerLiveTransportQaCli`,
 * `--credential-source` is added whenever this object is supplied, and
 * `--credential-role` only when `roleDescription` is set.
 */
export type LiveTransportQaCredentialCliOptions = {
sourceDescription?: string;
roleDescription?: string;
};
/**
 * Wraps an async loader so that `load` is invoked at most once.
 *
 * The first call to the returned function starts `load()` and stores the
 * resulting promise; every call (including the first) awaits that stored
 * promise. The stored promise is never replaced once set, so a rejected load
 * stays rejected for later callers.
 *
 * @param load - Async factory to invoke on first use.
 * @returns An async function resolving to the loaded value.
 */
export function createLazyCliRuntimeLoader<T>(load: () => Promise<T>) {
  let pending: Promise<T> | null = null;
  return async () => {
    if (pending == null) {
      pending = load();
    }
    return await pending;
  };
}
/**
 * Translates the flag-named options object from the action handler into the
 * option names used by the run callbacks.
 *
 * Renames: `model` to `primaryModel`, `altModel` to `alternateModel`,
 * `fast` to `fastMode`, `scenario` to `scenarioIds`, `sutAccount` to
 * `sutAccountId`. The other fields keep their names. No values are transformed.
 *
 * @param opts - Options as parsed from the command line.
 * @returns A new object carrying all ten run-option keys.
 */
export function mapLiveTransportQaCommanderOptions(
  opts: LiveTransportQaCommanderOptions,
): LiveTransportQaCommandOptions {
  const {
    repoRoot,
    outputDir,
    providerMode,
    model,
    altModel,
    fast,
    scenario,
    sutAccount,
    credentialSource,
    credentialRole,
  } = opts;
  return {
    repoRoot,
    outputDir,
    providerMode,
    primaryModel: model,
    alternateModel: altModel,
    fastMode: fast,
    scenarioIds: scenario,
    sutAccountId: sutAccount,
    credentialSource,
    credentialRole,
  };
}
/**
 * Registers one live-transport QA subcommand on the given parent command.
 *
 * The subcommand always gets the shared flags (`--repo-root`, `--output-dir`,
 * `--provider-mode`, `--model`, `--alt-model`, `--scenario`, `--fast`,
 * `--sut-account`). When `credentialOptions` is supplied it also gets
 * `--credential-source`, and `--credential-role` if a role description is set.
 * The action handler maps the parsed options with
 * `mapLiveTransportQaCommanderOptions` and awaits `params.run`.
 *
 * @param params.qa - Parent command to attach the subcommand to.
 * @param params.commandName - Name of the subcommand.
 * @param params.credentialOptions - Enables the credential flags and may override their help text.
 * @param params.description - Subcommand description.
 * @param params.outputDirHelp - Help text for `--output-dir`.
 * @param params.scenarioHelp - Help text for `--scenario`.
 * @param params.sutAccountHelp - Help text for `--sut-account`.
 * @param params.run - Callback invoked with the mapped options.
 */
export function registerLiveTransportQaCli(params: {
  qa: Command;
  commandName: string;
  credentialOptions?: LiveTransportQaCredentialCliOptions;
  description: string;
  outputDirHelp: string;
  scenarioHelp: string;
  sutAccountHelp: string;
  run: (opts: LiveTransportQaCommandOptions) => Promise<void>;
}) {
  const { credentialOptions } = params;

  // Shared flags, registered in a fixed order.
  const command = params.qa
    .command(params.commandName)
    .description(params.description)
    .option("--repo-root <path>", "Repository root to target when running from a neutral cwd")
    .option("--output-dir <path>", params.outputDirHelp)
    .option(
      "--provider-mode <mode>",
      "Provider mode: mock-openai or live-frontier (legacy live-openai still works)",
      "live-frontier",
    )
    .option("--model <ref>", "Primary provider/model ref")
    .option("--alt-model <ref>", "Alternate provider/model ref")
    .option("--scenario <id>", params.scenarioHelp, collectString, [])
    .option("--fast", "Enable provider fast mode where supported", false)
    .option("--sut-account <id>", params.sutAccountHelp, "sut");

  // Credential flags are opt-in per lane.
  if (credentialOptions) {
    const sourceHelp =
      credentialOptions.sourceDescription ??
      "Credential source for live lanes: env or convex (default: env)";
    command.option("--credential-source <source>", sourceHelp);

    const roleHelp = credentialOptions.roleDescription;
    if (roleHelp) {
      command.option("--credential-role <role>", roleHelp);
    }
  }

  command.action(async (rawOptions: LiveTransportQaCommanderOptions) => {
    const runOptions = mapLiveTransportQaCommanderOptions(rawOptions);
    await params.run(runOptions);
  });
}
/**
 * Packages the arguments of `registerLiveTransportQaCli` (minus the parent
 * command) into a registration object that can be applied later.
 *
 * @param params - Subcommand name, help texts, optional credential options and the run callback.
 * @returns An object exposing `commandName` and a `register(qa)` method that
 *   calls `registerLiveTransportQaCli` with `qa` and the stored parameters.
 */
export function createLiveTransportQaCliRegistration(params: {
  commandName: string;
  credentialOptions?: LiveTransportQaCredentialCliOptions;
  description: string;
  outputDirHelp: string;
  scenarioHelp: string;
  sutAccountHelp: string;
  run: (opts: LiveTransportQaCommandOptions) => Promise<void>;
}): LiveTransportQaCliRegistration {
  const registration: LiveTransportQaCliRegistration = {
    commandName: params.commandName,
    register(qa: Command) {
      // Read the parameters at registration time, then forward them with the parent command.
      const {
        commandName,
        credentialOptions,
        description,
        outputDirHelp,
        scenarioHelp,
        sutAccountHelp,
        run,
      } = params;
      registerLiveTransportQaCli({
        qa,
        commandName,
        credentialOptions,
        description,
        outputDirHelp,
        scenarioHelp,
        sutAccountHelp,
        run,
      });
    },
  };
  return registration;
}

View File

@@ -0,0 +1,149 @@
/** Identifiers of the standard scenarios; each one has an entry in LIVE_TRANSPORT_STANDARD_SCENARIOS. */
export type LiveTransportStandardScenarioId =
| "canary"
| "mention-gating"
| "allowlist-block"
| "top-level-reply-shape"
| "restart-resume"
| "thread-follow-up"
| "thread-isolation"
| "reaction-observation"
| "help-command";
/**
 * A lane-specific scenario definition. `TId` is the lane's own scenario id
 * type; `standardId` optionally links the scenario to a standard scenario.
 */
export type LiveTransportScenarioDefinition<TId extends string = string> = {
id: TId;
standardId?: LiveTransportStandardScenarioId;
timeoutMs: number;
title: string;
};
/** Catalog entry for one standard scenario: its id, a short title and a one-sentence description. */
export type LiveTransportStandardScenarioDefinition = {
description: string;
id: LiveTransportStandardScenarioId;
title: string;
};
/**
 * Catalog of the standard scenarios, one entry per LiveTransportStandardScenarioId.
 * The ids listed here are the source for LIVE_TRANSPORT_STANDARD_SCENARIO_ID_SET.
 */
export const LIVE_TRANSPORT_STANDARD_SCENARIOS: readonly LiveTransportStandardScenarioDefinition[] =
[
{
id: "canary",
title: "Transport canary",
description: "The lane can trigger one known-good reply on the real transport.",
},
{
id: "mention-gating",
title: "Mention gating",
description: "Messages without the required mention do not trigger a reply.",
},
{
id: "allowlist-block",
title: "Sender allowlist block",
description: "Non-allowlisted senders do not trigger a reply.",
},
{
id: "top-level-reply-shape",
title: "Top-level reply shape",
description: "Top-level replies stay top-level when the lane is configured that way.",
},
{
id: "restart-resume",
title: "Restart resume",
description: "The lane still responds after a gateway restart.",
},
{
id: "thread-follow-up",
title: "Thread follow-up",
description: "Threaded prompts receive threaded replies with the expected relation metadata.",
},
{
id: "thread-isolation",
title: "Thread isolation",
description: "Fresh top-level prompts stay out of prior threads.",
},
{
id: "reaction-observation",
title: "Reaction observation",
description: "Reaction events are observed and normalized correctly.",
},
{
id: "help-command",
title: "Help command",
description: "The transport-specific help command path replies successfully.",
},
] as const;
/** The baseline subset of standard scenario ids: the first five entries of the standard catalog. */
export const LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS: readonly LiveTransportStandardScenarioId[] =
[
"canary",
"mention-gating",
"allowlist-block",
"top-level-reply-shape",
"restart-resume",
] as const;
// Set of every standard scenario id, built once at module load for membership checks.
const LIVE_TRANSPORT_STANDARD_SCENARIO_ID_SET = new Set(
LIVE_TRANSPORT_STANDARD_SCENARIOS.map((scenario) => scenario.id),
);
/**
 * Throws if any of the given ids is not in the standard scenario catalog.
 *
 * @param ids - Ids to validate.
 * @throws Error naming the first id that is not in the catalog.
 */
function assertKnownStandardScenarioIds(ids: readonly LiveTransportStandardScenarioId[]) {
  const offendingIndex = ids.findIndex((id) => !LIVE_TRANSPORT_STANDARD_SCENARIO_ID_SET.has(id));
  if (offendingIndex !== -1) {
    throw new Error(`unknown live transport standard scenario id: ${ids[offendingIndex]}`);
  }
}
/**
 * Selects the scenarios whose ids were requested.
 *
 * With no ids (undefined or empty) every scenario is returned as a fresh
 * array. Otherwise the result keeps the order of `params.scenarios` (not the
 * request order), and duplicate requested ids are ignored.
 *
 * @param params.ids - Requested scenario ids; omit or pass `[]` to select all.
 * @param params.laneLabel - Lane name used in the error message.
 * @param params.scenarios - Available scenario definitions.
 * @returns A new array with the selected scenarios.
 * @throws Error listing every requested id that matches no scenario, in
 *   first-requested order.
 */
export function selectLiveTransportScenarios<TDefinition extends { id: string }>(params: {
  ids?: string[];
  laneLabel: string;
  scenarios: readonly TDefinition[];
}): TDefinition[] {
  if (!params.ids || params.ids.length === 0) {
    return [...params.scenarios];
  }
  // Set lookups keep both the selection and the missing-id check linear.
  const requested = new Set(params.ids);
  const selected = params.scenarios.filter((scenario) => requested.has(scenario.id));
  const selectedIds = new Set(selected.map((scenario) => scenario.id));
  const missingIds = [...requested].filter((id) => !selectedIds.has(id));
  if (missingIds.length > 0) {
    throw new Error(`unknown ${params.laneLabel} QA scenario id(s): ${missingIds.join(", ")}`);
  }
  return selected;
}
/**
 * Lists the standard scenario ids covered by a lane, without duplicates.
 *
 * The always-on ids come first, in the order given, followed by the
 * `standardId` of each scenario in order of first appearance. Scenarios
 * without a `standardId` contribute nothing.
 *
 * @param params.alwaysOnStandardScenarioIds - Ids counted as covered regardless of the scenario list.
 * @param params.scenarios - Lane scenario definitions to scan.
 * @returns The covered standard ids in first-seen order.
 * @throws Error if any id is not in the standard scenario catalog.
 */
export function collectLiveTransportStandardScenarioCoverage<TId extends string>(params: {
  alwaysOnStandardScenarioIds?: readonly LiveTransportStandardScenarioId[];
  scenarios: readonly LiveTransportScenarioDefinition<TId>[];
}) {
  // A Set iterates in insertion order, so it deduplicates and keeps order at once.
  const covered = new Set<LiveTransportStandardScenarioId>();

  const alwaysOn = params.alwaysOnStandardScenarioIds ?? [];
  assertKnownStandardScenarioIds(alwaysOn);
  for (const id of alwaysOn) {
    if (id) {
      covered.add(id);
    }
  }

  for (const { standardId } of params.scenarios) {
    if (!standardId) {
      continue;
    }
    assertKnownStandardScenarioIds([standardId]);
    covered.add(standardId);
  }

  return [...covered];
}
/**
 * Returns the expected standard scenario ids that are not covered.
 *
 * @param params.coveredStandardScenarioIds - Ids the lane covers.
 * @param params.expectedStandardScenarioIds - Ids the lane is required to cover.
 * @returns The uncovered expected ids, in the order they were given.
 * @throws Error if either list contains an id that is not in the standard catalog.
 */
export function findMissingLiveTransportStandardScenarios(params: {
  coveredStandardScenarioIds: readonly LiveTransportStandardScenarioId[];
  expectedStandardScenarioIds: readonly LiveTransportStandardScenarioId[];
}) {
  const { coveredStandardScenarioIds, expectedStandardScenarioIds } = params;
  assertKnownStandardScenarioIds(coveredStandardScenarioIds);
  assertKnownStandardScenarioIds(expectedStandardScenarioIds);

  const coveredLookup = new Set(coveredStandardScenarioIds);
  const missing: LiveTransportStandardScenarioId[] = [];
  for (const id of expectedStandardScenarioIds) {
    if (!coveredLookup.has(id)) {
      missing.push(id);
    }
  }
  return missing;
}

View File

@@ -4,7 +4,7 @@ import {
createMatrixQaClient,
provisionMatrixQaRoom,
type MatrixQaObservedEvent,
} from "./matrix-driver-client.js";
} from "./client.js";
function resolveRequestUrl(input: RequestInfo | URL) {
if (typeof input === "string") {

View File

@@ -2,11 +2,7 @@ import { mkdtemp, readFile, rm } from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, expect, it, vi } from "vitest";
import {
__testing,
startMatrixQaHarness,
writeMatrixQaHarnessFiles,
} from "./matrix-harness.runtime.js";
import { __testing, startMatrixQaHarness, writeMatrixQaHarnessFiles } from "./harness.runtime.js";
describe("matrix harness runtime", () => {
it("writes a pinned Tuwunel compose file and redacted manifest", async () => {

View File

@@ -11,7 +11,7 @@ import {
waitForHealth,
type FetchLike,
type RunCommand,
} from "../../docker-runtime.js";
} from "../docker-runtime.js";
const MATRIX_QA_DEFAULT_IMAGE = "ghcr.io/matrix-construct/tuwunel:v1.5.1";
const MATRIX_QA_DEFAULT_SERVER_NAME = "matrix-qa.test";