refactor(qa): split Matrix QA into optional plugin (#66723)

Merged via squash. Prepared head SHA: 27241bd089 Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com> Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com> Reviewed-by: @gumadeiras
2026-05-06 07:00:43 +00:00 · 2026-04-14 16:28:57 -04:00
parent 3425823dfb
commit 82a2db71e8
69 changed files with 2026 additions and 229 deletions
--- a/extensions/qa-lab/runtime-api.ts
+++ b/extensions/qa-lab/runtime-api.ts
@@ -1 +1,2 @@
 export * from "./src/runtime-api.js";
+export { startQaLiveLaneGateway } from "./src/live-transports/shared/live-gateway.runtime.js";
--- a/extensions/qa-lab/src/cli.runtime.test.ts
+++ b/extensions/qa-lab/src/cli.runtime.test.ts
@@ -8,7 +8,6 @@ const {
  runQaSuiteFromRuntime,
  runQaCharacterEval,
  runQaMultipass,
-  runMatrixQaLive,
  runTelegramQaLive,
  startQaLabServer,
  writeQaDockerHarnessFiles,
@@ -20,7 +19,6 @@ const {
  runQaSuiteFromRuntime: vi.fn(),
  runQaCharacterEval: vi.fn(),
  runQaMultipass: vi.fn(),
-  runMatrixQaLive: vi.fn(),
  runTelegramQaLive: vi.fn(),
  startQaLabServer: vi.fn(),
  writeQaDockerHarnessFiles: vi.fn(),
@@ -52,10 +50,6 @@ vi.mock("./multipass.runtime.js", () => ({
  runQaMultipass,
 }));

-vi.mock("./live-transports/matrix/matrix-live.runtime.js", () => ({
-  runMatrixQaLive,
-}));
-
 vi.mock("./live-transports/telegram/telegram-live.runtime.js", () => ({
  runTelegramQaLive,
 }));
@@ -88,7 +82,6 @@ import {
  runQaParityReportCommand,
  runQaSuiteCommand,
 } from "./cli.runtime.js";
-import { runQaMatrixCommand } from "./live-transports/matrix/cli.runtime.js";
 import { runQaTelegramCommand } from "./live-transports/telegram/cli.runtime.js";

 describe("qa cli runtime", () => {
@@ -100,7 +93,6 @@ describe("qa cli runtime", () => {
    runQaCharacterEval.mockReset();
    runQaManualLane.mockReset();
    runQaMultipass.mockReset();
-    runMatrixQaLive.mockReset();
    runTelegramQaLive.mockReset();
    startQaLabServer.mockReset();
    writeQaDockerHarnessFiles.mockReset();
@@ -139,13 +131,6 @@ describe("qa cli runtime", () => {
      vmName: "openclaw-qa-test",
      scenarioIds: ["channel-chat-baseline"],
    });
-    runMatrixQaLive.mockResolvedValue({
-      outputDir: "/tmp/matrix",
-      reportPath: "/tmp/matrix/report.md",
-      summaryPath: "/tmp/matrix/summary.json",
-      observedEventsPath: "/tmp/matrix/observed.json",
-      scenarios: [],
-    });
    runTelegramQaLive.mockResolvedValue({
      outputDir: "/tmp/telegram",
      reportPath: "/tmp/telegram/report.md",
@@ -226,30 +211,6 @@ describe("qa cli runtime", () => {
    });
  });

-  it("resolves matrix qa repo-root-relative paths before dispatching", async () => {
-    await runQaMatrixCommand({
-      repoRoot: "/tmp/openclaw-repo",
-      outputDir: ".artifacts/qa/matrix",
-      providerMode: "live-frontier",
-      primaryModel: "openai/gpt-5.4",
-      alternateModel: "openai/gpt-5.4",
-      fastMode: true,
-      scenarioIds: ["matrix-thread-follow-up"],
-      sutAccountId: "sut-live",
-    });
-
-    expect(runMatrixQaLive).toHaveBeenCalledWith({
-      repoRoot: path.resolve("/tmp/openclaw-repo"),
-      outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa/matrix"),
-      providerMode: "live-frontier",
-      primaryModel: "openai/gpt-5.4",
-      alternateModel: "openai/gpt-5.4",
-      fastMode: true,
-      scenarioIds: ["matrix-thread-follow-up"],
-      sutAccountId: "sut-live",
-    });
-  });
-
  it("rejects output dirs that escape the repo root", () => {
    expect(() => resolveRepoRelativeOutputDir("/tmp/openclaw-repo", "../outside")).toThrow(
      "--output-dir must stay within the repo root.",
@@ -273,20 +234,6 @@ describe("qa cli runtime", () => {
    );
  });

-  it("defaults matrix qa runs onto the live provider lane", async () => {
-    await runQaMatrixCommand({
-      repoRoot: "/tmp/openclaw-repo",
-      scenarioIds: ["matrix-thread-follow-up"],
-    });
-
-    expect(runMatrixQaLive).toHaveBeenCalledWith(
-      expect.objectContaining({
-        repoRoot: path.resolve("/tmp/openclaw-repo"),
-        providerMode: "live-frontier",
-      }),
-    );
-  });
-
  it("normalizes legacy live-openai suite runs onto the frontier provider mode", async () => {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
--- a/extensions/qa-lab/src/cli.test.ts
+++ b/extensions/qa-lab/src/cli.test.ts
@@ -1,22 +1,76 @@
 import { Command } from "commander";
+import type { QaRunnerCliContribution } from "openclaw/plugin-sdk/qa-runner-runtime";
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";

+// Identity of the fake QA runner plugin shared by the contribution fixtures and
+// the assertions in this file (command name, plugin id, install spec).
+const TEST_QA_RUNNER = {
+  pluginId: "qa-runner-test",
+  commandName: "runner-test",
+  description: "Run the test live QA lane",
+  npmSpec: "@openclaw/qa-runner-test",
+} as const;
+
+// Fixture: an "available" runner contribution whose mocked `register` adds a no-op
+// subcommand named after the test runner to the given qa command.
+function createAvailableQaRunnerContribution() {
+  const { pluginId, commandName } = TEST_QA_RUNNER;
+  const register = vi.fn((qa: Command) => {
+    qa.command(commandName).action(() => undefined);
+  });
+  return {
+    pluginId,
+    commandName,
+    status: "available" as const,
+    registration: { commandName, register },
+  } satisfies QaRunnerCliContribution;
+}
+
+// Fixture: a "missing" runner contribution carrying the npm spec used in the install hint.
+function createMissingQaRunnerContribution(): QaRunnerCliContribution {
+  const { pluginId, commandName, description, npmSpec } = TEST_QA_RUNNER;
+  return { pluginId, commandName, description, status: "missing", npmSpec };
+}
+
+// Fixture: a "blocked" runner contribution registered under the test runner's command name.
+function createBlockedQaRunnerContribution(): QaRunnerCliContribution {
+  const { pluginId, commandName, description } = TEST_QA_RUNNER;
+  return { pluginId, commandName, description, status: "blocked" };
+}
+
+// Fixture: a "blocked" runner contribution with a caller-chosen command name, used to
+// provoke a collision with an existing qa subcommand.
+function createConflictingQaRunnerContribution(commandName: string): QaRunnerCliContribution {
+  const { pluginId, description } = TEST_QA_RUNNER;
+  return { pluginId, commandName, description, status: "blocked" };
+}
+
 const {
  runQaCredentialsAddCommand,
  runQaCredentialsListCommand,
  runQaCredentialsRemoveCommand,
-  runQaMatrixCommand,
  runQaTelegramCommand,
 } = vi.hoisted(() => ({
  runQaCredentialsAddCommand: vi.fn(),
  runQaCredentialsListCommand: vi.fn(),
  runQaCredentialsRemoveCommand: vi.fn(),
-  runQaMatrixCommand: vi.fn(),
  runQaTelegramCommand: vi.fn(),
 }));

-vi.mock("./live-transports/matrix/cli.runtime.js", () => ({
-  runQaMatrixCommand,
+const { listQaRunnerCliContributions } = vi.hoisted(() => ({
+  listQaRunnerCliContributions: vi.fn<() => QaRunnerCliContribution[]>(() => [
+    createAvailableQaRunnerContribution(),
+  ]),
+}));
+
+vi.mock("openclaw/plugin-sdk/qa-runner-runtime", () => ({
+  listQaRunnerCliContributions,
 }));

 vi.mock("./live-transports/telegram/cli.runtime.js", () => ({
@@ -36,63 +90,71 @@ describe("qa cli registration", () => {

  beforeEach(() => {
    program = new Command();
-    registerQaLabCli(program);
    runQaCredentialsAddCommand.mockReset();
    runQaCredentialsListCommand.mockReset();
    runQaCredentialsRemoveCommand.mockReset();
-    runQaMatrixCommand.mockReset();
    runQaTelegramCommand.mockReset();
+    listQaRunnerCliContributions
+      .mockReset()
+      .mockReturnValue([createAvailableQaRunnerContribution()]);
+    registerQaLabCli(program);
  });

  afterEach(() => {
    vi.clearAllMocks();
  });

-  it("registers the matrix and telegram live transport subcommands", () => {
+  it("registers discovered and built-in live transport subcommands", () => {
    const qa = program.commands.find((command) => command.name() === "qa");
    expect(qa).toBeDefined();
    expect(qa?.commands.map((command) => command.name())).toEqual(
-      expect.arrayContaining(["matrix", "telegram", "credentials"]),
+      expect.arrayContaining([TEST_QA_RUNNER.commandName, "telegram", "credentials"]),
    );
  });

-  it("routes matrix CLI flags into the lane runtime", async () => {
-    await program.parseAsync([
-      "node",
-      "openclaw",
-      "qa",
-      "matrix",
-      "--repo-root",
-      "/tmp/openclaw-repo",
-      "--output-dir",
-      ".artifacts/qa/matrix",
-      "--provider-mode",
-      "mock-openai",
-      "--model",
-      "mock-openai/gpt-5.4",
-      "--alt-model",
-      "mock-openai/gpt-5.4-alt",
-      "--scenario",
-      "matrix-thread-follow-up",
-      "--scenario",
-      "matrix-thread-isolation",
-      "--fast",
-      "--sut-account",
-      "sut-live",
-    ]);
+  it("delegates discovered qa runner registration through the generic host seam", () => {
+    const [{ registration }] = listQaRunnerCliContributions.mock.results[0]?.value;
+    expect(registration.register).toHaveBeenCalledTimes(1);
+  });

-    expect(runQaMatrixCommand).toHaveBeenCalledWith({
-      repoRoot: "/tmp/openclaw-repo",
-      outputDir: ".artifacts/qa/matrix",
-      providerMode: "mock-openai",
-      primaryModel: "mock-openai/gpt-5.4",
-      alternateModel: "mock-openai/gpt-5.4-alt",
-      fastMode: true,
-      scenarioIds: ["matrix-thread-follow-up", "matrix-thread-isolation"],
-      sutAccountId: "sut-live",
-      credentialSource: undefined,
-      credentialRole: undefined,
-    });
+  it("keeps Telegram credential flags on the shared host CLI", () => {
+    const qa = program.commands.find((command) => command.name() === "qa");
+    const telegram = qa?.commands.find((command) => command.name() === "telegram");
+    const optionNames = telegram?.options.map((option) => option.long) ?? [];
+
+    expect(optionNames).toEqual(
+      expect.arrayContaining(["--credential-source", "--credential-role"]),
+    );
+  });
+
+  it("shows an install hint when a discovered runner plugin is unavailable", async () => {
+    listQaRunnerCliContributions.mockReset().mockReturnValue([createMissingQaRunnerContribution()]);
+    const missingProgram = new Command();
+    registerQaLabCli(missingProgram);
+
+    await expect(
+      missingProgram.parseAsync(["node", "openclaw", "qa", TEST_QA_RUNNER.commandName]),
+    ).rejects.toThrow(`openclaw plugins install ${TEST_QA_RUNNER.npmSpec}`);
+  });
+
+  it("shows an enable hint when a discovered runner plugin is installed but blocked", async () => {
+    listQaRunnerCliContributions.mockReset().mockReturnValue([createBlockedQaRunnerContribution()]);
+    const blockedProgram = new Command();
+    registerQaLabCli(blockedProgram);
+
+    await expect(
+      blockedProgram.parseAsync(["node", "openclaw", "qa", TEST_QA_RUNNER.commandName]),
+    ).rejects.toThrow(`Enable or allow plugin "${TEST_QA_RUNNER.pluginId}"`);
+  });
+
+  it("rejects discovered runners that collide with built-in qa subcommands", () => {
+    listQaRunnerCliContributions
+      .mockReset()
+      .mockReturnValue([createConflictingQaRunnerContribution("manual")]);
+
+    expect(() => registerQaLabCli(new Command())).toThrow(
+      'QA runner command "manual" conflicts with an existing qa subcommand',
+    );
  });

  it("routes telegram CLI defaults into the lane runtime", async () => {
--- a/extensions/qa-lab/src/cli.ts
+++ b/extensions/qa-lab/src/cli.ts
@@ -1,6 +1,6 @@
 import type { Command } from "commander";
 import { collectString } from "./cli-options.js";
-import { LIVE_TRANSPORT_QA_CLI_REGISTRATIONS } from "./live-transports/cli.js";
+import { listLiveTransportQaCliRegistrations } from "./live-transports/cli.js";
 import type { QaProviderModeInput } from "./run-config.js";
 import { hasQaScenarioPack } from "./scenario-catalog.js";

@@ -183,6 +183,12 @@ export function isQaLabCliAvailable(): boolean {
  return hasQaScenarioPack();
 }

+/**
+ * Throws when `qa` already has a subcommand named `commandName`.
+ *
+ * @param qa - The parent `qa` command whose registered subcommands are inspected.
+ * @param commandName - Name the caller is about to register.
+ * @throws Error naming the conflicting command.
+ */
+function assertNoQaSubcommandCollision(qa: Command, commandName: string) {
+  const registeredNames = qa.commands.map((command) => command.name());
+  if (!registeredNames.includes(commandName)) {
+    return;
+  }
+  throw new Error(`QA runner command "${commandName}" conflicts with an existing qa subcommand`);
+}
+
 export function registerQaLabCli(program: Command) {
  const qa = program
    .command("qa")
@@ -284,10 +290,6 @@ export function registerQaLabCli(program: Command) {
      },
    );

-  for (const lane of LIVE_TRANSPORT_QA_CLI_REGISTRATIONS) {
-    lane.register(qa);
-  }
-
  qa.command("character-eval")
    .description("Run the character QA scenario across live models and write a judged report")
    .option("--repo-root <path>", "Repository root to target when running from a neutral cwd")
@@ -579,4 +581,9 @@ export function registerQaLabCli(program: Command) {
    .action(async (opts: { host?: string; port?: number }) => {
      await runQaMockOpenAi(opts);
    });
+
+  for (const lane of listLiveTransportQaCliRegistrations()) {
+    assertNoQaSubcommandCollision(qa, lane.commandName);
+    lane.register(qa);
+  }
 }
--- a/extensions/qa-lab/src/live-transports/cli.ts
+++ b/extensions/qa-lab/src/live-transports/cli.ts
@@ -1,8 +1,78 @@
-import { matrixQaCliRegistration } from "./matrix/cli.js";
+import { listQaRunnerCliContributions } from "openclaw/plugin-sdk/qa-runner-runtime";
 import type { LiveTransportQaCliRegistration } from "./shared/live-transport-cli.js";
 import { telegramQaCliRegistration } from "./telegram/cli.js";

+/**
+ * Builds a placeholder registration for a QA runner whose plugin is not installed.
+ * The subcommand it registers always throws an error that includes the install command.
+ */
+function createMissingQaRunnerCliRegistration(params: {
+  commandName: string;
+  description: string;
+  npmSpec: string;
+}): LiveTransportQaCliRegistration {
+  const { commandName, description, npmSpec } = params;
+  const failNotInstalled = () => {
+    throw new Error(
+      `QA runner "${commandName}" not installed. Install it with "openclaw plugins install ${npmSpec}".`,
+    );
+  };
+  return {
+    commandName,
+    register(qa) {
+      qa.command(commandName).description(description).action(failNotInstalled);
+    },
+  };
+}
+
+/**
+ * Builds a placeholder registration for a QA runner whose plugin is installed but not
+ * active. The subcommand it registers always throws an error telling the user to
+ * enable or allow the plugin; a generic description is used when none is supplied.
+ */
+function createBlockedQaRunnerCliRegistration(params: {
+  commandName: string;
+  description?: string;
+  pluginId: string;
+}): LiveTransportQaCliRegistration {
+  const { commandName, pluginId } = params;
+  const failNotActive = () => {
+    throw new Error(
+      `QA runner "${commandName}" is installed but not active. Enable or allow plugin "${pluginId}" in your OpenClaw config, then try again.`,
+    );
+  };
+  return {
+    commandName,
+    register(qa) {
+      const helpText = params.description ?? `Run the ${commandName} live QA lane`;
+      qa.command(commandName).description(helpText).action(failNotActive);
+    },
+  };
+}
+
+/**
+ * Maps a discovered QA runner contribution to the registration the host CLI installs:
+ * the runner's own registration when available, otherwise a placeholder subcommand
+ * that explains how to enable (blocked) or install (any other status) the plugin.
+ */
+function createQaRunnerCliRegistration(
+  runner: ReturnType<typeof listQaRunnerCliContributions>[number],
+): LiveTransportQaCliRegistration {
+  switch (runner.status) {
+    case "available":
+      return runner.registration;
+    case "blocked": {
+      const { commandName, description, pluginId } = runner;
+      return createBlockedQaRunnerCliRegistration({ commandName, description, pluginId });
+    }
+    default: {
+      const { commandName, npmSpec } = runner;
+      return createMissingQaRunnerCliRegistration({
+        commandName,
+        description:
+          runner.description ?? `Run the ${commandName} live QA lane (install ${npmSpec} first)`,
+        npmSpec,
+      });
+    }
+  }
+}
+
 export const LIVE_TRANSPORT_QA_CLI_REGISTRATIONS: readonly LiveTransportQaCliRegistration[] = [
  telegramQaCliRegistration,
-  matrixQaCliRegistration,
 ];
+
+/**
+ * Returns a fresh list of live transport registrations: the built-in ones first,
+ * followed by one registration per discovered QA runner contribution.
+ */
+export function listLiveTransportQaCliRegistrations(): readonly LiveTransportQaCliRegistration[] {
+  const builtIn = [...LIVE_TRANSPORT_QA_CLI_REGISTRATIONS];
+  const discovered = listQaRunnerCliContributions().map((runner) =>
+    createQaRunnerCliRegistration(runner),
+  );
+  return builtIn.concat(discovered);
+}
--- a/extensions/qa-lab/src/live-transports/shared/live-transport-cli.ts
+++ b/extensions/qa-lab/src/live-transports/shared/live-transport-cli.ts
@@ -33,6 +33,11 @@ export type LiveTransportQaCliRegistration = {
  register(qa: Command): void;
 };

+/** Help-text overrides for the credential flags a live transport lane adds to its CLI. */
+export type LiveTransportQaCredentialCliOptions = {
+  /** Help text for `--credential-source`; a generic default is used when omitted. */
+  sourceDescription?: string;
+  /** Help text for `--credential-role`; the flag is only registered when this is set. */
+  roleDescription?: string;
+};
+
 export function createLazyCliRuntimeLoader<T>(load: () => Promise<T>) {
  let promise: Promise<T> | null = null;
  return async () => {
@@ -61,13 +66,14 @@ export function mapLiveTransportQaCommanderOptions(
 export function registerLiveTransportQaCli(params: {
  qa: Command;
  commandName: string;
+  credentialOptions?: LiveTransportQaCredentialCliOptions;
  description: string;
  outputDirHelp: string;
  scenarioHelp: string;
  sutAccountHelp: string;
  run: (opts: LiveTransportQaCommandOptions) => Promise<void>;
 }) {
-  params.qa
+  const command = params.qa
    .command(params.commandName)
    .description(params.description)
    .option("--repo-root <path>", "Repository root to target when running from a neutral cwd")
@@ -81,22 +87,27 @@ export function registerLiveTransportQaCli(params: {
    .option("--alt-model <ref>", "Alternate provider/model ref")
    .option("--scenario <id>", params.scenarioHelp, collectString, [])
    .option("--fast", "Enable provider fast mode where supported", false)
-    .option("--sut-account <id>", params.sutAccountHelp, "sut")
-    .option(
+    .option("--sut-account <id>", params.sutAccountHelp, "sut");
+
+  if (params.credentialOptions) {
+    command.option(
      "--credential-source <source>",
-      "Credential source for live lanes: env or convex (default: env)",
-    )
-    .option(
-      "--credential-role <role>",
-      "Credential role for convex auth: maintainer or ci (default: maintainer)",
-    )
-    .action(async (opts: LiveTransportQaCommanderOptions) => {
-      await params.run(mapLiveTransportQaCommanderOptions(opts));
-    });
+      params.credentialOptions.sourceDescription ??
+        "Credential source for live lanes: env or convex (default: env)",
+    );
+    if (params.credentialOptions.roleDescription) {
+      command.option("--credential-role <role>", params.credentialOptions.roleDescription);
+    }
+  }
+
+  command.action(async (opts: LiveTransportQaCommanderOptions) => {
+    await params.run(mapLiveTransportQaCommanderOptions(opts));
+  });
 }

 export function createLiveTransportQaCliRegistration(params: {
  commandName: string;
+  credentialOptions?: LiveTransportQaCredentialCliOptions;
  description: string;
  outputDirHelp: string;
  scenarioHelp: string;
@@ -109,6 +120,7 @@ export function createLiveTransportQaCliRegistration(params: {
      registerLiveTransportQaCli({
        qa,
        commandName: params.commandName,
+        credentialOptions: params.credentialOptions,
        description: params.description,
        outputDirHelp: params.outputDirHelp,
        scenarioHelp: params.scenarioHelp,
--- a/extensions/qa-lab/src/live-transports/telegram/cli.ts
+++ b/extensions/qa-lab/src/live-transports/telegram/cli.ts
@@ -20,6 +20,10 @@ async function runQaTelegram(opts: LiveTransportQaCommandOptions) {
 export const telegramQaCliRegistration: LiveTransportQaCliRegistration =
  createLiveTransportQaCliRegistration({
    commandName: "telegram",
+    credentialOptions: {
+      sourceDescription: "Credential source for Telegram QA: env or convex (default: env)",
+      roleDescription: "Credential role for convex auth: maintainer or ci (default: maintainer)",
+    },
    description: "Run the manual Telegram live QA lane against a private bot-to-bot group harness",
    outputDirHelp: "Telegram QA artifact directory",
    scenarioHelp: "Run only the named Telegram QA scenario (repeatable)",
--- a/extensions/qa-lab/src/runtime-api.ts
+++ b/extensions/qa-lab/src/runtime-api.ts
@@ -3,6 +3,7 @@ export type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
 export { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
 export { callGatewayFromCli } from "openclaw/plugin-sdk/browser-node-runtime";
 export type { PluginRuntime } from "openclaw/plugin-sdk/runtime-store";
+export { defaultQaRuntimeModelForMode } from "./model-selection.runtime.js";
 export {
  buildQaTarget,
  createQaBusThread,
--- a/extensions/qa-lab/src/self-check.ts
+++ b/extensions/qa-lab/src/self-check.ts
@@ -81,7 +81,7 @@ export async function runQaSelfCheckAgainstState(params: {
    timeline,
    notes: params.notes ?? [
      "Vertical slice: qa-channel + qa-lab bus + private debugger surface.",
-      "Docker orchestration, matrix runs, and auto-fix loops remain follow-up work.",
+      "Docker orchestration, additional QA runners, and auto-fix loops remain follow-up work.",
    ],
  });

--- a/extensions/qa-matrix/cli.runtime.ts
+++ b/extensions/qa-matrix/cli.runtime.ts
@@ -0,0 +1 @@
+export { runQaMatrixCommand } from "./src/cli.runtime.js";
--- a/extensions/qa-matrix/cli.ts
+++ b/extensions/qa-matrix/cli.ts
@@ -0,0 +1 @@
+export { qaRunnerCliRegistrations, registerMatrixQaCli } from "./src/cli.js";
--- a/extensions/qa-matrix/index.ts
+++ b/extensions/qa-matrix/index.ts
@@ -0,0 +1,8 @@
+import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
+
+// Plugin entry for the Matrix QA runner; `register` is an empty hook.
+// NOTE(review): the runner CLI appears to be surfaced through the manifest's `qaRunners`
+// entry and the `qaRunnerCliRegistrations` export rather than this hook — confirm.
+export default definePluginEntry({
+  id: "qa-matrix",
+  name: "QA Matrix",
+  description: "Matrix QA transport runner and substrate",
+  register() {},
+});
--- a/extensions/qa-matrix/openclaw.plugin.json
+++ b/extensions/qa-matrix/openclaw.plugin.json
@@ -0,0 +1,16 @@
+{
+  "id": "qa-matrix",
+  "name": "QA Matrix",
+  "description": "Matrix QA transport runner and substrate",
+  "qaRunners": [
+    {
+      "commandName": "matrix",
+      "description": "Run the Docker-backed Matrix live QA lane against a disposable homeserver"
+    }
+  ],
+  "configSchema": {
+    "type": "object",
+    "additionalProperties": false,
+    "properties": {}
+  }
+}
--- a/extensions/qa-matrix/package.json
+++ b/extensions/qa-matrix/package.json
@@ -0,0 +1,34 @@
+{
+  "name": "@openclaw/qa-matrix",
+  "version": "2026.4.12",
+  "description": "OpenClaw Matrix QA runner plugin",
+  "type": "module",
+  "devDependencies": {
+    "@openclaw/plugin-sdk": "workspace:*",
+    "openclaw": "workspace:*"
+  },
+  "peerDependencies": {
+    "openclaw": ">=2026.4.12"
+  },
+  "peerDependenciesMeta": {
+    "openclaw": {
+      "optional": true
+    }
+  },
+  "openclaw": {
+    "extensions": [
+      "./index.ts"
+    ],
+    "install": {
+      "npmSpec": "@openclaw/qa-matrix",
+      "defaultChoice": "npm",
+      "minHostVersion": ">=2026.4.12"
+    },
+    "compat": {
+      "pluginApi": ">=2026.4.12"
+    },
+    "build": {
+      "openclawVersion": "2026.4.12"
+    }
+  }
+}
--- a/extensions/qa-matrix/runtime-api.ts
+++ b/extensions/qa-matrix/runtime-api.ts
@@ -0,0 +1 @@
+export { qaRunnerCliRegistrations } from "./cli.js";
--- a/extensions/qa-matrix/runtime.ts
+++ b/extensions/qa-matrix/runtime.ts
@@ -0,0 +1 @@
+export { runMatrixQaLive } from "./src/runners/contract/runtime.js";
--- a/extensions/qa-matrix/src/cli-options.ts
+++ b/extensions/qa-matrix/src/cli-options.ts
@@ -0,0 +1,4 @@
+/**
+ * Accumulator for repeatable string values: returns a new array with the trimmed
+ * value appended, or the untouched `previous` array when the value is blank.
+ */
+export function collectString(value: string, previous: string[]) {
+  const normalized = value.trim();
+  if (normalized.length === 0) {
+    return previous;
+  }
+  return previous.concat(normalized);
+}
--- a/extensions/qa-matrix/src/cli-paths.ts
+++ b/extensions/qa-matrix/src/cli-paths.ts
@@ -0,0 +1,16 @@
+import path from "node:path";
+
+/**
+ * Resolves a repo-relative `--output-dir` value to an absolute path under `repoRoot`.
+ *
+ * @param repoRoot - Directory the output path is resolved against.
+ * @param outputDir - Relative output directory; nothing is resolved when omitted or empty.
+ * @returns The absolute output directory, or `undefined` when `outputDir` is not provided.
+ * @throws Error when `outputDir` is absolute or resolves outside `repoRoot`.
+ */
+export function resolveRepoRelativeOutputDir(repoRoot: string, outputDir?: string) {
+  if (!outputDir) {
+    return undefined;
+  }
+  if (path.isAbsolute(outputDir)) {
+    throw new Error("--output-dir must be a relative path inside the repo root.");
+  }
+  const resolved = path.resolve(repoRoot, outputDir);
+  const relative = path.relative(repoRoot, resolved);
+  // Only a leading ".." path segment escapes the root. A bare prefix check would also
+  // reject in-repo names that merely begin with two dots (for example "..cache").
+  const escapesRoot = relative === ".." || relative.startsWith(`..${path.sep}`);
+  if (escapesRoot || path.isAbsolute(relative)) {
+    throw new Error("--output-dir must stay within the repo root.");
+  }
+  return resolved;
+}
--- a/extensions/qa-lab/src/live-transports/matrix/cli.runtime.test.ts
+++ b/extensions/qa-lab/src/live-transports/matrix/cli.runtime.test.ts
@@ -2,7 +2,7 @@ import { describe, expect, it, vi } from "vitest";

 const runMatrixQaLive = vi.hoisted(() => vi.fn());

-vi.mock("./matrix-live.runtime.js", () => ({
+vi.mock("./runners/contract/runtime.js", () => ({
  runMatrixQaLive,
 }));

--- a/extensions/qa-lab/src/live-transports/matrix/cli.runtime.ts
+++ b/extensions/qa-lab/src/live-transports/matrix/cli.runtime.ts
@@ -1,9 +1,9 @@
-import type { LiveTransportQaCommandOptions } from "../shared/live-transport-cli.js";
+import { runMatrixQaLive } from "./runners/contract/runtime.js";
+import type { LiveTransportQaCommandOptions } from "./shared/live-transport-cli.js";
 import {
  printLiveTransportQaArtifacts,
  resolveLiveTransportQaRunOptions,
-} from "../shared/live-transport-cli.runtime.js";
-import { runMatrixQaLive } from "./matrix-live.runtime.js";
+} from "./shared/live-transport-cli.runtime.js";

 export async function runQaMatrixCommand(opts: LiveTransportQaCommandOptions) {
  const runOptions = resolveLiveTransportQaRunOptions(opts);
--- a/extensions/qa-matrix/src/cli.test.ts
+++ b/extensions/qa-matrix/src/cli.test.ts
@@ -0,0 +1,29 @@
+import { Command } from "commander";
+import { describe, expect, it } from "vitest";
+import { matrixQaCliRegistration } from "./cli.js";
+
+describe("matrix qa cli registration", () => {
+  // The Matrix lane must expose the shared lane flags but none of the credential flags.
+  it("keeps disposable Matrix lane flags focused", () => {
+    const sharedLaneFlags = [
+      "--repo-root",
+      "--output-dir",
+      "--provider-mode",
+      "--model",
+      "--alt-model",
+      "--scenario",
+      "--fast",
+      "--sut-account",
+    ];
+    const credentialFlags = ["--credential-source", "--credential-role"];
+
+    const qa = new Command();
+    matrixQaCliRegistration.register(qa);
+
+    const matrix = qa.commands.find((command) => command.name() === "matrix");
+    const optionNames = (matrix?.options ?? []).map((option) => option.long);
+
+    expect(optionNames).toEqual(expect.arrayContaining(sharedLaneFlags));
+    for (const flag of credentialFlags) {
+      expect(optionNames).not.toContain(flag);
+    }
+  });
+});
--- a/extensions/qa-lab/src/live-transports/matrix/cli.ts
+++ b/extensions/qa-lab/src/live-transports/matrix/cli.ts
@@ -4,7 +4,7 @@ import {
  createLiveTransportQaCliRegistration,
  type LiveTransportQaCliRegistration,
  type LiveTransportQaCommandOptions,
-} from "../shared/live-transport-cli.js";
+} from "./shared/live-transport-cli.js";

 type MatrixQaCliRuntime = typeof import("./cli.runtime.js");

@@ -27,6 +27,8 @@ export const matrixQaCliRegistration: LiveTransportQaCliRegistration =
    run: runQaMatrix,
  });

+export const qaRunnerCliRegistrations = [matrixQaCliRegistration] as const;
+
 export function registerMatrixQaCli(qa: Command) {
  matrixQaCliRegistration.register(qa);
 }
--- a/extensions/qa-matrix/src/docker-runtime.ts
+++ b/extensions/qa-matrix/src/docker-runtime.ts
@@ -0,0 +1,274 @@
+import { createServer } from "node:net";
+import { runExec } from "openclaw/plugin-sdk/process-runtime";
+import { fetchWithSsrFGuard } from "openclaw/plugin-sdk/ssrf-runtime";
+
+/** Runs `command` with `args` in working directory `cwd`, resolving with its stdout/stderr text. */
+export type RunCommand = (
+  command: string,
+  args: string[],
+  cwd: string,
+) => Promise<{ stdout: string; stderr: string }>;
+
+/** Minimal fetch-style probe: resolves with whether the response for `input` was OK. */
+export type FetchLike = (input: string) => Promise<{ ok: boolean }>;
+
+/**
+ * Probes `url` through the SSRF-guarded fetch with private networks allowed and a
+ * two-second abort timeout, and reports whether the response was OK. The guard's
+ * `release` callback is always awaited, even when reading the response fails.
+ */
+export async function fetchHealthUrl(url: string): Promise<{ ok: boolean }> {
+  const { response: healthResponse, release: releaseGuard } = await fetchWithSsrFGuard({
+    url,
+    init: {
+      signal: AbortSignal.timeout(2_000),
+    },
+    policy: { allowPrivateNetwork: true },
+    auditContext: "qa-matrix-docker-health-check",
+  });
+  try {
+    const ok = healthResponse.ok;
+    return { ok };
+  } finally {
+    await releaseGuard();
+  }
+}
+
+/**
+ * Renders an arbitrary thrown value as a human-readable string.
+ *
+ * @param error - Any value caught from a `catch` clause.
+ * @returns The `Error` message, the string itself, or a JSON/string rendering of the value.
+ *   Never throws and never returns a non-string, so it is safe inside failure paths.
+ */
+export function describeError(error: unknown): string {
+  if (error instanceof Error) {
+    return error.message;
+  }
+  if (typeof error === "string") {
+    return error;
+  }
+  try {
+    // JSON.stringify yields `undefined` (not a string) for undefined, functions and symbols.
+    return JSON.stringify(error) ?? String(error);
+  } catch {
+    // JSON.stringify throws on circular structures and BigInt values; fall back to a
+    // rendering that cannot throw so the original failure is not masked.
+    return typeof error === "object" || typeof error === "function"
+      ? Object.prototype.toString.call(error)
+      : String(error);
+  }
+}
+
+/**
+ * Checks whether `port` can be bound on 127.0.0.1 by briefly listening on it.
+ * Resolves `false` on any listen error and `true` once the probe server has closed.
+ */
+async function isPortFree(port: number) {
+  const probe = createServer();
+  return await new Promise<boolean>((settle) => {
+    const onBound = () => {
+      probe.close(() => settle(true));
+    };
+    probe.once("error", () => settle(false));
+    probe.listen(port, "127.0.0.1", onBound);
+  });
+}
+
+/**
+ * Asks the OS for an ephemeral port by listening on port 0, then closes the probe
+ * server and resolves with the port it was given. Rejects on listen/close errors or
+ * when the bound address does not expose a numeric port.
+ */
+async function findFreePort() {
+  const probe = createServer();
+  return await new Promise<number>((resolve, reject) => {
+    probe.once("error", reject);
+    probe.listen(0, () => {
+      const bound = probe.address();
+      if (bound === null || typeof bound === "string") {
+        probe.close();
+        reject(new Error("failed to find free port"));
+        return;
+      }
+      const { port } = bound;
+      probe.close((closeError) => {
+        if (closeError) {
+          reject(closeError);
+        } else {
+          resolve(port);
+        }
+      });
+    });
+  });
+}
+
+/**
+ * Chooses the host port to use: `preferredPort` when it is pinned or currently free,
+ * otherwise an OS-assigned free port. A pinned port is returned without probing.
+ */
+export async function resolveHostPort(preferredPort: number, pinned: boolean) {
+  if (pinned) {
+    return preferredPort;
+  }
+  const preferredIsFree = await isPortFree(preferredPort);
+  if (preferredIsFree) {
+    return preferredPort;
+  }
+  return await findFreePort();
+}
+
+/**
+ * Trims surrounding whitespace from command output and keeps at most its last 120 lines.
+ */
+function trimCommandOutput(output: string) {
+  const maxLines = 120;
+  const text = output.trim();
+  const lines = text.split("\n");
+  if (lines.length > maxLines) {
+    return lines.slice(-maxLines).join("\n");
+  }
+  return text;
+}
+
+/**
+ * Runs `command` with `args` in `cwd` (10 MiB output buffer) and returns the result.
+ * On failure, throws a new Error that names the command, includes the trimmed tails of
+ * stderr and stdout when present, and keeps the original error as `cause`.
+ */
+export async function execCommand(command: string, args: string[], cwd: string) {
+  try {
+    return await runExec(command, args, { cwd, maxBuffer: 10 * 1024 * 1024 });
+  } catch (error) {
+    const { stdout, stderr } = error as Error & { stdout?: string; stderr?: string };
+    const sections = [`Command failed: ${[command, ...args].join(" ")}`];
+    const stderrTail = trimCommandOutput(stderr ?? "");
+    if (stderrTail) {
+      sections.push(`stderr:\n${stderrTail}`);
+    }
+    const stdoutTail = trimCommandOutput(stdout ?? "");
+    if (stdoutTail) {
+      sections.push(`stdout:\n${stdoutTail}`);
+    }
+    throw new Error(sections.join("\n\n"), { cause: error });
+  }
+}
+
+/**
+ * Polls `url` through `deps.fetchImpl` until it reports OK or the timeout elapses.
+ *
+ * @param url - Health endpoint to probe.
+ * @param deps - Injected fetch and sleep implementations, plus an optional label and
+ *   compose file (both only used in the failure message), timeout (default 360s) and
+ *   poll interval (default 1s).
+ * @throws Error with the elapsed time, the last failure (if any) and a log hint when
+ *   the endpoint never became healthy.
+ */
+export async function waitForHealth(
+  url: string,
+  deps: {
+    label?: string;
+    composeFile?: string;
+    fetchImpl: FetchLike;
+    sleepImpl: (ms: number) => Promise<unknown>;
+    timeoutMs?: number;
+    pollMs?: number;
+  },
+) {
+  const timeoutMs = deps.timeoutMs ?? 360_000;
+  const pollMs = deps.pollMs ?? 1_000;
+  const startedAt = Date.now();
+  const deadline = startedAt + timeoutMs;
+  let lastFailure: unknown = null;
+
+  while (Date.now() < deadline) {
+    let healthy = false;
+    try {
+      healthy = (await deps.fetchImpl(url)).ok;
+      if (!healthy) {
+        lastFailure = new Error(`Health check returned non-OK for ${url}`);
+      }
+    } catch (error) {
+      lastFailure = error;
+    }
+    if (healthy) {
+      return;
+    }
+    await deps.sleepImpl(pollMs);
+  }
+
+  const elapsedSec = Math.round((Date.now() - startedAt) / 1000);
+  const limitSec = Math.round(timeoutMs / 1000);
+  const report = [
+    `${deps.label ?? url} did not become healthy within ${elapsedSec}s (limit ${limitSec}s).`,
+  ];
+  if (lastFailure) {
+    report.push(`Last error: ${describeError(lastFailure)}`);
+  }
+  report.push(
+    `Hint: check container logs with \`docker compose -f ${deps.composeFile ?? "<compose-file>"} logs\` and verify the port is not already in use.`,
+  );
+  throw new Error(report.join("\n"));
+}
+
+/** Single health probe: resolves the response's `ok` flag, or `false` if the probe throws. */
+async function isHealthy(url: string, fetchImpl: FetchLike) {
+  let ok = false;
+  try {
+    ok = (await fetchImpl(url)).ok;
+  } catch {
+    ok = false;
+  }
+  return ok;
+}
+
+/**
+ * Picks the status string for a compose `ps` row: the trimmed `Health` when non-empty,
+ * otherwise the trimmed `State` when non-empty, otherwise "unknown".
+ */
+function normalizeDockerServiceStatus(row?: { Health?: string; State?: string }) {
+  return row?.Health?.trim() || row?.State?.trim() || "unknown";
+}
+
+/**
+ * Parses `docker compose ps --format json` output into rows. Accepts a JSON array, a
+ * single JSON object, or one JSON object per line; blank output yields no rows.
+ * Throws the JSON parse error when a line in the per-line fallback is not valid JSON.
+ */
+function parseDockerComposePsRows(stdout: string) {
+  type Row = { Health?: string; State?: string };
+  const text = stdout.trim();
+  if (text.length === 0) {
+    return [] as Array<{ Health?: string; State?: string }>;
+  }
+
+  let whole: Row[] | Row;
+  try {
+    whole = JSON.parse(text) as Row[] | Row;
+  } catch {
+    // Not a single JSON document: treat every non-blank line as its own JSON object.
+    const rows: Row[] = [];
+    for (const rawLine of text.split("\n")) {
+      const line = rawLine.trim();
+      if (line) {
+        rows.push(JSON.parse(line) as Row);
+      }
+    }
+    return rows;
+  }
+  return Array.isArray(whole) ? whole : [whole];
+}
+
+/**
+ * Polls `docker compose ps` for `service` until its first row reports "healthy" or
+ * "running", or until the timeout elapses.
+ *
+ * @param service - Compose service name to inspect.
+ * @param composeFile - Compose file passed to `docker compose -f`.
+ * @param repoRoot - Working directory for the docker command.
+ * @param runCommand - Injected command runner.
+ * @param sleepImpl - Injected sleep used between polls.
+ * @param timeoutMs - Overall time limit (default 360s).
+ * @param pollMs - Delay between polls (default 1s).
+ * @throws Error with the elapsed time, last observed status and a log hint on timeout.
+ */
+export async function waitForDockerServiceHealth(
+  service: string,
+  composeFile: string,
+  repoRoot: string,
+  runCommand: RunCommand,
+  sleepImpl: (ms: number) => Promise<unknown>,
+  timeoutMs = 360_000,
+  pollMs = 1_000,
+) {
+  const startedAt = Date.now();
+  const deadline = startedAt + timeoutMs;
+  let lastStatus = "unknown";
+
+  while (Date.now() < deadline) {
+    let ready = false;
+    try {
+      const psArgs = ["compose", "-f", composeFile, "ps", "--format", "json", service];
+      const { stdout } = await runCommand("docker", psArgs, repoRoot);
+      const [firstRow] = parseDockerComposePsRows(stdout);
+      lastStatus = normalizeDockerServiceStatus(firstRow);
+      ready = lastStatus === "healthy" || lastStatus === "running";
+    } catch (error) {
+      // A failed probe is recorded as the last status and retried after the poll delay.
+      lastStatus = describeError(error);
+    }
+    if (ready) {
+      return;
+    }
+    await sleepImpl(pollMs);
+  }
+
+  const elapsedSec = Math.round((Date.now() - startedAt) / 1000);
+  const limitSec = Math.round(timeoutMs / 1000);
+  const report = [
+    `${service} did not become healthy within ${elapsedSec}s (limit ${limitSec}s).`,
+    `Last status: ${lastStatus}`,
+    `Hint: check container logs with \`docker compose -f ${composeFile} logs ${service}\`.`,
+  ];
+  throw new Error(report.join("\n"));
+}
+
+/**
+ * Builds `http://<container-ip>:<port>/` for a compose service by looking up its
+ * container id and then its network IP address via docker.
+ *
+ * @returns The base URL, or `null` when no container id or IP is reported, or when
+ *   `fetchImpl` is provided and `<base>healthz` does not respond OK.
+ */
+export async function resolveComposeServiceUrl(
+  service: string,
+  port: number,
+  composeFile: string,
+  repoRoot: string,
+  runCommand: RunCommand,
+  fetchImpl?: FetchLike,
+) {
+  // Runs one docker command in the repo root and returns its trimmed stdout.
+  const dockerOutput = async (args: string[]) => {
+    const { stdout } = await runCommand("docker", args, repoRoot);
+    return stdout.trim();
+  };
+
+  const containerId = await dockerOutput(["compose", "-f", composeFile, "ps", "-q", service]);
+  if (!containerId) {
+    return null;
+  }
+
+  // NOTE(review): the template concatenates the addresses of every attached network
+  // without a separator — presumably the service sits on a single network; confirm.
+  const ip = await dockerOutput([
+    "inspect",
+    "--format",
+    "{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}",
+    containerId,
+  ]);
+  if (!ip) {
+    return null;
+  }
+
+  const baseUrl = `http://${ip}:${port}/`;
+  if (!fetchImpl) {
+    return baseUrl;
+  }
+  const healthy = await isHealthy(`${baseUrl}healthz`, fetchImpl);
+  return healthy ? baseUrl : null;
+}
+
+// Bundles selected helpers (including the otherwise module-private status normalizer)
+// under one export — presumably for unit tests; confirm against the test files.
+export const __testing = {
+  fetchHealthUrl,
+  normalizeDockerServiceStatus,
+};
--- a/extensions/qa-matrix/src/report.ts
+++ b/extensions/qa-matrix/src/report.ts
@@ -0,0 +1,100 @@
+export type QaReportCheck = { // One check (or scenario step) entry in the rendered report.
+  name: string; // Label shown next to the checkbox.
+  status: "pass" | "fail" | "skip"; // Only "pass" renders a ticked box; "skip" is not counted in the pass/fail totals.
+  details?: string; // Optional free text; multi-line values are rendered as a fenced text block.
+};
+
+export type QaReportScenario = { // One scenario section in the rendered report.
+  name: string; // Used as the scenario's "###" heading.
+  status: "pass" | "fail" | "skip"; // Printed verbatim on the "- Status:" line.
+  details?: string; // Optional free text rendered under the status line.
+  steps?: QaReportCheck[]; // Optional per-step results listed under "- Steps:".
+};
+
+/**
+ * Appends a labelled details entry to `lines`.
+ *
+ * Single-line details go on the bullet itself; details containing a newline
+ * are emitted after the bullet as an unindented fenced `text` block.
+ *
+ * @param lines - Output buffer, mutated in place.
+ * @param label - Bullet label placed before the colon.
+ * @param details - Text to render.
+ * @param indent - Prefix applied to the bullet line only.
+ */
+function pushDetailsBlock(lines: string[], label: string, details: string, indent = "") {
+  const bullet = `${indent}- ${label}:`;
+  if (details.includes("\n")) {
+    lines.push(bullet, "", "```text", details, "```");
+    return;
+  }
+  lines.push(`${bullet} ${details}`);
+}
+
+/**
+ * Renders a QA run as a Markdown document.
+ *
+ * The output starts with a title and a summary list (start, finish, duration,
+ * pass and fail counts), followed by optional "Checks", "Scenarios",
+ * "Timeline" and "Notes" sections; a section is emitted only when it has
+ * entries. Entries with status "skip" count as neither passed nor failed.
+ *
+ * @param params - Report title, run timestamps and the optional section data.
+ * @returns The Markdown text, terminated by a newline.
+ */
+export function renderQaMarkdownReport(params: {
+  title: string;
+  startedAt: Date;
+  finishedAt: Date;
+  checks?: QaReportCheck[];
+  scenarios?: QaReportScenario[];
+  timeline?: string[];
+  notes?: string[];
+}) {
+  const checks = params.checks ?? [];
+  const scenarios = params.scenarios ?? [];
+  const timeline = params.timeline ?? [];
+  const notes = params.notes ?? [];
+
+  // Checks and scenarios share one pass/fail tally.
+  const countWithStatus = (status: "pass" | "fail") =>
+    [...checks, ...scenarios].filter((entry) => entry.status === status).length;
+  const checkbox = (status: QaReportCheck["status"]) => (status === "pass" ? "x" : " ");
+
+  const passCount = countWithStatus("pass");
+  const failCount = countWithStatus("fail");
+  const durationMs = params.finishedAt.getTime() - params.startedAt.getTime();
+
+  const lines = [
+    `# ${params.title}`,
+    "",
+    `- Started: ${params.startedAt.toISOString()}`,
+    `- Finished: ${params.finishedAt.toISOString()}`,
+    `- Duration ms: ${durationMs}`,
+    `- Passed: ${passCount}`,
+    `- Failed: ${failCount}`,
+    "",
+  ];
+
+  if (checks.length > 0) {
+    lines.push("## Checks", "");
+    for (const { name, status, details } of checks) {
+      lines.push(`- [${checkbox(status)}] ${name}`);
+      if (details) {
+        pushDetailsBlock(lines, "Details", details, "  ");
+      }
+    }
+  }
+
+  if (scenarios.length > 0) {
+    lines.push("", "## Scenarios", "");
+    for (const { name, status, details, steps } of scenarios) {
+      lines.push(`### ${name}`, "", `- Status: ${status}`);
+      if (details) {
+        pushDetailsBlock(lines, "Details", details);
+      }
+      if (steps?.length) {
+        lines.push("- Steps:");
+        for (const step of steps) {
+          lines.push(`  - [${checkbox(step.status)}] ${step.name}`);
+          if (step.details) {
+            pushDetailsBlock(lines, "Details", step.details, "    ");
+          }
+        }
+      }
+      // Blank line separates this scenario from whatever follows.
+      lines.push("");
+    }
+  }
+
+  if (timeline.length > 0) {
+    lines.push("## Timeline", "");
+    for (const item of timeline) {
+      lines.push(`- ${item}`);
+    }
+  }
+
+  if (notes.length > 0) {
+    lines.push("", "## Notes", "");
+    for (const note of notes) {
+      lines.push(`- ${note}`);
+    }
+  }
+
+  lines.push("");
+  return lines.join("\n");
+}
--- a/extensions/qa-matrix/src/run-config.ts
+++ b/extensions/qa-matrix/src/run-config.ts
@@ -0,0 +1,9 @@
+export type QaProviderMode = "mock-openai" | "live-frontier"; // Canonical provider modes.
+export type QaProviderModeInput = QaProviderMode | "live-openai"; // Accepted input spellings; "live-openai" normalizes to "live-frontier".
+
+/**
+ * Maps arbitrary input onto a canonical provider mode.
+ *
+ * @param input - Any value; only the exact string "mock-openai" selects the mock mode.
+ * @returns "mock-openai" for that exact input, otherwise "live-frontier".
+ */
+export function normalizeQaProviderMode(input: unknown): QaProviderMode {
+  return input === "mock-openai" ? "mock-openai" : "live-frontier";
+}
--- a/extensions/qa-matrix/src/runners/contract/model-selection.test.ts
+++ b/extensions/qa-matrix/src/runners/contract/model-selection.test.ts
@@ -0,0 +1,51 @@
+import { beforeEach, describe, expect, it, vi } from "vitest";
+
+const loadQaLabRuntimeModule = vi.hoisted(() => vi.fn()); // Hoisted so the vi.mock factory below can reference it.
+const defaultQaRuntimeModelForMode = vi.hoisted(() => vi.fn()); // Stand-in for the qa-lab runtime's default-model lookup.
+
+vi.mock("openclaw/plugin-sdk/qa-lab-runtime", () => ({ // Swap the qa-lab runtime loader for the mock above.
+  loadQaLabRuntimeModule,
+}));
+
+describe("matrix qa model selection", () => {
+  beforeEach(() => {
+    defaultQaRuntimeModelForMode.mockReset().mockImplementation((mode, options) => // Encode mode and alternate flag into the returned model name.
+      options?.alternate ? `${mode}:alt` : `${mode}:primary`,
+    );
+    loadQaLabRuntimeModule.mockReset().mockReturnValue({
+      defaultQaRuntimeModelForMode,
+    });
+  });
+
+  it("delegates default model selection through qa-lab runtime defaults", async () => {
+    const { resolveMatrixQaModels } = await import("./model-selection.js");
+
+    expect(resolveMatrixQaModels({ providerMode: "live-openai" })).toEqual({ // Legacy "live-openai" input must come back as "live-frontier".
+      providerMode: "live-frontier",
+      primaryModel: "live-frontier:primary",
+      alternateModel: "live-frontier:alt",
+    });
+    expect(defaultQaRuntimeModelForMode).toHaveBeenNthCalledWith(1, "live-frontier"); // Primary default is requested first.
+    expect(defaultQaRuntimeModelForMode).toHaveBeenNthCalledWith(2, "live-frontier", {
+      alternate: true,
+    });
+  });
+
+  it("preserves explicit model overrides", async () => {
+    const { resolveMatrixQaModels } = await import("./model-selection.js");
+
+    expect(
+      resolveMatrixQaModels({
+        providerMode: "mock-openai",
+        primaryModel: "custom-primary",
+        alternateModel: "custom-alt",
+      }),
+    ).toEqual({
+      providerMode: "mock-openai",
+      primaryModel: "custom-primary",
+      alternateModel: "custom-alt",
+    });
+    expect(loadQaLabRuntimeModule).not.toHaveBeenCalled(); // With both overrides given, the qa-lab runtime must not be loaded.
+    expect(defaultQaRuntimeModelForMode).not.toHaveBeenCalled();
+  });
+});
--- a/extensions/qa-matrix/src/runners/contract/model-selection.ts
+++ b/extensions/qa-matrix/src/runners/contract/model-selection.ts
@@ -0,0 +1,33 @@
+import { loadQaLabRuntimeModule } from "openclaw/plugin-sdk/qa-lab-runtime";
+import { normalizeQaProviderMode, type QaProviderModeInput } from "../../run-config.js";
+
+export type ResolvedMatrixQaModels = { // Result of resolveMatrixQaModels.
+  providerMode: ReturnType<typeof normalizeQaProviderMode>; // Normalized provider mode.
+  primaryModel: string; // Trimmed override, or the runtime default for the mode.
+  alternateModel: string; // Trimmed override, or the runtime default requested with { alternate: true }.
+};
+
+/**
+ * Resolves the provider mode and the primary/alternate model refs for a Matrix QA run.
+ *
+ * Overrides are trimmed; a blank override counts as absent. The qa-lab runtime
+ * is loaded only when at least one model has to fall back to its default.
+ *
+ * @param params - Optional provider mode (defaults to "live-frontier") and model overrides.
+ * @returns The normalized provider mode together with both model refs.
+ */
+export function resolveMatrixQaModels(params: {
+  providerMode?: QaProviderModeInput;
+  primaryModel?: string;
+  alternateModel?: string;
+}): ResolvedMatrixQaModels {
+  const providerMode = normalizeQaProviderMode(params.providerMode ?? "live-frontier");
+  const explicitPrimary = params.primaryModel?.trim();
+  const explicitAlternate = params.alternateModel?.trim();
+
+  if (!explicitPrimary || !explicitAlternate) {
+    // At least one model is missing: ask the qa-lab runtime for its defaults.
+    const qaLabRuntime = loadQaLabRuntimeModule();
+    return {
+      providerMode,
+      primaryModel: explicitPrimary
+        ? explicitPrimary
+        : qaLabRuntime.defaultQaRuntimeModelForMode(providerMode),
+      alternateModel: explicitAlternate
+        ? explicitAlternate
+        : qaLabRuntime.defaultQaRuntimeModelForMode(providerMode, { alternate: true }),
+    };
+  }
+
+  return {
+    providerMode,
+    primaryModel: explicitPrimary,
+    alternateModel: explicitAlternate,
+  };
+}
--- a/extensions/qa-lab/src/live-transports/matrix/matrix-live.runtime.test.ts
+++ b/extensions/qa-lab/src/live-transports/matrix/matrix-live.runtime.test.ts
@@ -1,6 +1,6 @@
 import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
 import { afterEach, describe, expect, it, vi } from "vitest";
-import { __testing as liveTesting } from "./matrix-live.runtime.js";
+import { __testing as liveTesting } from "./runtime.js";

 afterEach(() => {
  vi.useRealTimers();
--- a/extensions/qa-lab/src/live-transports/matrix/matrix-live.runtime.ts
+++ b/extensions/qa-lab/src/live-transports/matrix/matrix-live.runtime.ts
@@ -4,22 +4,20 @@ import path from "node:path";
 import { setTimeout as sleep } from "node:timers/promises";
 import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
 import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
-import { startQaGatewayChild } from "../../gateway-child.js";
+import { loadQaLabRuntimeModule } from "openclaw/plugin-sdk/qa-lab-runtime";
 import type { QaReportCheck } from "../../report.js";
 import { renderQaMarkdownReport } from "../../report.js";
+import { type QaProviderModeInput } from "../../run-config.js";
 import {
-  defaultQaModelForMode,
-  normalizeQaProviderMode,
-  type QaProviderModeInput,
-} from "../../run-config.js";
-import { startQaLiveLaneGateway } from "../shared/live-gateway.runtime.js";
-import { appendLiveLaneIssue, buildLiveLaneArtifactsError } from "../shared/live-lane-helpers.js";
+  appendLiveLaneIssue,
+  buildLiveLaneArtifactsError,
+} from "../../shared/live-lane-helpers.js";
 import {
  provisionMatrixQaRoom,
  type MatrixQaObservedEvent,
  type MatrixQaProvisionResult,
-} from "./matrix-driver-client.js";
-import { startMatrixQaHarness } from "./matrix-harness.runtime.js";
+} from "../../substrate/client.js";
+import { startMatrixQaHarness } from "../../substrate/harness.runtime.js";
 import {
  MATRIX_QA_SCENARIOS,
  buildMatrixReplyDetails,
@@ -28,7 +26,22 @@ import {
  runMatrixQaScenario,
  type MatrixQaCanaryArtifact,
  type MatrixQaScenarioArtifacts,
-} from "./matrix-live-scenarios.js";
+} from "./scenarios.js";
+import { resolveMatrixQaModels } from "./model-selection.js";
+
+type MatrixQaGatewayChild = { // Local structural type for the gateway child handle used in this file.
+  call(
+    method: string,
+    params: Record<string, unknown>,
+    options?: { timeoutMs?: number },
+  ): Promise<unknown>;
+  restart(): Promise<void>;
+};
+
+type MatrixQaLiveLaneGatewayHarness = { // Shape that startMatrixQaLiveLaneGateway casts the qa-lab runtime result to.
+  gateway: MatrixQaGatewayChild;
+  stop(): Promise<void>;
+};

 type MatrixQaScenarioResult = {
  artifacts?: MatrixQaScenarioArtifacts;
@@ -214,7 +227,7 @@ function isMatrixAccountReady(entry?: {
 }

 async function waitForMatrixChannelReady(
-  gateway: Awaited<ReturnType<typeof startQaGatewayChild>>,
+  gateway: MatrixQaGatewayChild,
  accountId: string,
  opts?: {
    pollMs?: number;
@@ -255,6 +268,27 @@ async function waitForMatrixChannelReady(
  throw new Error(`matrix account "${accountId}" did not become ready`);
 }

+/**
+ * Starts the live-lane gateway through the lazily loaded qa-lab runtime module.
+ *
+ * @param params - Gateway start options, forwarded unchanged to `startQaLiveLaneGateway`.
+ * @returns The runtime's result, cast to the local harness shape.
+ */
+async function startMatrixQaLiveLaneGateway(params: {
+  repoRoot: string;
+  transport: {
+    requiredPluginIds: readonly string[];
+    createGatewayConfig: (params: {
+      baseUrl: string;
+    }) => Pick<OpenClawConfig, "channels" | "messages">;
+  };
+  transportBaseUrl: string;
+  providerMode: "mock-openai" | "live-frontier";
+  primaryModel: string;
+  alternateModel: string;
+  fastMode?: boolean;
+  controlUiEnabled?: boolean;
+  mutateConfig?: (cfg: OpenClawConfig) => OpenClawConfig;
+}): Promise<MatrixQaLiveLaneGatewayHarness> {
+  const qaLabRuntime = loadQaLabRuntimeModule();
+  const harness = await qaLabRuntime.startQaLiveLaneGateway(params);
+  return harness as MatrixQaLiveLaneGatewayHarness;
+}
+
 export async function runMatrixQaLive(params: {
  fastMode?: boolean;
  outputDir?: string;
@@ -271,9 +305,11 @@ export async function runMatrixQaLive(params: {
    path.join(repoRoot, ".artifacts", "qa-e2e", `matrix-${Date.now().toString(36)}`);
  await fs.mkdir(outputDir, { recursive: true });

-  const providerMode = normalizeQaProviderMode(params.providerMode ?? "live-frontier");
-  const primaryModel = params.primaryModel?.trim() || defaultQaModelForMode(providerMode);
-  const alternateModel = params.alternateModel?.trim() || defaultQaModelForMode(providerMode, true);
+  const { providerMode, primaryModel, alternateModel } = resolveMatrixQaModels({
+    providerMode: params.providerMode,
+    primaryModel: params.primaryModel,
+    alternateModel: params.alternateModel,
+  });
  const sutAccountId = params.sutAccountId?.trim() || "sut";
  const scenarios = findMatrixQaScenarios(params.scenarioIds);
  const observedEvents: MatrixQaObservedEvent[] = [];
@@ -317,12 +353,12 @@ export async function runMatrixQaLive(params: {
  const scenarioResults: MatrixQaScenarioResult[] = [];
  const cleanupErrors: string[] = [];
  let canaryArtifact: MatrixQaCanaryArtifact | undefined;
-  let gatewayHarness: Awaited<ReturnType<typeof startQaLiveLaneGateway>> | null = null;
+  let gatewayHarness: MatrixQaLiveLaneGatewayHarness | null = null;
  let canaryFailed = false;
  const syncState: { driver?: string; observer?: string } = {};

  try {
-    gatewayHarness = await startQaLiveLaneGateway({
+    gatewayHarness = await startMatrixQaLiveLaneGateway({
      repoRoot,
      transport: {
        requiredPluginIds: [],
@@ -555,5 +591,6 @@ export const __testing = {
  buildMatrixQaConfig,
  buildObservedEventsArtifact,
  isMatrixAccountReady,
+  resolveMatrixQaModels,
  waitForMatrixChannelReady,
 };
--- a/extensions/qa-lab/src/live-transports/matrix/matrix-live-scenarios.test.ts
+++ b/extensions/qa-lab/src/live-transports/matrix/matrix-live-scenarios.test.ts
@@ -3,19 +3,19 @@ const { createMatrixQaClient } = vi.hoisted(() => ({
  createMatrixQaClient: vi.fn(),
 }));

-vi.mock("./matrix-driver-client.js", () => ({
+vi.mock("../../substrate/client.js", () => ({
  createMatrixQaClient,
 }));

 import {
  LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS,
  findMissingLiveTransportStandardScenarios,
-} from "../shared/live-transport-scenarios.js";
+} from "../../shared/live-transport-scenarios.js";
 import {
  __testing as scenarioTesting,
  MATRIX_QA_SCENARIOS,
  runMatrixQaScenario,
-} from "./matrix-live-scenarios.js";
+} from "./scenarios.js";

 describe("matrix live qa scenarios", () => {
  beforeEach(() => {
--- a/extensions/qa-lab/src/live-transports/matrix/matrix-live-scenarios.ts
+++ b/extensions/qa-lab/src/live-transports/matrix/matrix-live-scenarios.ts
@@ -3,8 +3,8 @@ import {
  collectLiveTransportStandardScenarioCoverage,
  selectLiveTransportScenarios,
  type LiveTransportScenarioDefinition,
-} from "../shared/live-transport-scenarios.js";
-import { createMatrixQaClient, type MatrixQaObservedEvent } from "./matrix-driver-client.js";
+} from "../../shared/live-transport-scenarios.js";
+import { createMatrixQaClient, type MatrixQaObservedEvent } from "../../substrate/client.js";

 export type MatrixQaScenarioId =
  | "matrix-thread-follow-up"
--- a/extensions/qa-matrix/src/runtime-api.test.ts
+++ b/extensions/qa-matrix/src/runtime-api.test.ts
@@ -0,0 +1,9 @@
+import { describe, expect, it } from "vitest";
+
+describe("matrix qa runtime api surface", () => {
+  it("keeps runner discovery lightweight", async () => {
+    const runtimeApi = await import("../runtime-api.js");
+
+    expect(Object.keys(runtimeApi).toSorted()).toEqual(["qaRunnerCliRegistrations"]); // The module must export exactly this one name.
+  });
+});
--- a/extensions/qa-matrix/src/shared/live-lane-helpers.ts
+++ b/extensions/qa-matrix/src/shared/live-lane-helpers.ts
@@ -0,0 +1,18 @@
+import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
+
+/**
+ * Records an error as a `"<label>: <message>"` entry at the end of `issues`.
+ *
+ * @param issues - Issue list, mutated in place.
+ * @param label - Prefix identifying where the error came from.
+ * @param error - Value formatted with `formatErrorMessage`.
+ */
+export function appendLiveLaneIssue(issues: string[], label: string, error: unknown) {
+  const message = formatErrorMessage(error);
+  issues.push(`${label}: ${message}`);
+}
+
+/**
+ * Builds a multi-line error message that lists artifact locations.
+ *
+ * Layout: the heading, any detail lines, an "Artifacts:" line, then one
+ * `- <label>: <path>` bullet per artifact entry.
+ *
+ * @param params - Heading, artifact label-to-path map and optional detail lines.
+ * @returns The lines joined with newlines.
+ */
+export function buildLiveLaneArtifactsError(params: {
+  heading: string;
+  artifacts: Record<string, string>;
+  details?: string[];
+}) {
+  const lines = [params.heading];
+  lines.push(...(params.details ?? []));
+  lines.push("Artifacts:");
+  for (const [label, filePath] of Object.entries(params.artifacts)) {
+    lines.push(`- ${label}: ${filePath}`);
+  }
+  return lines.join("\n");
+}
--- a/extensions/qa-matrix/src/shared/live-transport-cli.runtime.ts
+++ b/extensions/qa-matrix/src/shared/live-transport-cli.runtime.ts
@@ -0,0 +1,40 @@
+import path from "node:path";
+import { resolveRepoRelativeOutputDir } from "../cli-paths.js";
+import type { QaProviderMode } from "../run-config.js";
+import { normalizeQaProviderMode } from "../run-config.js";
+import type { LiveTransportQaCommandOptions } from "./live-transport-cli.js";
+
+/**
+ * Turns CLI command options into the options a live-transport run consumes.
+ *
+ * The repo root becomes an absolute path (falling back to the current working
+ * directory), the output directory is resolved against that root, the provider
+ * mode is normalized (defaulting to "live-frontier"), and both credential
+ * fields are trimmed. The remaining fields pass through unchanged.
+ *
+ * @param opts - Options as mapped from the command line.
+ * @returns The options with `repoRoot` and `providerMode` always populated.
+ */
+export function resolveLiveTransportQaRunOptions(
+  opts: LiveTransportQaCommandOptions,
+): LiveTransportQaCommandOptions & {
+  repoRoot: string;
+  providerMode: QaProviderMode;
+} {
+  const repoRoot = path.resolve(opts.repoRoot ?? process.cwd());
+  const outputDir = resolveRepoRelativeOutputDir(repoRoot, opts.outputDir);
+
+  let providerMode: QaProviderMode = "live-frontier";
+  if (opts.providerMode !== undefined) {
+    providerMode = normalizeQaProviderMode(opts.providerMode);
+  }
+
+  return {
+    repoRoot,
+    outputDir,
+    providerMode,
+    primaryModel: opts.primaryModel,
+    alternateModel: opts.alternateModel,
+    fastMode: opts.fastMode,
+    scenarioIds: opts.scenarioIds,
+    sutAccountId: opts.sutAccountId,
+    credentialSource: opts.credentialSource?.trim(),
+    credentialRole: opts.credentialRole?.trim(),
+  };
+}
+
+/**
+ * Writes one `"<laneLabel> <label>: <path>"` line to stdout per artifact.
+ *
+ * @param laneLabel - Prefix placed at the start of every line.
+ * @param artifacts - Artifact label-to-path map.
+ */
+export function printLiveTransportQaArtifacts(
+  laneLabel: string,
+  artifacts: Record<string, string>,
+) {
+  Object.entries(artifacts).forEach(([label, filePath]) => {
+    process.stdout.write(`${laneLabel} ${label}: ${filePath}\n`);
+  });
+}
--- a/extensions/qa-matrix/src/shared/live-transport-cli.ts
+++ b/extensions/qa-matrix/src/shared/live-transport-cli.ts
@@ -0,0 +1,132 @@
+import type { Command } from "commander";
+import { collectString } from "../cli-options.js";
+import type { QaProviderModeInput } from "../run-config.js";
+
+export type LiveTransportQaCommandOptions = { // Normalized option names handed to a lane's run callback.
+  repoRoot?: string;
+  outputDir?: string;
+  providerMode?: QaProviderModeInput;
+  primaryModel?: string; // Mapped from the commander `model` option.
+  alternateModel?: string; // Mapped from the commander `altModel` option.
+  fastMode?: boolean; // Mapped from the commander `fast` option.
+  scenarioIds?: string[]; // Mapped from the commander `scenario` option.
+  sutAccountId?: string; // Mapped from the commander `sutAccount` option.
+  credentialSource?: string;
+  credentialRole?: string;
+};
+
+type LiveTransportQaCommanderOptions = { // Raw option bag as received by the command action.
+  repoRoot?: string;
+  outputDir?: string;
+  providerMode?: QaProviderModeInput;
+  model?: string;
+  altModel?: string;
+  scenario?: string[];
+  fast?: boolean;
+  sutAccount?: string;
+  credentialSource?: string;
+  credentialRole?: string;
+};
+
+export type LiveTransportQaCliRegistration = { // Deferred registration of one QA subcommand.
+  commandName: string;
+  register(qa: Command): void; // Attaches the subcommand to the given parent command.
+};
+
+export type LiveTransportQaCredentialCliOptions = { // Passing this object enables the --credential-source flag.
+  sourceDescription?: string; // Overrides the default --credential-source help text.
+  roleDescription?: string; // When set, --credential-role is also registered with this help text.
+};
+
+/**
+ * Wraps an async loader so it runs at most once for as long as it succeeds.
+ *
+ * The first call starts `load()` and later calls share the same promise.
+ * If that promise rejects, the cached promise is dropped so the next call
+ * retries instead of replaying the old failure forever. Callers that were
+ * already waiting on the failed attempt still see its rejection.
+ *
+ * @param load - Loader invoked lazily on first use (and again after a failure).
+ * @returns An async function resolving to the loaded value.
+ */
+export function createLazyCliRuntimeLoader<T>(load: () => Promise<T>) {
+  let promise: Promise<T> | null = null;
+  return async () => {
+    const pending = (promise ??= load());
+    try {
+      return await pending;
+    } catch (error) {
+      // Only clear the slot if it still holds the attempt that failed; a newer
+      // attempt started by another caller must not be discarded.
+      if (promise === pending) {
+        promise = null;
+      }
+      throw error;
+    }
+  };
+}
+
+/**
+ * Renames the raw commander option keys to the run-option names.
+ *
+ * Values are copied as-is; only the keys `model`, `altModel`, `fast`,
+ * `scenario` and `sutAccount` change name.
+ *
+ * @param opts - Option bag received by the command action.
+ * @returns The same values under the `LiveTransportQaCommandOptions` keys.
+ */
+export function mapLiveTransportQaCommanderOptions(
+  opts: LiveTransportQaCommanderOptions,
+): LiveTransportQaCommandOptions {
+  const {
+    repoRoot,
+    outputDir,
+    providerMode,
+    model,
+    altModel,
+    fast,
+    scenario,
+    sutAccount,
+    credentialSource,
+    credentialRole,
+  } = opts;
+  return {
+    repoRoot,
+    outputDir,
+    providerMode,
+    primaryModel: model,
+    alternateModel: altModel,
+    fastMode: fast,
+    scenarioIds: scenario,
+    sutAccountId: sutAccount,
+    credentialSource,
+    credentialRole,
+  };
+}
+
+/**
+ * Registers one live-transport QA subcommand on the given parent command.
+ *
+ * The subcommand always gets the shared repo, output, provider, model,
+ * scenario, fast and SUT-account options. The credential flags are added only
+ * when `credentialOptions` is supplied, and `--credential-role` only when a
+ * role description is given. The action maps the parsed options and awaits `run`.
+ *
+ * @param params - Parent command, subcommand name, help texts and the run callback.
+ */
+export function registerLiveTransportQaCli(params: {
+  qa: Command;
+  commandName: string;
+  credentialOptions?: LiveTransportQaCredentialCliOptions;
+  description: string;
+  outputDirHelp: string;
+  scenarioHelp: string;
+  sutAccountHelp: string;
+  run: (opts: LiveTransportQaCommandOptions) => Promise<void>;
+}) {
+  const { credentialOptions } = params;
+
+  const command = params.qa
+    .command(params.commandName)
+    .description(params.description)
+    .option("--repo-root <path>", "Repository root to target when running from a neutral cwd")
+    .option("--output-dir <path>", params.outputDirHelp)
+    .option(
+      "--provider-mode <mode>",
+      "Provider mode: mock-openai or live-frontier (legacy live-openai still works)",
+      "live-frontier",
+    )
+    .option("--model <ref>", "Primary provider/model ref")
+    .option("--alt-model <ref>", "Alternate provider/model ref")
+    .option("--scenario <id>", params.scenarioHelp, collectString, [])
+    .option("--fast", "Enable provider fast mode where supported", false)
+    .option("--sut-account <id>", params.sutAccountHelp, "sut");
+
+  if (credentialOptions) {
+    const sourceHelp =
+      credentialOptions.sourceDescription ??
+      "Credential source for live lanes: env or convex (default: env)";
+    command.option("--credential-source <source>", sourceHelp);
+
+    const roleHelp = credentialOptions.roleDescription;
+    if (roleHelp) {
+      command.option("--credential-role <role>", roleHelp);
+    }
+  }
+
+  command.action(async (opts: LiveTransportQaCommanderOptions) => {
+    const runOptions = mapLiveTransportQaCommanderOptions(opts);
+    await params.run(runOptions);
+  });
+}
+
+/**
+ * Packages a subcommand definition as a registration object.
+ *
+ * Nothing is attached to a command until `register(qa)` is called, at which
+ * point the definition is handed to `registerLiveTransportQaCli` with `qa`
+ * as the parent command.
+ *
+ * @param params - Subcommand name, help texts, optional credential options and the run callback.
+ * @returns The command name together with its deferred `register` function.
+ */
+export function createLiveTransportQaCliRegistration(params: {
+  commandName: string;
+  credentialOptions?: LiveTransportQaCredentialCliOptions;
+  description: string;
+  outputDirHelp: string;
+  scenarioHelp: string;
+  sutAccountHelp: string;
+  run: (opts: LiveTransportQaCommandOptions) => Promise<void>;
+}): LiveTransportQaCliRegistration {
+  const { commandName } = params;
+  return {
+    commandName,
+    register(qa: Command) {
+      // `qa` is spread last so the parent command always comes from the caller.
+      registerLiveTransportQaCli({ ...params, qa });
+    },
+  };
+}
--- a/extensions/qa-matrix/src/shared/live-transport-scenarios.ts
+++ b/extensions/qa-matrix/src/shared/live-transport-scenarios.ts
@@ -0,0 +1,149 @@
+export type LiveTransportStandardScenarioId = // Ids of the shared standard-scenario catalog.
+  | "canary"
+  | "mention-gating"
+  | "allowlist-block"
+  | "top-level-reply-shape"
+  | "restart-resume"
+  | "thread-follow-up"
+  | "thread-isolation"
+  | "reaction-observation"
+  | "help-command";
+
+export type LiveTransportScenarioDefinition<TId extends string = string> = { // One lane-specific scenario.
+  id: TId;
+  standardId?: LiveTransportStandardScenarioId; // Standard scenario this one counts towards, if any.
+  timeoutMs: number;
+  title: string;
+};
+
+export type LiveTransportStandardScenarioDefinition = { // Catalog entry describing one standard scenario.
+  description: string;
+  id: LiveTransportStandardScenarioId;
+  title: string;
+};
+
+export const LIVE_TRANSPORT_STANDARD_SCENARIOS: readonly LiveTransportStandardScenarioDefinition[] = // Catalog with one entry per standard scenario id.
+  [
+    {
+      id: "canary",
+      title: "Transport canary",
+      description: "The lane can trigger one known-good reply on the real transport.",
+    },
+    {
+      id: "mention-gating",
+      title: "Mention gating",
+      description: "Messages without the required mention do not trigger a reply.",
+    },
+    {
+      id: "allowlist-block",
+      title: "Sender allowlist block",
+      description: "Non-allowlisted senders do not trigger a reply.",
+    },
+    {
+      id: "top-level-reply-shape",
+      title: "Top-level reply shape",
+      description: "Top-level replies stay top-level when the lane is configured that way.",
+    },
+    {
+      id: "restart-resume",
+      title: "Restart resume",
+      description: "The lane still responds after a gateway restart.",
+    },
+    {
+      id: "thread-follow-up",
+      title: "Thread follow-up",
+      description: "Threaded prompts receive threaded replies with the expected relation metadata.",
+    },
+    {
+      id: "thread-isolation",
+      title: "Thread isolation",
+      description: "Fresh top-level prompts stay out of prior threads.",
+    },
+    {
+      id: "reaction-observation",
+      title: "Reaction observation",
+      description: "Reaction events are observed and normalized correctly.",
+    },
+    {
+      id: "help-command",
+      title: "Help command",
+      description: "The transport-specific help command path replies successfully.",
+    },
+  ] as const;
+
+export const LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS: readonly LiveTransportStandardScenarioId[] = // Baseline subset: the first five catalog ids.
+  [
+    "canary",
+    "mention-gating",
+    "allowlist-block",
+    "top-level-reply-shape",
+    "restart-resume",
+  ] as const;
+
+const LIVE_TRANSPORT_STANDARD_SCENARIO_ID_SET = new Set( // Lookup set used to validate standard scenario ids.
+  LIVE_TRANSPORT_STANDARD_SCENARIOS.map((scenario) => scenario.id),
+);
+
+/**
+ * Throws if any id is missing from the standard scenario catalog.
+ *
+ * @param ids - Ids to validate.
+ * @throws Error naming the first id that is not in the catalog.
+ */
+function assertKnownStandardScenarioIds(ids: readonly LiveTransportStandardScenarioId[]) {
+  const firstUnknownIndex = ids.findIndex(
+    (id) => !LIVE_TRANSPORT_STANDARD_SCENARIO_ID_SET.has(id),
+  );
+  if (firstUnknownIndex !== -1) {
+    throw new Error(`unknown live transport standard scenario id: ${ids[firstUnknownIndex]}`);
+  }
+}
+
+/**
+ * Selects the scenarios whose ids were requested, keeping catalog order.
+ *
+ * With no ids (undefined or empty) a copy of the full scenario list is
+ * returned. Membership checks go through sets, so the cost stays linear in
+ * the number of ids and scenarios.
+ *
+ * @param params - Requested ids, the lane label used in the error message, and the scenario catalog.
+ * @returns The matching scenarios in catalog order.
+ * @throws Error listing every requested id that matches no scenario.
+ */
+export function selectLiveTransportScenarios<TDefinition extends { id: string }>(params: {
+  ids?: string[];
+  laneLabel: string;
+  scenarios: readonly TDefinition[];
+}) {
+  if (!params.ids || params.ids.length === 0) {
+    return [...params.scenarios];
+  }
+  const requested = new Set(params.ids);
+  const selected = params.scenarios.filter((scenario) => requested.has(scenario.id));
+  const selectedIds = new Set(selected.map((scenario) => scenario.id));
+  // Iterating the set reports each unknown id once, in first-requested order.
+  const missingIds = [...requested].filter((id) => !selectedIds.has(id));
+  if (missingIds.length > 0) {
+    throw new Error(`unknown ${params.laneLabel} QA scenario id(s): ${missingIds.join(", ")}`);
+  }
+  return selected;
+}
+
+/**
+ * Lists the standard scenario ids a lane covers, without duplicates.
+ *
+ * Always-on ids come first, followed by each scenario's `standardId` in
+ * scenario order; scenarios without a `standardId` contribute nothing.
+ *
+ * @param params - Optional always-on ids and the lane's scenario definitions.
+ * @returns Covered ids in first-seen order.
+ * @throws Error if any always-on id or `standardId` is not in the standard catalog.
+ */
+export function collectLiveTransportStandardScenarioCoverage<TId extends string>(params: {
+  alwaysOnStandardScenarioIds?: readonly LiveTransportStandardScenarioId[];
+  scenarios: readonly LiveTransportScenarioDefinition<TId>[];
+}) {
+  const alwaysOnIds = params.alwaysOnStandardScenarioIds ?? [];
+  assertKnownStandardScenarioIds(alwaysOnIds);
+
+  // A Set keeps insertion order, which gives first-seen ordering for free.
+  const covered = new Set<LiveTransportStandardScenarioId>(alwaysOnIds);
+  for (const { standardId } of params.scenarios) {
+    if (!standardId) {
+      continue;
+    }
+    assertKnownStandardScenarioIds([standardId]);
+    covered.add(standardId);
+  }
+  return [...covered];
+}
+
+/**
+ * Returns the expected standard scenario ids that are not covered.
+ *
+ * @param params - Covered ids and expected ids; both lists are validated against the catalog first.
+ * @returns Expected ids absent from the covered list, in expected order.
+ * @throws Error if either list contains an id that is not in the standard catalog.
+ */
+export function findMissingLiveTransportStandardScenarios(params: {
+  coveredStandardScenarioIds: readonly LiveTransportStandardScenarioId[];
+  expectedStandardScenarioIds: readonly LiveTransportStandardScenarioId[];
+}) {
+  const { coveredStandardScenarioIds, expectedStandardScenarioIds } = params;
+  assertKnownStandardScenarioIds(coveredStandardScenarioIds);
+  assertKnownStandardScenarioIds(expectedStandardScenarioIds);
+
+  const coveredIds = new Set(coveredStandardScenarioIds);
+  const isMissing = (id: LiveTransportStandardScenarioId) => !coveredIds.has(id);
+  return expectedStandardScenarioIds.filter(isMissing);
+}
--- a/extensions/qa-lab/src/live-transports/matrix/matrix-driver-client.test.ts
+++ b/extensions/qa-lab/src/live-transports/matrix/matrix-driver-client.test.ts
@@ -4,7 +4,7 @@ import {
  createMatrixQaClient,
  provisionMatrixQaRoom,
  type MatrixQaObservedEvent,
-} from "./matrix-driver-client.js";
+} from "./client.js";

 function resolveRequestUrl(input: RequestInfo | URL) {
  if (typeof input === "string") {
--- a/extensions/qa-lab/src/live-transports/matrix/matrix-driver-client.ts
+++ b/extensions/qa-lab/src/live-transports/matrix/matrix-driver-client.ts
--- a/extensions/qa-lab/src/live-transports/matrix/matrix-harness.runtime.test.ts
+++ b/extensions/qa-lab/src/live-transports/matrix/matrix-harness.runtime.test.ts
@@ -2,11 +2,7 @@ import { mkdtemp, readFile, rm } from "node:fs/promises";
 import os from "node:os";
 import path from "node:path";
 import { describe, expect, it, vi } from "vitest";
-import {
-  __testing,
-  startMatrixQaHarness,
-  writeMatrixQaHarnessFiles,
-} from "./matrix-harness.runtime.js";
+import { __testing, startMatrixQaHarness, writeMatrixQaHarnessFiles } from "./harness.runtime.js";

 describe("matrix harness runtime", () => {
  it("writes a pinned Tuwunel compose file and redacted manifest", async () => {
--- a/extensions/qa-lab/src/live-transports/matrix/matrix-harness.runtime.ts
+++ b/extensions/qa-lab/src/live-transports/matrix/matrix-harness.runtime.ts
@@ -11,7 +11,7 @@ import {
  waitForHealth,
  type FetchLike,
  type RunCommand,
-} from "../../docker-runtime.js";
+} from "../docker-runtime.js";

 const MATRIX_QA_DEFAULT_IMAGE = "ghcr.io/matrix-construct/tuwunel:v1.5.1";
 const MATRIX_QA_DEFAULT_SERVER_NAME = "matrix-qa.test";
				`@@ -0,0 +1 @@`
				`export { runQaMatrixCommand } from "./src/cli.runtime.js";`
				`@@ -0,0 +1 @@`
				`export { qaRunnerCliRegistrations, registerMatrixQaCli } from "./src/cli.js";`
				`@@ -0,0 +1 @@`
				`export { qaRunnerCliRegistrations } from "./cli.js";`
				`@@ -0,0 +1 @@`
				`export { runMatrixQaLive } from "./src/runners/contract/runtime.js";`