From d5b326523f82ca910ed73ef5389e884e71676e3d Mon Sep 17 00:00:00 2001
From: Josh Avant <830519+joshavant@users.noreply.github.com>
Date: Sun, 19 Apr 2026 21:13:27 -0500
Subject: [PATCH] qa-lab: make live lanes CI-ready for v1 E2E automation
 (#69122)

* qa-lab: harden CI defaults and failure semantics for live lanes

* qa-lab: add unit tests for suite progress logging defaults

* qa-lab: cover malformed multipass summary edge cases

* qa-lab: share suite summary failure counting helper

* qa-lab: test allow-failures parse wiring and sanitize progress ids

* fix: note qa CI live-lane defaults in changelog (#69122) (thanks @joshavant)
---
 CHANGELOG.md                                  |   1 +
 docs/concepts/qa-e2e-automation.md            |  10 +-
 docs/help/testing.md                          |  12 +-
 extensions/qa-lab/src/cli.runtime.test.ts     | 276 ++++++++++++++++++
 extensions/qa-lab/src/cli.runtime.ts          |  42 +++
 extensions/qa-lab/src/cli.test.ts             |  25 ++
 extensions/qa-lab/src/cli.ts                  |   8 +
 .../shared/credential-lease.runtime.test.ts   |  59 ++++
 .../shared/credential-lease.runtime.ts        |  10 +-
 .../shared/live-transport-cli.runtime.ts      |   1 +
 .../shared/live-transport-cli.ts              |   8 +
 .../live-transports/telegram/cli.runtime.ts   |   9 +-
 .../src/live-transports/telegram/cli.ts       |   3 +-
 .../qa-lab/src/multipass.runtime.test.ts      |  11 +
 extensions/qa-lab/src/multipass.runtime.ts    |   3 +
 extensions/qa-lab/src/suite-summary.test.ts   |  36 +++
 extensions/qa-lab/src/suite-summary.ts        |  64 ++++
 extensions/qa-lab/src/suite.test.ts           |  56 +++-
 extensions/qa-lab/src/suite.ts                | 112 +++++--
 extensions/qa-matrix/src/run-config.test.ts   |  19 ++
 extensions/qa-matrix/src/run-config.ts        |   9 +-
 21 files changed, 737 insertions(+), 37 deletions(-)
 create mode 100644 extensions/qa-lab/src/suite-summary.test.ts
 create mode 100644 extensions/qa-lab/src/suite-summary.ts
 create mode 100644 extensions/qa-matrix/src/run-config.test.ts
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 82bc36856fa..43149bf1465 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ Docs: https://docs.openclaw.ai
 
 - Plugins/tasks: add a detached runtime registration contract so plugin executors can own detached task lifecycle and cancellation without reaching into core task internals. (#68915) Thanks @mbelinky.
 - Terminal/logging: optimize `sanitizeForLog()` by replacing the iterative control-character stripping loop with a single regex pass while preserving the existing ANSI-first sanitization behavior. (#67205) Thanks @bulutmuf.
+- QA/CI: make `openclaw qa suite` and `openclaw qa telegram` fail by default when scenarios fail, add `--allow-failures` for artifact-only runs, and tighten live-lane defaults for CI automation (`qa-channel` suite concurrency defaults to 4; the Convex credential role defaults to `ci` when `CI` is set). (#69122) Thanks @joshavant.
 
 ### Fixes
 
diff --git a/docs/concepts/qa-e2e-automation.md b/docs/concepts/qa-e2e-automation.md
index 14953f3aa81..c6219b2f1ab 100644
--- a/docs/concepts/qa-e2e-automation.md
+++ b/docs/concepts/qa-e2e-automation.md
@@ -80,6 +80,8 @@ disposable server. It requires `OPENCLAW_QA_TELEGRAM_GROUP_ID`,
 private group. The SUT bot must have a Telegram username, and bot-to-bot
 observation works best when both bots have Bot-to-Bot Communication Mode
 enabled in `@BotFather`.
+The command exits non-zero when any scenario fails. Use `--allow-failures` when
+you want artifacts without a failing exit code.
 
 Live transport lanes now share one smaller contract instead of each inventing
 their own scenario list shape:
@@ -107,9 +109,11 @@ inside the guest, runs `qa suite`, then copies the normal QA report and
 summary back into `.artifacts/qa-e2e/...` on the host.
 It reuses the same scenario-selection behavior as `qa suite` on the host.
 Host and Multipass suite runs execute multiple selected scenarios in parallel
-with isolated gateway workers by default, up to 64 workers or the selected
-scenario count. Use `--concurrency <count>` to tune the worker count, or
-`--concurrency 1` for serial execution.
+with isolated gateway workers by default. `qa-channel` defaults to concurrency
+4, capped by the selected scenario count. Use `--concurrency <count>` to tune
+the worker count, or `--concurrency 1` for serial execution.
+The command exits non-zero when any scenario fails. Use `--allow-failures` when
+you want artifacts without a failing exit code.
 Live runs forward the supported QA auth inputs that are practical for the
 guest: env-based provider keys, the QA live provider config path, and
 `CODEX_HOME` when present. Keep `--output-dir` under the repo root so the guest
diff --git a/docs/help/testing.md b/docs/help/testing.md
index 3aa61439670..a7260d8163d 100644
--- a/docs/help/testing.md
+++ b/docs/help/testing.md
@@ -49,9 +49,11 @@ These commands sit beside the main test suites when you need QA-lab realism:
 - `pnpm openclaw qa suite`
   - Runs repo-backed QA scenarios directly on the host.
   - Runs multiple selected scenarios in parallel by default with isolated
-    gateway workers, up to 64 workers or the selected scenario count. Use
-    `--concurrency <count>` to tune the worker count, or `--concurrency 1` for
-    the older serial lane.
+    gateway workers. `qa-channel` defaults to concurrency 4 (bounded by the
+    selected scenario count). Use `--concurrency <count>` to tune the worker
+    count, or `--concurrency 1` for the older serial lane.
+  - Exits non-zero when any scenario fails. Use `--allow-failures` when you
+    want artifacts without a failing exit code.
   - Supports provider modes `live-frontier`, `mock-openai`, and `aimock`.
     `aimock` starts a local AIMock-backed provider server for experimental
     fixture and protocol-mock coverage without replacing the scenario-aware
@@ -86,6 +88,8 @@ These commands sit beside the main test suites when you need QA-lab realism:
   - Runs the Telegram live QA lane against a real private group using the driver and SUT bot tokens from env.
   - Requires `OPENCLAW_QA_TELEGRAM_GROUP_ID`, `OPENCLAW_QA_TELEGRAM_DRIVER_BOT_TOKEN`, and `OPENCLAW_QA_TELEGRAM_SUT_BOT_TOKEN`. The group id must be the numeric Telegram chat id.
   - Supports `--credential-source convex` for shared pooled credentials. Use env mode by default, or set `OPENCLAW_QA_CREDENTIAL_SOURCE=convex` to opt into pooled leases.
+  - Exits non-zero when any scenario fails. Use `--allow-failures` when you
+    want artifacts without a failing exit code.
   - Requires two distinct bots in the same private group, with the SUT bot exposing a Telegram username.
   - For stable bot-to-bot observation, enable Bot-to-Bot Communication Mode in `@BotFather` for both bots and ensure the driver bot can observe group bot traffic.
   - Writes a Telegram QA report, summary, and observed-messages artifact under `.artifacts/qa-e2e/...`.
@@ -118,7 +122,7 @@ Required env vars:
   - `OPENCLAW_QA_CONVEX_SECRET_CI` for `ci`
 - Credential role selection:
   - CLI: `--credential-role maintainer|ci`
-  - Env default: `OPENCLAW_QA_CREDENTIAL_ROLE` (defaults to `maintainer`)
+  - Env default: `OPENCLAW_QA_CREDENTIAL_ROLE` (when unset, defaults to `ci` if the `CI` env var is truthy, e.g. `CI=true`, and to `maintainer` otherwise)
 
 Optional env vars:
 
diff --git a/extensions/qa-lab/src/cli.runtime.test.ts b/extensions/qa-lab/src/cli.runtime.test.ts
index 418d8a001c1..d0af18443c2 100644
--- a/extensions/qa-lab/src/cli.runtime.test.ts
+++ b/extensions/qa-lab/src/cli.runtime.test.ts
@@ -103,6 +103,7 @@ describe("qa cli runtime", () => {
       watchUrl: "http://127.0.0.1:43124",
       reportPath: "/tmp/report.md",
       summaryPath: "/tmp/summary.json",
+      scenarios: [],
     });
     runQaCharacterEval.mockResolvedValue({
       reportPath: "/tmp/character-report.md",
@@ -199,6 +200,7 @@ describe("qa cli runtime", () => {
       primaryModel: "openai/gpt-5.4",
       alternateModel: "openai/gpt-5.4",
       fastMode: true,
+      allowFailures: undefined,
       scenarioIds: ["telegram-help-command"],
       sutAccountId: "sut-live",
     });
@@ -223,10 +225,68 @@ describe("qa cli runtime", () => {
       expect.objectContaining({
         repoRoot: path.resolve("/tmp/openclaw-repo"),
         providerMode: "live-frontier",
+        allowFailures: undefined,
       }),
     );
   });
 
+  it("sets a failing exit code when telegram scenarios fail", async () => {
+    const priorExitCode = process.exitCode;
+    process.exitCode = undefined;
+    runTelegramQaLive.mockResolvedValueOnce({
+      outputDir: "/tmp/telegram",
+      reportPath: "/tmp/telegram/report.md",
+      summaryPath: "/tmp/telegram/summary.json",
+      observedMessagesPath: "/tmp/telegram/observed.json",
+      scenarios: [
+        {
+          id: "telegram-help-command",
+          title: "Telegram help command reply",
+          status: "fail",
+          details: "missing expected text",
+        },
+      ],
+    });
+
+    try {
+      await runQaTelegramCommand({
+        repoRoot: "/tmp/openclaw-repo",
+      });
+      expect(process.exitCode).toBe(1);
+    } finally {
+      process.exitCode = priorExitCode;
+    }
+  });
+
+  it("keeps telegram exit code clear when --allow-failures is set", async () => {
+    const priorExitCode = process.exitCode;
+    process.exitCode = undefined;
+    runTelegramQaLive.mockResolvedValueOnce({
+      outputDir: "/tmp/telegram",
+      reportPath: "/tmp/telegram/report.md",
+      summaryPath: "/tmp/telegram/summary.json",
+      observedMessagesPath: "/tmp/telegram/observed.json",
+      scenarios: [
+        {
+          id: "telegram-help-command",
+          title: "Telegram help command reply",
+          status: "fail",
+          details: "missing expected text",
+        },
+      ],
+    });
+
+    try {
+      await runQaTelegramCommand({
+        repoRoot: "/tmp/openclaw-repo",
+        allowFailures: true,
+      });
+      expect(process.exitCode).toBeUndefined();
+    } finally {
+      process.exitCode = priorExitCode;
+    }
+  });
+
   it("passes host suite concurrency through", async () => {
     await runQaSuiteCommand({
       repoRoot: "/tmp/openclaw-repo",
@@ -244,6 +304,59 @@ describe("qa cli runtime", () => {
     );
   });
 
+  it("sets a failing exit code when host suite scenarios fail", async () => {
+    const priorExitCode = process.exitCode;
+    process.exitCode = undefined;
+    runQaSuiteFromRuntime.mockResolvedValueOnce({
+      watchUrl: "http://127.0.0.1:43124",
+      reportPath: "/tmp/report.md",
+      summaryPath: "/tmp/summary.json",
+      scenarios: [
+        {
+          name: "channel chat baseline",
+          status: "fail",
+          steps: [],
+        },
+      ],
+    });
+
+    try {
+      await runQaSuiteCommand({
+        repoRoot: "/tmp/openclaw-repo",
+      });
+      expect(process.exitCode).toBe(1);
+    } finally {
+      process.exitCode = priorExitCode;
+    }
+  });
+
+  it("keeps host suite exit code clear when --allow-failures is set", async () => {
+    const priorExitCode = process.exitCode;
+    process.exitCode = undefined;
+    runQaSuiteFromRuntime.mockResolvedValueOnce({
+      watchUrl: "http://127.0.0.1:43124",
+      reportPath: "/tmp/report.md",
+      summaryPath: "/tmp/summary.json",
+      scenarios: [
+        {
+          name: "channel chat baseline",
+          status: "fail",
+          steps: [],
+        },
+      ],
+    });
+
+    try {
+      await runQaSuiteCommand({
+        repoRoot: "/tmp/openclaw-repo",
+        allowFailures: true,
+      });
+      expect(process.exitCode).toBeUndefined();
+    } finally {
+      process.exitCode = priorExitCode;
+    }
+  });
+
   it("passes host suite CLI auth mode through", async () => {
     await runQaSuiteCommand({
       repoRoot: "/tmp/openclaw-repo",
@@ -475,6 +588,7 @@ describe("qa cli runtime", () => {
       runner: "multipass",
       providerMode: "mock-openai",
       scenarioIds: ["channel-chat-baseline"],
+      allowFailures: true,
       concurrency: 3,
       image: "lts",
       cpus: 2,
@@ -490,6 +604,7 @@ describe("qa cli runtime", () => {
       primaryModel: undefined,
       alternateModel: undefined,
       fastMode: undefined,
+      allowFailures: true,
       scenarioIds: ["channel-chat-baseline"],
       concurrency: 3,
       image: "lts",
@@ -508,6 +623,7 @@ describe("qa cli runtime", () => {
       primaryModel: "openai/gpt-5.4",
       alternateModel: "openai/gpt-5.4",
       fastMode: true,
+      allowFailures: true,
       scenarioIds: ["channel-chat-baseline"],
     });
 
@@ -519,11 +635,171 @@ describe("qa cli runtime", () => {
         primaryModel: "openai/gpt-5.4",
         alternateModel: "openai/gpt-5.4",
         fastMode: true,
+        allowFailures: true,
         scenarioIds: ["channel-chat-baseline"],
       }),
     );
   });
 
+  it("sets a failing exit code when multipass summary reports failed scenarios", async () => {
+    const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
+    const summaryPath = path.join(repoRoot, "qa-suite-summary.json");
+    await fs.writeFile(
+      summaryPath,
+      JSON.stringify({
+        counts: {
+          total: 2,
+          passed: 1,
+          failed: 1,
+        },
+      }),
+      "utf8",
+    );
+    runQaMultipass.mockResolvedValueOnce({
+      outputDir: repoRoot,
+      reportPath: path.join(repoRoot, "qa-suite-report.md"),
+      summaryPath,
+      hostLogPath: path.join(repoRoot, "multipass-host.log"),
+      bootstrapLogPath: path.join(repoRoot, "multipass-guest-bootstrap.log"),
+      guestScriptPath: path.join(repoRoot, "multipass-guest-run.sh"),
+      vmName: "openclaw-qa-test",
+      scenarioIds: ["channel-chat-baseline"],
+    });
+    const priorExitCode = process.exitCode;
+    process.exitCode = undefined;
+
+    try {
+      await runQaSuiteCommand({
+        repoRoot: "/tmp/openclaw-repo",
+        runner: "multipass",
+      });
+      expect(process.exitCode).toBe(1);
+    } finally {
+      process.exitCode = priorExitCode;
+      await fs.rm(repoRoot, { recursive: true, force: true });
+    }
+  });
+
+  it("rejects malformed multipass summary JSON", async () => {
+    const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
+    const summaryPath = path.join(repoRoot, "qa-suite-summary.json");
+    await fs.writeFile(summaryPath, "{not-json", "utf8");
+    runQaMultipass.mockResolvedValueOnce({
+      outputDir: repoRoot,
+      reportPath: path.join(repoRoot, "qa-suite-report.md"),
+      summaryPath,
+      hostLogPath: path.join(repoRoot, "multipass-host.log"),
+      bootstrapLogPath: path.join(repoRoot, "multipass-guest-bootstrap.log"),
+      guestScriptPath: path.join(repoRoot, "multipass-guest-run.sh"),
+      vmName: "openclaw-qa-test",
+      scenarioIds: ["channel-chat-baseline"],
+    });
+
+    try {
+      await expect(
+        runQaSuiteCommand({
+          repoRoot: "/tmp/openclaw-repo",
+          runner: "multipass",
+        }),
+      ).rejects.toThrow("Could not parse QA summary JSON");
+    } finally {
+      await fs.rm(repoRoot, { recursive: true, force: true });
+    }
+  });
+
+  it("rejects unreadable multipass summary JSON with read/parse wording", async () => {
+    const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
+    const summaryPath = path.join(repoRoot, "qa-suite-summary.json");
+    runQaMultipass.mockResolvedValueOnce({
+      outputDir: repoRoot,
+      reportPath: path.join(repoRoot, "qa-suite-report.md"),
+      summaryPath,
+      hostLogPath: path.join(repoRoot, "multipass-host.log"),
+      bootstrapLogPath: path.join(repoRoot, "multipass-guest-bootstrap.log"),
+      guestScriptPath: path.join(repoRoot, "multipass-guest-run.sh"),
+      vmName: "openclaw-qa-test",
+      scenarioIds: ["channel-chat-baseline"],
+    });
+
+    try {
+      await expect(
+        runQaSuiteCommand({
+          repoRoot: "/tmp/openclaw-repo",
+          runner: "multipass",
+        }),
+      ).rejects.toThrow("Could not read QA summary JSON");
+    } finally {
+      await fs.rm(repoRoot, { recursive: true, force: true });
+    }
+  });
+
+  it("rejects partial multipass summary JSON without failure fields", async () => {
+    const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
+    const summaryPath = path.join(repoRoot, "qa-suite-summary.json");
+    await fs.writeFile(summaryPath, JSON.stringify({ counts: { total: 2, passed: 2 } }), "utf8");
+    runQaMultipass.mockResolvedValueOnce({
+      outputDir: repoRoot,
+      reportPath: path.join(repoRoot, "qa-suite-report.md"),
+      summaryPath,
+      hostLogPath: path.join(repoRoot, "multipass-host.log"),
+      bootstrapLogPath: path.join(repoRoot, "multipass-guest-bootstrap.log"),
+      guestScriptPath: path.join(repoRoot, "multipass-guest-run.sh"),
+      vmName: "openclaw-qa-test",
+      scenarioIds: ["channel-chat-baseline"],
+    });
+
+    try {
+      await expect(
+        runQaSuiteCommand({
+          repoRoot: "/tmp/openclaw-repo",
+          runner: "multipass",
+        }),
+      ).rejects.toThrow("did not include counts.failed or scenarios[].status");
+    } finally {
+      await fs.rm(repoRoot, { recursive: true, force: true });
+    }
+  });
+
+  it("keeps multipass exit code clear when --allow-failures is set", async () => {
+    const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
+    const summaryPath = path.join(repoRoot, "qa-suite-summary.json");
+    await fs.writeFile(
+      summaryPath,
+      JSON.stringify({
+        counts: {
+          total: 2,
+          passed: 1,
+          failed: 1,
+        },
+      }),
+      "utf8",
+    );
+    runQaMultipass.mockResolvedValueOnce({
+      outputDir: repoRoot,
+      reportPath: path.join(repoRoot, "qa-suite-report.md"),
+      summaryPath,
+      hostLogPath: path.join(repoRoot, "multipass-host.log"),
+      bootstrapLogPath: path.join(repoRoot, "multipass-guest-bootstrap.log"),
+      guestScriptPath: path.join(repoRoot, "multipass-guest-run.sh"),
+      vmName: "openclaw-qa-test",
+      scenarioIds: ["channel-chat-baseline"],
+    });
+    const priorExitCode = process.exitCode;
+    process.exitCode = undefined;
+
+    try {
+      await runQaSuiteCommand({
+        repoRoot: "/tmp/openclaw-repo",
+        runner: "multipass",
+        allowFailures: true,
+      });
+      expect(process.exitCode).toBeUndefined();
+    } finally {
+      process.exitCode = priorExitCode;
+      await fs.rm(repoRoot, { recursive: true, force: true });
+    }
+  });
+
   it("passes provider-qualified mock parity suite selection through to the host runner", async () => {
     await runQaSuiteCommand({
       repoRoot: "/tmp/openclaw-repo",
diff --git a/extensions/qa-lab/src/cli.runtime.ts b/extensions/qa-lab/src/cli.runtime.ts
index 8fa2ba216a2..7d2de4464cc 100644
--- a/extensions/qa-lab/src/cli.runtime.ts
+++ b/extensions/qa-lab/src/cli.runtime.ts
@@ -39,6 +39,7 @@ import {
 } from "./run-config.js";
 import { readQaScenarioPack } from "./scenario-catalog.js";
 import { runQaSuiteFromRuntime } from "./suite-launch.runtime.js";
+import { readQaSuiteFailedScenarioCountFromSummary } from "./suite-summary.js";
 
 type InterruptibleServer = {
   baseUrl: string;
@@ -121,6 +122,34 @@ function parseQaPositiveIntegerOption(label: string, value: number | undefined)
   return Math.floor(value);
 }
 
+async function readQaFailedScenarioCountFromSummary(summaryPath: string) {
+  let summaryText: string;
+  try {
+    summaryText = await fs.readFile(summaryPath, "utf8");
+  } catch (error) {
+    throw new Error(
+      `Could not read QA summary JSON at ${summaryPath}: ${formatErrorMessage(error)}`,
+      { cause: error },
+    );
+  }
+  let payload: unknown;
+  try {
+    payload = JSON.parse(summaryText) as unknown;
+  } catch (error) {
+    throw new Error(
+      `Could not parse QA summary JSON at ${summaryPath}: ${formatErrorMessage(error)}`,
+      { cause: error },
+    );
+  }
+  const failedScenarioCount = readQaSuiteFailedScenarioCountFromSummary(payload);
+  if (failedScenarioCount !== null) {
+    return failedScenarioCount;
+  }
+  throw new Error(
+    `QA summary at ${summaryPath} did not include counts.failed or scenarios[].status.`,
+  );
+}
+
 function parseQaCliBackendAuthMode(value: string | undefined): QaCliBackendAuthMode | undefined {
   const normalized = value?.trim().toLowerCase();
   if (!normalized) {
@@ -329,6 +358,7 @@ export async function runQaSuiteCommand(opts: {
   parityPack?: string;
   scenarioIds?: string[];
   concurrency?: number;
+  allowFailures?: boolean;
   image?: string;
   cpus?: number;
   memory?: string;
@@ -341,6 +371,7 @@ export async function runQaSuiteCommand(opts: {
     parityPack: opts.parityPack,
     scenarioIds: opts.scenarioIds,
   });
+  const allowFailures = opts.allowFailures === true;
   if (runner !== "host" && runner !== "multipass") {
     throw new Error(`--runner must be one of host or multipass, got "${opts.runner}".`);
   }
@@ -367,6 +398,7 @@ export async function runQaSuiteCommand(opts: {
       primaryModel: opts.primaryModel,
       alternateModel: opts.alternateModel,
       fastMode: opts.fastMode,
+      allowFailures: true,
       scenarioIds,
       ...(opts.concurrency !== undefined
         ? { concurrency: parseQaPositiveIntegerOption("--concurrency", opts.concurrency) }
@@ -381,6 +413,12 @@ export async function runQaSuiteCommand(opts: {
     process.stdout.write(`QA Multipass summary: ${result.summaryPath}\n`);
     process.stdout.write(`QA Multipass host log: ${result.hostLogPath}\n`);
     process.stdout.write(`QA Multipass bootstrap log: ${result.bootstrapLogPath}\n`);
+    if (!allowFailures) {
+      const failedScenarioCount = await readQaFailedScenarioCountFromSummary(result.summaryPath);
+      if (failedScenarioCount > 0) {
+        process.exitCode = 1;
+      }
+    }
     return;
   }
   const result = await runQaSuiteFromRuntime({
@@ -400,6 +438,10 @@ export async function runQaSuiteCommand(opts: {
   process.stdout.write(`QA suite watch: ${result.watchUrl}\n`);
   process.stdout.write(`QA suite report: ${result.reportPath}\n`);
   process.stdout.write(`QA suite summary: ${result.summaryPath}\n`);
+  const failedScenarioCount = readQaSuiteFailedScenarioCountFromSummary(result);
+  if (!allowFailures && failedScenarioCount !== null && failedScenarioCount > 0) {
+    process.exitCode = 1;
+  }
 }
 
 export async function runQaParityReportCommand(opts: {
diff --git a/extensions/qa-lab/src/cli.test.ts b/extensions/qa-lab/src/cli.test.ts
index 5db64663f3e..0a87ba9f328 100644
--- a/extensions/qa-lab/src/cli.test.ts
+++ b/extensions/qa-lab/src/cli.test.ts
@@ -46,6 +46,7 @@ const {
   runQaCredentialsRemoveCommand,
   runQaCoverageReportCommand,
   runQaProviderServerCommand,
+  runQaSuiteCommand,
   runQaTelegramCommand,
 } = vi.hoisted(() => ({
   runQaCredentialsAddCommand: vi.fn(),
@@ -53,6 +54,7 @@ const {
   runQaCredentialsRemoveCommand: vi.fn(),
   runQaCoverageReportCommand: vi.fn(),
   runQaProviderServerCommand: vi.fn(),
+  runQaSuiteCommand: vi.fn(),
   runQaTelegramCommand: vi.fn(),
 }));
 
@@ -76,6 +78,7 @@ vi.mock("./cli.runtime.js", () => ({
   runQaCredentialsRemoveCommand,
   runQaCoverageReportCommand,
   runQaProviderServerCommand,
+  runQaSuiteCommand,
 }));
 
 import { registerQaLabCli } from "./cli.js";
@@ -90,6 +93,7 @@ describe("qa cli registration", () => {
     runQaCredentialsRemoveCommand.mockReset();
     runQaCoverageReportCommand.mockReset();
     runQaProviderServerCommand.mockReset();
+    runQaSuiteCommand.mockReset();
     runQaTelegramCommand.mockReset();
     listQaRunnerCliContributions
       .mockReset()
@@ -188,6 +192,7 @@ describe("qa cli registration", () => {
       primaryModel: undefined,
       alternateModel: undefined,
       fastMode: false,
+      allowFailures: false,
       scenarioIds: [],
       sutAccountId: "sut",
       credentialSource: undefined,
@@ -195,6 +200,26 @@ describe("qa cli registration", () => {
     });
   });
 
+  it("forwards --allow-failures for telegram runs", async () => {
+    await program.parseAsync(["node", "openclaw", "qa", "telegram", "--allow-failures"]);
+
+    expect(runQaTelegramCommand).toHaveBeenCalledWith(
+      expect.objectContaining({
+        allowFailures: true,
+      }),
+    );
+  });
+
+  it("forwards --allow-failures for suite runs", async () => {
+    await program.parseAsync(["node", "openclaw", "qa", "suite", "--allow-failures"]);
+
+    expect(runQaSuiteCommand).toHaveBeenCalledWith(
+      expect.objectContaining({
+        allowFailures: true,
+      }),
+    );
+  });
+
   it("routes credential add flags into the qa runtime command", async () => {
     await program.parseAsync([
       "node",
diff --git a/extensions/qa-lab/src/cli.ts b/extensions/qa-lab/src/cli.ts
index d4f1feea9aa..e08d759ecc0 100644
--- a/extensions/qa-lab/src/cli.ts
+++ b/extensions/qa-lab/src/cli.ts
@@ -35,6 +35,7 @@ async function runQaSuite(opts: {
   primaryModel?: string;
   alternateModel?: string;
   fastMode?: boolean;
+  allowFailures?: boolean;
   cliAuthMode?: string;
   parityPack?: string;
   scenarioIds?: string[];
@@ -238,6 +239,11 @@ export function registerQaLabCli(program: Command) {
     .option("--concurrency <count>", "Scenario worker concurrency", (value: string) =>
       Number(value),
     )
+    .option(
+      "--allow-failures",
+      "Write artifacts without setting a failing exit code when scenarios fail",
+      false,
+    )
     .option("--fast", "Enable provider fast mode where supported", false)
     .option("--image <alias>", "Multipass image alias")
     .option("--cpus <count>", "Multipass vCPU count", (value: string) => Number(value))
@@ -256,6 +262,7 @@ export function registerQaLabCli(program: Command) {
         parityPack?: string;
         scenario?: string[];
         concurrency?: number;
+        allowFailures?: boolean;
         fast?: boolean;
         image?: string;
         cpus?: number;
@@ -275,6 +282,7 @@ export function registerQaLabCli(program: Command) {
           parityPack: opts.parityPack,
           scenarioIds: opts.scenario,
           concurrency: opts.concurrency,
+          allowFailures: opts.allowFailures,
           image: opts.image,
           cpus: opts.cpus,
           memory: opts.memory,
diff --git a/extensions/qa-lab/src/live-transports/shared/credential-lease.runtime.test.ts b/extensions/qa-lab/src/live-transports/shared/credential-lease.runtime.test.ts
index 9853256e03e..9a0a6631c60 100644
--- a/extensions/qa-lab/src/live-transports/shared/credential-lease.runtime.test.ts
+++ b/extensions/qa-lab/src/live-transports/shared/credential-lease.runtime.test.ts
@@ -80,6 +80,65 @@ describe("credential lease runtime", () => {
     expect(headers.authorization).toBe("Bearer maintainer-secret");
   });
 
+  it("defaults convex credential role to maintainer outside CI", async () => {
+    const fetchImpl = vi.fn<typeof fetch>().mockResolvedValueOnce(
+      jsonResponse({
+        status: "ok",
+        credentialId: "cred-maintainer-default",
+        leaseToken: "lease-maintainer-default",
+        payload: { groupId: "-100123", driverToken: "driver", sutToken: "sut" },
+      }),
+    );
+
+    await acquireQaCredentialLease({
+      kind: "telegram",
+      source: "convex",
+      env: {
+        OPENCLAW_QA_CONVEX_SITE_URL: "https://qa-cred.example.convex.site",
+        OPENCLAW_QA_CONVEX_SECRET_MAINTAINER: "maintainer-secret",
+      },
+      fetchImpl,
+      resolveEnvPayload: () => ({ groupId: "-1", driverToken: "unused", sutToken: "unused" }),
+      parsePayload: (payload) =>
+        payload as { groupId: string; driverToken: string; sutToken: string },
+    });
+
+    const firstCall = fetchImpl.mock.calls[0];
+    const firstInit = firstCall?.[1];
+    const headers = firstInit?.headers as Record<string, string>;
+    expect(headers.authorization).toBe("Bearer maintainer-secret");
+  });
+
+  it("defaults convex credential role to ci when CI=true", async () => {
+    const fetchImpl = vi.fn<typeof fetch>().mockResolvedValueOnce(
+      jsonResponse({
+        status: "ok",
+        credentialId: "cred-ci-default",
+        leaseToken: "lease-ci-default",
+        payload: { groupId: "-100123", driverToken: "driver", sutToken: "sut" },
+      }),
+    );
+
+    await acquireQaCredentialLease({
+      kind: "telegram",
+      source: "convex",
+      env: {
+        CI: "true",
+        OPENCLAW_QA_CONVEX_SITE_URL: "https://qa-cred.example.convex.site",
+        OPENCLAW_QA_CONVEX_SECRET_CI: "ci-secret",
+      },
+      fetchImpl,
+      resolveEnvPayload: () => ({ groupId: "-1", driverToken: "unused", sutToken: "unused" }),
+      parsePayload: (payload) =>
+        payload as { groupId: string; driverToken: string; sutToken: string },
+    });
+
+    const firstCall = fetchImpl.mock.calls[0];
+    const firstInit = firstCall?.[1];
+    const headers = firstInit?.headers as Record<string, string>;
+    expect(headers.authorization).toBe("Bearer ci-secret");
+  });
+
   it("retries convex acquire while the pool is exhausted", async () => {
     const fetchImpl = vi
       .fn<typeof fetch>()
diff --git a/extensions/qa-lab/src/live-transports/shared/credential-lease.runtime.ts b/extensions/qa-lab/src/live-transports/shared/credential-lease.runtime.ts
index c2e38a779f8..a7bc31aaccc 100644
--- a/extensions/qa-lab/src/live-transports/shared/credential-lease.runtime.ts
+++ b/extensions/qa-lab/src/live-transports/shared/credential-lease.runtime.ts
@@ -114,8 +114,12 @@ function normalizeQaCredentialSource(value: string | undefined): QaCredentialLea
   throw new Error(`Credential source must be one of env or convex, got "${value}".`);
 }
 
-function normalizeQaCredentialRole(value: string | undefined): QaCredentialRole {
-  const normalized = value?.trim().toLowerCase() || "maintainer";
+function normalizeQaCredentialRole(
+  value: string | undefined,
+  env: NodeJS.ProcessEnv = process.env,
+): QaCredentialRole {
+  const defaultRole = isTruthyOptIn(env.CI) ? "ci" : "maintainer";
+  const normalized = value?.trim().toLowerCase() || defaultRole;
   if (normalized === "maintainer" || normalized === "ci") {
     return normalized;
   }
@@ -350,7 +354,7 @@ export async function acquireQaCredentialLease<TPayload>(
     };
   }
 
-  const role = normalizeQaCredentialRole(opts.role ?? env.OPENCLAW_QA_CREDENTIAL_ROLE);
+  const role = normalizeQaCredentialRole(opts.role ?? env.OPENCLAW_QA_CREDENTIAL_ROLE, env);
   const config = resolveConvexCredentialBrokerConfig({
     env,
     role,
diff --git a/extensions/qa-lab/src/live-transports/shared/live-transport-cli.runtime.ts b/extensions/qa-lab/src/live-transports/shared/live-transport-cli.runtime.ts
index 2cc32651fe8..5ab58e4e7b9 100644
--- a/extensions/qa-lab/src/live-transports/shared/live-transport-cli.runtime.ts
+++ b/extensions/qa-lab/src/live-transports/shared/live-transport-cli.runtime.ts
@@ -24,6 +24,7 @@ export function resolveLiveTransportQaRunOptions(
     primaryModel: opts.primaryModel,
     alternateModel: opts.alternateModel,
     fastMode: opts.fastMode,
+    allowFailures: opts.allowFailures,
     scenarioIds: opts.scenarioIds,
     sutAccountId: opts.sutAccountId,
     credentialSource: opts.credentialSource?.trim(),
diff --git a/extensions/qa-lab/src/live-transports/shared/live-transport-cli.ts b/extensions/qa-lab/src/live-transports/shared/live-transport-cli.ts
index 047eab12f20..f75c384782a 100644
--- a/extensions/qa-lab/src/live-transports/shared/live-transport-cli.ts
+++ b/extensions/qa-lab/src/live-transports/shared/live-transport-cli.ts
@@ -10,6 +10,7 @@ export type LiveTransportQaCommandOptions = {
   primaryModel?: string;
   alternateModel?: string;
   fastMode?: boolean;
+  allowFailures?: boolean;
   scenarioIds?: string[];
   sutAccountId?: string;
   credentialSource?: string;
@@ -24,6 +25,7 @@ type LiveTransportQaCommanderOptions = {
   altModel?: string;
   scenario?: string[];
   fast?: boolean;
+  allowFailures?: boolean;
   sutAccount?: string;
   credentialSource?: string;
   credentialRole?: string;
@@ -57,6 +59,7 @@ export function mapLiveTransportQaCommanderOptions(
     primaryModel: opts.model,
     alternateModel: opts.altModel,
     fastMode: opts.fast,
+    allowFailures: opts.allowFailures,
     scenarioIds: opts.scenario,
     sutAccountId: opts.sutAccount,
     credentialSource: opts.credentialSource,
@@ -84,6 +87,11 @@ export function registerLiveTransportQaCli(params: {
     .option("--alt-model <ref>", "Alternate provider/model ref")
     .option("--scenario <id>", params.scenarioHelp, collectString, [])
     .option("--fast", "Enable provider fast mode where supported", false)
+    .option(
+      "--allow-failures",
+      "Write artifacts without setting a failing exit code when scenarios fail",
+      false,
+    )
     .option("--sut-account <id>", params.sutAccountHelp, "sut");
 
   if (params.credentialOptions) {
diff --git a/extensions/qa-lab/src/live-transports/telegram/cli.runtime.ts b/extensions/qa-lab/src/live-transports/telegram/cli.runtime.ts
index b99e37b1a70..c07e4ea8d04 100644
--- a/extensions/qa-lab/src/live-transports/telegram/cli.runtime.ts
+++ b/extensions/qa-lab/src/live-transports/telegram/cli.runtime.ts
@@ -6,10 +6,17 @@ import {
 import { runTelegramQaLive } from "./telegram-live.runtime.js";
 
 export async function runQaTelegramCommand(opts: LiveTransportQaCommandOptions) {
-  const result = await runTelegramQaLive(resolveLiveTransportQaRunOptions(opts));
+  const runOptions = resolveLiveTransportQaRunOptions(opts); // reused for the allowFailures check
+  const result = await runTelegramQaLive(runOptions);
   printLiveTransportQaArtifacts("Telegram QA", {
     report: result.reportPath,
     summary: result.summaryPath,
     "observed messages": result.observedMessagesPath,
   });
+  if (
+    !runOptions.allowFailures && // allowFailures opts out of the failing exit code
+    result.scenarios.some((scenario) => scenario.status === "fail")
+  ) {
+    process.exitCode = 1; // signal failure via exitCode; nothing is thrown here
+  }
 }
diff --git a/extensions/qa-lab/src/live-transports/telegram/cli.ts b/extensions/qa-lab/src/live-transports/telegram/cli.ts
index 6237476b16f..2a40142d578 100644
--- a/extensions/qa-lab/src/live-transports/telegram/cli.ts
+++ b/extensions/qa-lab/src/live-transports/telegram/cli.ts
@@ -22,7 +22,8 @@ export const telegramQaCliRegistration: LiveTransportQaCliRegistration =
     commandName: "telegram",
     credentialOptions: {
       sourceDescription: "Credential source for Telegram QA: env or convex (default: env)",
-      roleDescription: "Credential role for convex auth: maintainer or ci (default: maintainer)",
+      roleDescription:
+        "Credential role for convex auth: maintainer or ci (default: ci in CI, maintainer otherwise)",
     },
     description: "Run the manual Telegram live QA lane against a private bot-to-bot group harness",
     outputDirHelp: "Telegram QA artifact directory",
diff --git a/extensions/qa-lab/src/multipass.runtime.test.ts b/extensions/qa-lab/src/multipass.runtime.test.ts
index e48d70b4fee..eacf9248f9f 100644
--- a/extensions/qa-lab/src/multipass.runtime.test.ts
+++ b/extensions/qa-lab/src/multipass.runtime.test.ts
@@ -140,6 +140,17 @@ describe("qa multipass runtime", () => {
     expect(script).toContain("'--provider-mode' 'live-frontier'");
   });
 
+  it("forwards --allow-failures into the guest qa suite command when requested", () => {
+    const artifactRoot = path.join(process.cwd(), ".artifacts", "qa-e2e");
+    const { qaCommand } = createQaMultipassPlan({
+      repoRoot: process.cwd(),
+      outputDir: path.join(artifactRoot, "multipass-allow-failures-test"),
+      allowFailures: true,
+      scenarioIds: ["channel-chat-baseline"],
+    });
+    expect(qaCommand).toEqual(expect.arrayContaining(["--allow-failures"]));
+  });
+
   it("redacts forwarded live secrets in the persisted artifact script", () => {
     vi.stubEnv("OPENAI_API_KEY", "test-openai-key");
     const plan = createQaMultipassPlan({
diff --git a/extensions/qa-lab/src/multipass.runtime.ts b/extensions/qa-lab/src/multipass.runtime.ts
index b93c1359fa1..e8f48f2e250 100644
--- a/extensions/qa-lab/src/multipass.runtime.ts
+++ b/extensions/qa-lab/src/multipass.runtime.ts
@@ -237,6 +237,7 @@ export function createQaMultipassPlan(params: {
   primaryModel?: string;
   alternateModel?: string;
   fastMode?: boolean;
+  allowFailures?: boolean;
   scenarioIds?: string[];
   concurrency?: number;
   image?: string;
@@ -275,6 +276,7 @@ export function createQaMultipassPlan(params: {
       ...(params.primaryModel ? ["--model", params.primaryModel] : []),
       ...(params.alternateModel ? ["--alt-model", params.alternateModel] : []),
       ...(params.fastMode ? ["--fast"] : []),
+      ...(params.allowFailures ? ["--allow-failures"] : []),
       ...(params.concurrency ? ["--concurrency", String(params.concurrency)] : []),
     ],
     scenarioIds,
@@ -544,6 +546,7 @@ export async function runQaMultipass(params: {
   primaryModel?: string;
   alternateModel?: string;
   fastMode?: boolean;
+  allowFailures?: boolean;
   scenarioIds?: string[];
   concurrency?: number;
   image?: string;
diff --git a/extensions/qa-lab/src/suite-summary.test.ts b/extensions/qa-lab/src/suite-summary.test.ts
new file mode 100644
index 00000000000..39357a6ee37
--- /dev/null
+++ b/extensions/qa-lab/src/suite-summary.test.ts
@@ -0,0 +1,36 @@
+import { describe, expect, it } from "vitest";
+import {
+  countQaSuiteFailedScenarios,
+  readQaSuiteFailedScenarioCountFromSummary,
+} from "./suite-summary.js";
+
+describe("qa suite summary helpers", () => {
+  it("counts failed scenarios from scenario statuses", () => {
+    expect(
+      countQaSuiteFailedScenarios([{ status: "pass" }, { status: "fail" }, { status: "fail" }]),
+    ).toBe(2);
+  });
+
+  it("prefers counts.failed when available", () => {
+    expect(
+      readQaSuiteFailedScenarioCountFromSummary({
+        counts: { failed: 3.8 }, // fractional on purpose: the helper floors the count
+        scenarios: [{ status: "pass" }, { status: "fail" }],
+      }),
+    ).toBe(3); // not 1: counts.failed takes precedence over the scenarios array
+  });
+
+  it("falls back to scenario statuses when counts.failed is missing", () => {
+    expect(
+      readQaSuiteFailedScenarioCountFromSummary({
+        counts: { total: 2 }, // no "failed" key, so the scenarios array is consulted
+        scenarios: [{ status: "pass" }, { status: "fail" }],
+      }),
+    ).toBe(1);
+  });
+
+  it("returns null for unsupported summary shapes", () => {
+    expect(readQaSuiteFailedScenarioCountFromSummary({ counts: { total: 2 } })).toBeNull();
+    expect(readQaSuiteFailedScenarioCountFromSummary("not-json-object")).toBeNull(); // non-object
+  });
+});
diff --git a/extensions/qa-lab/src/suite-summary.ts b/extensions/qa-lab/src/suite-summary.ts
new file mode 100644
index 00000000000..e0d06c8d999
--- /dev/null
+++ b/extensions/qa-lab/src/suite-summary.ts
@@ -0,0 +1,64 @@
+import type { QaProviderMode } from "./model-selection.js";
+
+export type QaSuiteSummaryScenario = {
+  name: string;
+  status: "pass" | "fail"; // the only field the failure-count helpers in this file read
+  steps: unknown[];
+  details?: string; // optional
+};
+
+export type QaSuiteSummaryJson = {
+  scenarios: QaSuiteSummaryScenario[]; // fallback source for the failed count
+  counts: {
+    total: number;
+    passed: number;
+    failed: number; // consulted first by readQaSuiteFailedScenarioCountFromSummary
+  };
+  run: {
+    startedAt: string;
+    finishedAt: string;
+    providerMode: QaProviderMode;
+    primaryModel: string;
+    primaryProvider: string | null;
+    primaryModelName: string | null;
+    alternateModel: string;
+    alternateProvider: string | null;
+    alternateModelName: string | null;
+    fastMode: boolean;
+    concurrency: number;
+    scenarioIds: string[] | null;
+  };
+};
+
+type QaSuiteScenarioStatus = Pick<QaSuiteSummaryScenario, "status">; // status-only view
+
+/** Returns how many of the given scenarios have status "fail". */
+export function countQaSuiteFailedScenarios(
+  scenarios: ReadonlyArray<QaSuiteScenarioStatus>,
+): number {
+  let failures = 0;
+  for (const { status } of scenarios) {
+    // Every non-"fail" status contributes zero to the tally.
+    failures += status === "fail" ? 1 : 0;
+  }
+  return failures;
+}
+
+/**
+ * Reads the failed-scenario count from a parsed summary of unknown shape: a finite numeric
+ * `counts.failed` wins (floored, clamped at 0), else `scenarios` entries with status "fail"
+ * are counted (null/primitive entries are skipped, not dereferenced), else null.
+ */
+export function readQaSuiteFailedScenarioCountFromSummary(summary: unknown): number | null {
+  if (!summary || typeof summary !== "object") {
+    return null;
+  }
+  const { counts, scenarios } = summary as { counts?: unknown; scenarios?: unknown };
+  const declared = (counts as { failed?: unknown } | null | undefined)?.failed;
+  if (typeof declared === "number" && Number.isFinite(declared)) {
+    return Math.max(0, Math.floor(declared));
+  }
+  // Optional chaining keeps a null entry in a malformed summary from throwing.
+  const isFailed = (item: unknown) => (item as { status?: unknown } | null)?.status === "fail";
+  return Array.isArray(scenarios) ? scenarios.filter(isFailed).length : null;
+}
diff --git a/extensions/qa-lab/src/suite.test.ts b/extensions/qa-lab/src/suite.test.ts
index 7f5a5e1fa06..bbcacd83f33 100644
--- a/extensions/qa-lab/src/suite.test.ts
+++ b/extensions/qa-lab/src/suite.test.ts
@@ -1,5 +1,5 @@
 import { describe, expect, it, vi } from "vitest";
-import { runQaSuite } from "./suite.js";
+import { qaSuiteProgressTesting, runQaSuite } from "./suite.js";
 
 describe("qa suite", () => {
   it("rejects unsupported transport ids before starting the lab", async () => {
@@ -14,4 +14,58 @@ describe("qa suite", () => {
 
     expect(startLab).not.toHaveBeenCalled();
   });
+
+  it("parses progress env booleans", () => {
+    expect(qaSuiteProgressTesting.parseQaSuiteBooleanEnv("true")).toBe(true);
+    expect(qaSuiteProgressTesting.parseQaSuiteBooleanEnv("on")).toBe(true);
+    expect(qaSuiteProgressTesting.parseQaSuiteBooleanEnv("false")).toBe(false);
+    expect(qaSuiteProgressTesting.parseQaSuiteBooleanEnv("off")).toBe(false);
+    expect(qaSuiteProgressTesting.parseQaSuiteBooleanEnv("maybe")).toBeUndefined(); // unrecognized
+  });
+
+  it("defaults progress logging from CI when no override is set", () => {
+    expect(qaSuiteProgressTesting.shouldLogQaSuiteProgress({ CI: "true" })).toBe(true);
+    expect(qaSuiteProgressTesting.shouldLogQaSuiteProgress({ CI: "false" })).toBe(false);
+  }); // neither case sets OPENCLAW_QA_SUITE_PROGRESS, so CI alone decides
+
+  it("applies OPENCLAW_QA_SUITE_PROGRESS override and falls back on invalid values", () => {
+    expect(
+      qaSuiteProgressTesting.shouldLogQaSuiteProgress({
+        CI: "false",
+        OPENCLAW_QA_SUITE_PROGRESS: "true",
+      }),
+    ).toBe(true); // the override beats CI=false
+    expect(
+      qaSuiteProgressTesting.shouldLogQaSuiteProgress({
+        CI: "true",
+        OPENCLAW_QA_SUITE_PROGRESS: "false",
+      }),
+    ).toBe(false); // the override beats CI=true
+    expect(
+      qaSuiteProgressTesting.shouldLogQaSuiteProgress({
+        CI: "false",
+        OPENCLAW_QA_SUITE_PROGRESS: "on",
+      }),
+    ).toBe(true);
+    expect(
+      qaSuiteProgressTesting.shouldLogQaSuiteProgress({
+        CI: "true",
+        OPENCLAW_QA_SUITE_PROGRESS: "off",
+      }),
+    ).toBe(false);
+    expect(
+      qaSuiteProgressTesting.shouldLogQaSuiteProgress({
+        CI: "true",
+        OPENCLAW_QA_SUITE_PROGRESS: "definitely",
+      }),
+    ).toBe(true); // "definitely" is unparseable, so CI=true decides
+  });
+
+  it("sanitizes scenario ids for progress logs", () => {
+    expect(qaSuiteProgressTesting.sanitizeQaSuiteProgressValue("scenario-id")).toBe("scenario-id");
+    expect(qaSuiteProgressTesting.sanitizeQaSuiteProgressValue("scenario\nid\tvalue")).toBe(
+      "scenario id value", // the newline and the tab each become a single space
+    );
+    expect(qaSuiteProgressTesting.sanitizeQaSuiteProgressValue("\u0000\u0001")).toBe("<empty>");
+  });
 });
diff --git a/extensions/qa-lab/src/suite.ts b/extensions/qa-lab/src/suite.ts
index a91c39093fd..911b0c50a6f 100644
--- a/extensions/qa-lab/src/suite.ts
+++ b/extensions/qa-lab/src/suite.ts
@@ -46,6 +46,7 @@ import {
 import { createQaSuiteScenarioFlowApi } from "./suite-runtime-flow.js";
 import { waitForGatewayHealthy, waitForTransportReady } from "./suite-runtime-gateway.js";
 import type { QaSuiteRuntimeEnv } from "./suite-runtime-types.js";
+import { countQaSuiteFailedScenarios, type QaSuiteSummaryJson } from "./suite-summary.js";
 import { closeQaWebSessions } from "./web-runtime.js";
 
 type QaSuiteStep = {
@@ -84,6 +85,49 @@ export type QaSuiteRunParams = {
   controlUiEnabled?: boolean;
 };
 
+/**
+ * Env flag parser: 1/true/yes/on give true, 0/false/no/off give false, anything else undefined.
+ */
+function parseQaSuiteBooleanEnv(value: string | undefined): boolean | undefined {
+  const token = value?.trim().toLowerCase() ?? "";
+  if (["1", "true", "yes", "on"].includes(token)) {
+    return true;
+  }
+  if (["0", "false", "no", "off"].includes(token)) {
+    return false;
+  }
+  return undefined;
+}
+
+/**
+ * A parseable OPENCLAW_QA_SUITE_PROGRESS wins; otherwise progress is on only when CI is truthy.
+ */
+function shouldLogQaSuiteProgress(env: NodeJS.ProcessEnv = process.env) {
+  const explicit = parseQaSuiteBooleanEnv(env.OPENCLAW_QA_SUITE_PROGRESS);
+  return explicit ?? (parseQaSuiteBooleanEnv(env.CI) === true);
+}
+
+/** Emits one "[qa-suite]"-prefixed line on stderr, but only when progress logging is enabled. */
+function writeQaSuiteProgress(enabled: boolean, message: string) {
+  if (enabled) {
+    process.stderr.write(`[qa-suite] ${message}\n`);
+  }
+}
+
+/** Flattens control characters and whitespace runs so a value fits on one progress line. */
+function sanitizeQaSuiteProgressValue(value: string): string {
+  const flattened = Array.from(value, (char) => {
+    const code = char.codePointAt(0);
+    if (code === undefined) {
+      return "";
+    }
+    // C0 controls, DEL, and C1 controls (U+0000-U+001F, U+007F-U+009F) become spaces.
+    return code <= 0x1f || (code >= 0x7f && code <= 0x9f) ? " " : char;
+  }).join("");
+  const collapsed = flattened.replace(/\s+/gu, " ").trim();
+  return collapsed.length > 0 ? collapsed : "<empty>";
+}
+
 function requireQaSuiteStartLab(startLab: QaSuiteStartLabFn | undefined): QaSuiteStartLabFn {
   if (startLab) {
     return startLab;
@@ -223,28 +267,7 @@ export type QaSuiteSummaryJsonParams = {
  * import this type instead of re-declaring the shape, so changes to the
  * summary schema propagate through to every consumer at type-check time.
  */
-export type QaSuiteSummaryJson = {
-  scenarios: QaSuiteScenarioResult[];
-  counts: {
-    total: number;
-    passed: number;
-    failed: number;
-  };
-  run: {
-    startedAt: string;
-    finishedAt: string;
-    providerMode: QaProviderMode;
-    primaryModel: string;
-    primaryProvider: string | null;
-    primaryModelName: string | null;
-    alternateModel: string;
-    alternateProvider: string | null;
-    alternateModelName: string | null;
-    fastMode: boolean;
-    concurrency: number;
-    scenarioIds: string[] | null;
-  };
-};
+export type { QaSuiteSummaryJson } from "./suite-summary.js";
 
 /**
  * Pure-ish JSON builder for qa-suite-summary.json. Exported so the GPT-5.4
@@ -268,7 +291,7 @@ export function buildQaSuiteSummaryJson(params: QaSuiteSummaryJsonParams): QaSui
     counts: {
       total: params.scenarios.length,
       passed: params.scenarios.filter((scenario) => scenario.status === "pass").length,
-      failed: params.scenarios.filter((scenario) => scenario.status === "fail").length,
+      failed: countQaSuiteFailedScenarios(params.scenarios),
     },
     run: {
       startedAt: params.startedAt.toISOString(),
@@ -359,6 +382,11 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
     selectedCatalogScenarios.length,
     defaultQaSuiteConcurrencyForTransport(transportId),
   );
+  const progressEnabled = shouldLogQaSuiteProgress();
+  writeQaSuiteProgress(
+    progressEnabled,
+    `run start: scenarios=${selectedCatalogScenarios.length} concurrency=${concurrency} transport=${transportId}`,
+  );
 
   if (concurrency > 1 && selectedCatalogScenarios.length > 1) {
     const ownsLab = !params?.lab;
@@ -396,6 +424,11 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
         selectedCatalogScenarios,
         concurrency,
         async (scenario, index): Promise<QaSuiteScenarioResult> => {
+          const scenarioIdForLog = sanitizeQaSuiteProgressValue(scenario.id);
+          writeQaSuiteProgress(
+            progressEnabled,
+            `scenario start (${index + 1}/${selectedCatalogScenarios.length}): ${scenarioIdForLog}`,
+          );
           liveScenarioOutcomes[index] = {
             id: scenario.id,
             name: scenario.title,
@@ -447,6 +480,10 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
               finishedAt: new Date().toISOString(),
             };
             updateScenarioRun();
+            writeQaSuiteProgress(
+              progressEnabled,
+              `scenario ${scenarioResult.status} (${index + 1}/${selectedCatalogScenarios.length}): ${scenarioIdForLog}`,
+            );
             return scenarioResult;
           } catch (error) {
             const details = formatErrorMessage(error);
@@ -472,11 +509,16 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
               finishedAt: new Date().toISOString(),
             };
             updateScenarioRun();
+            writeQaSuiteProgress(
+              progressEnabled,
+              `scenario fail (${index + 1}/${selectedCatalogScenarios.length}): ${scenarioIdForLog}`,
+            );
             return scenarioResult;
           }
         },
       );
       const finishedAt = new Date();
+      const failedCount = scenarios.filter((scenario) => scenario.status === "fail").length;
       lab.setScenarioRun({
         kind: "suite",
         status: "completed",
@@ -511,6 +553,10 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
         markdown: report,
         generatedAt: finishedAt.toISOString(),
       } satisfies QaLabLatestReport);
+      writeQaSuiteProgress(
+        progressEnabled,
+        `run complete: passed=${scenarios.length - failedCount} failed=${failedCount} total=${scenarios.length}`,
+      );
       return {
         outputDir,
         reportPath,
@@ -607,6 +653,11 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
     });
 
     for (const [index, scenario] of selectedCatalogScenarios.entries()) {
+      const scenarioIdForLog = sanitizeQaSuiteProgressValue(scenario.id);
+      writeQaSuiteProgress(
+        progressEnabled,
+        `scenario start (${index + 1}/${selectedCatalogScenarios.length}): ${scenarioIdForLog}`,
+      );
       liveScenarioOutcomes[index] = {
         id: scenario.id,
         name: scenario.title,
@@ -622,6 +673,10 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
 
       const result = await runScenarioDefinition(env, scenario);
       scenarios.push(result);
+      writeQaSuiteProgress(
+        progressEnabled,
+        `scenario ${result.status} (${index + 1}/${selectedCatalogScenarios.length}): ${scenarioIdForLog}`,
+      );
       liveScenarioOutcomes[index] = {
         id: scenario.id,
         name: scenario.title,
@@ -640,6 +695,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
     }
 
     const finishedAt = new Date();
+    const failedCount = scenarios.filter((scenario) => scenario.status === "fail").length;
     if (scenarios.some((scenario) => scenario.status === "fail")) {
       preserveGatewayRuntimeDir = path.join(outputDir, "artifacts", "gateway-runtime");
     }
@@ -674,6 +730,10 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
       generatedAt: finishedAt.toISOString(),
     } satisfies QaLabLatestReport;
     lab.setLatestReport(latestReport);
+    writeQaSuiteProgress(
+      progressEnabled,
+      `run complete: passed=${scenarios.length - failedCount} failed=${failedCount} total=${scenarios.length}`,
+    );
 
     return {
       outputDir,
@@ -706,3 +766,9 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
     }
   }
 }
+
+export const qaSuiteProgressTesting = {
+  parseQaSuiteBooleanEnv,
+  sanitizeQaSuiteProgressValue,
+  shouldLogQaSuiteProgress,
+}; // exposes the module-private progress helpers; suite.test.ts imports this object
diff --git a/extensions/qa-matrix/src/run-config.test.ts b/extensions/qa-matrix/src/run-config.test.ts
new file mode 100644
index 00000000000..c7c6b45101a
--- /dev/null
+++ b/extensions/qa-matrix/src/run-config.test.ts
@@ -0,0 +1,19 @@
+import { describe, expect, it } from "vitest";
+import { normalizeQaProviderMode } from "./run-config.js";
+
+describe("matrix qa run config", () => {
+  it("defaults to live-frontier when provider mode is omitted", () => {
+    expect(normalizeQaProviderMode(undefined)).toBe("live-frontier");
+    expect(normalizeQaProviderMode("")).toBe("live-frontier"); // empty string counts as omitted
+  });
+
+  it("keeps legacy live-openai as an alias for live-frontier", () => {
+    expect(normalizeQaProviderMode("live-openai")).toBe("live-frontier");
+  });
+
+  it("rejects unknown provider modes", () => {
+    expect(() => normalizeQaProviderMode("mystery-mode")).toThrow(
+      "unknown QA provider mode: mystery-mode", // the message echoes the rejected string
+    );
+  });
+});
diff --git a/extensions/qa-matrix/src/run-config.ts b/extensions/qa-matrix/src/run-config.ts
index 36ea7f59efc..a7b8eedc653 100644
--- a/extensions/qa-matrix/src/run-config.ts
+++ b/extensions/qa-matrix/src/run-config.ts
@@ -2,8 +2,15 @@ export type QaProviderMode = "mock-openai" | "live-frontier";
 export type QaProviderModeInput = QaProviderMode | "live-openai";
 
 export function normalizeQaProviderMode(input: unknown): QaProviderMode {
+  if (input === undefined || input === null || input === "") {
+    return "live-frontier"; // omitted or empty input falls back to the default mode
+  }
   if (input === "mock-openai") {
     return "mock-openai";
   }
-  return "live-frontier";
+  if (input === "live-frontier" || input === "live-openai") {
+    return "live-frontier"; // "live-openai" is accepted as an alias
+  }
+  const details = typeof input === "string" ? `: ${input}` : ""; // echo only string inputs
+  throw new Error(`unknown QA provider mode${details}`);
 }