qa: harden parity gate execution (#70045)

2026-05-06 06:40:44 +00:00 · 2026-04-22 03:08:25 -05:00
parent bee2e0f38f
commit dab46a7e98
8 changed files with 504 additions and 40 deletions
--- a/.github/workflows/parity-gate.yml
+++ b/.github/workflows/parity-gate.yml
@@ -41,6 +41,7 @@ jobs:
      # followthrough gate that expects a fast post-approval read within a 30s
      # agent.wait timeout.
      QA_PARITY_CONCURRENCY: "1"
+      OPENCLAW_QA_TRANSPORT_READY_TIMEOUT_MS: "180000"
      OPENAI_API_KEY: ""
      ANTHROPIC_API_KEY: ""
      OPENCLAW_LIVE_OPENAI_KEY: ""
@@ -70,6 +71,13 @@ jobs:
      - name: Build private QA runtime
        run: pnpm build

+      - name: Run parity preflight
+        run: |
+          pnpm openclaw qa suite \
+            --provider-mode mock-openai \
+            --model openai/gpt-5.4 \
+            --alt-model anthropic/claude-opus-4-6 \
+            --preflight
      - name: Run GPT-5.4 lane
        run: |
          pnpm openclaw qa suite \
--- a/extensions/qa-lab/src/cli.runtime.test.ts
+++ b/extensions/qa-lab/src/cli.runtime.test.ts
@@ -83,9 +83,30 @@ import type { QaProviderModeInput } from "./run-config.js";

 describe("qa cli runtime", () => {
  let stdoutWrite: ReturnType<typeof vi.spyOn>;
+  let stderrWrite: ReturnType<typeof vi.spyOn>;
+  let suiteArtifactsDir: string;
+  let suiteReportPath: string;
+  let suiteSummaryPath: string;

-  beforeEach(() => {
+  beforeEach(async () => {
+    suiteArtifactsDir = await fs.mkdtemp(path.join(os.tmpdir(), "qa-suite-runtime-"));
+    suiteReportPath = path.join(suiteArtifactsDir, "qa-suite-report.md");
+    suiteSummaryPath = path.join(suiteArtifactsDir, "qa-suite-summary.json");
+    await fs.writeFile(suiteReportPath, "# QA Suite Report\n", "utf8");
+    await fs.writeFile(
+      suiteSummaryPath,
+      JSON.stringify({
+        counts: {
+          total: 1,
+          passed: 1,
+          failed: 0,
+        },
+        scenarios: [],
+      }),
+      "utf8",
+    );
    stdoutWrite = vi.spyOn(process.stdout, "write").mockReturnValue(true);
+    stderrWrite = vi.spyOn(process.stderr, "write").mockReturnValue(true);
    runQaSuiteFromRuntime.mockReset();
    runQaCharacterEval.mockReset();
    runQaManualLane.mockReset();
@@ -101,8 +122,8 @@ describe("qa cli runtime", () => {
    );
    runQaSuiteFromRuntime.mockResolvedValue({
      watchUrl: "http://127.0.0.1:43124",
-      reportPath: "/tmp/report.md",
-      summaryPath: "/tmp/summary.json",
+      reportPath: suiteReportPath,
+      summaryPath: suiteSummaryPath,
      scenarios: [],
    });
    runQaCharacterEval.mockResolvedValue({
@@ -153,9 +174,11 @@ describe("qa cli runtime", () => {
    });
  });

-  afterEach(() => {
+  afterEach(async () => {
    stdoutWrite.mockRestore();
+    stderrWrite.mockRestore();
    vi.clearAllMocks();
+    await fs.rm(suiteArtifactsDir, { recursive: true, force: true });
  });

  it("resolves suite repo-root-relative paths before dispatching", async () => {
@@ -307,10 +330,22 @@ describe("qa cli runtime", () => {
  it("sets a failing exit code when host suite scenarios fail", async () => {
    const priorExitCode = process.exitCode;
    process.exitCode = undefined;
+    await fs.writeFile(
+      suiteSummaryPath,
+      JSON.stringify({
+        counts: {
+          total: 1,
+          passed: 0,
+          failed: 1,
+        },
+        scenarios: [{ name: "channel chat baseline", status: "fail" }],
+      }),
+      "utf8",
+    );
    runQaSuiteFromRuntime.mockResolvedValueOnce({
      watchUrl: "http://127.0.0.1:43124",
-      reportPath: "/tmp/report.md",
-      summaryPath: "/tmp/summary.json",
+      reportPath: suiteReportPath,
+      summaryPath: suiteSummaryPath,
      scenarios: [
        {
          name: "channel chat baseline",
@@ -333,10 +368,22 @@ describe("qa cli runtime", () => {
  it("keeps host suite exit code clear when --allow-failures is set", async () => {
    const priorExitCode = process.exitCode;
    process.exitCode = undefined;
+    await fs.writeFile(
+      suiteSummaryPath,
+      JSON.stringify({
+        counts: {
+          total: 1,
+          passed: 0,
+          failed: 1,
+        },
+        scenarios: [{ name: "channel chat baseline", status: "fail" }],
+      }),
+      "utf8",
+    );
    runQaSuiteFromRuntime.mockResolvedValueOnce({
      watchUrl: "http://127.0.0.1:43124",
-      reportPath: "/tmp/report.md",
-      summaryPath: "/tmp/summary.json",
+      reportPath: suiteReportPath,
+      summaryPath: suiteSummaryPath,
      scenarios: [
        {
          name: "channel chat baseline",
@@ -357,6 +404,203 @@ describe("qa cli runtime", () => {
    }
  });

+  it("retries host suite runs once for retryable infra failures", async () => { // First run rejects with a retryable message; the second run resolves.
+    runQaSuiteFromRuntime
+      .mockRejectedValueOnce(new Error("agent.wait timeout while waiting for transport ready"))
+      .mockResolvedValueOnce({
+        watchUrl: "http://127.0.0.1:43124",
+        reportPath: suiteReportPath,
+        summaryPath: suiteSummaryPath,
+        scenarios: [],
+      });
+
+    await runQaSuiteCommand({
+      repoRoot: "/tmp/openclaw-repo",
+    });
+
+    expect(runQaSuiteFromRuntime).toHaveBeenCalledTimes(2); // One failed attempt plus one retry.
+    expect(stderrWrite).toHaveBeenCalledWith(
+      expect.stringContaining("[qa-suite] infra retry 1/1: agent.wait timeout"),
+    );
+  });
+
+  it("retries host suite runs once for qa-channel readiness timeouts", async () => { // Same retry path, triggered by the qa-channel readiness timeout wording.
+    runQaSuiteFromRuntime
+      .mockRejectedValueOnce(
+        new Error(
+          "timed out after 180000ms waiting for qa-channel ready; last status: no qa-channel accounts reported",
+        ),
+      )
+      .mockResolvedValueOnce({
+        watchUrl: "http://127.0.0.1:43124",
+        reportPath: suiteReportPath,
+        summaryPath: suiteSummaryPath,
+        scenarios: [],
+      });
+
+    await runQaSuiteCommand({
+      repoRoot: "/tmp/openclaw-repo",
+    });
+
+    expect(runQaSuiteFromRuntime).toHaveBeenCalledTimes(2); // One failed attempt plus one retry.
+    expect(stderrWrite).toHaveBeenCalledWith(
+      expect.stringContaining(
+        "[qa-suite] infra retry 1/1: timed out after 180000ms waiting for qa-channel ready",
+      ),
+    );
+  });
+
+  it("does not retry host suite runs for generic timeout wording", async () => { // A "timed out" message that matches none of the retryable phrases must propagate.
+    runQaSuiteFromRuntime.mockRejectedValueOnce(
+      new Error("approval-turn timed out waiting for post-approval read"),
+    );
+
+    await expect(
+      runQaSuiteCommand({
+        repoRoot: "/tmp/openclaw-repo",
+      }),
+    ).rejects.toThrow("approval-turn timed out waiting for post-approval read");
+
+    expect(runQaSuiteFromRuntime).toHaveBeenCalledTimes(1); // No second attempt was made.
+  });
+
+  it("does not retry host suite runs for semantic failures", async () => { // A run that resolves with a failed scenario is reported, not retried.
+    const priorExitCode = process.exitCode;
+    process.exitCode = undefined; // Start from a clean exit code so the assertion below is meaningful.
+    await fs.writeFile(
+      suiteSummaryPath,
+      JSON.stringify({
+        counts: {
+          total: 1,
+          passed: 0,
+          failed: 1,
+        },
+        scenarios: [{ name: "channel chat baseline", status: "fail" }],
+      }),
+      "utf8",
+    );
+    runQaSuiteFromRuntime.mockResolvedValueOnce({
+      watchUrl: "http://127.0.0.1:43124",
+      reportPath: suiteReportPath,
+      summaryPath: suiteSummaryPath,
+      scenarios: [
+        {
+          name: "channel chat baseline",
+          status: "fail",
+          steps: [],
+        },
+      ],
+    });
+
+    try {
+      await runQaSuiteCommand({
+        repoRoot: "/tmp/openclaw-repo",
+      });
+      expect(runQaSuiteFromRuntime).toHaveBeenCalledTimes(1);
+      expect(process.exitCode).toBe(1);
+    } finally {
+      process.exitCode = priorExitCode; // Restore the exit code even when an expectation throws.
+    }
+  });
+
+  it("runs a host-only parity preflight against the sentinel scenario", async () => { // Checks the exact arguments the preflight forwards to the suite runner.
+    await runQaSuiteCommand({
+      repoRoot: "/tmp/openclaw-repo",
+      providerMode: "mock-openai",
+      primaryModel: "openai/gpt-5.4",
+      alternateModel: "anthropic/claude-opus-4-6",
+      preflight: true,
+    });
+
+    expect(runQaSuiteFromRuntime).toHaveBeenCalledWith({
+      repoRoot: path.resolve("/tmp/openclaw-repo"),
+      outputDir: expect.stringMatching( // Only the prefix is matched; the suffix after "suite-" varies per run.
+        /^\/tmp\/openclaw-repo\/\.artifacts\/qa-e2e\/preflight\/suite-/,
+      ),
+      transportId: "qa-channel",
+      providerMode: "mock-openai",
+      primaryModel: "openai/gpt-5.4",
+      alternateModel: "anthropic/claude-opus-4-6",
+      scenarioIds: ["approval-turn-tool-followthrough"],
+      concurrency: 1,
+    });
+    expect(stdoutWrite).toHaveBeenCalledWith(
+      expect.stringContaining("QA parity preflight summary:"),
+    );
+  });
+
+  it("throws when parity preflight finds a failing sentinel scenario", async () => { // Without allowFailures, a failed scenario in the summary rejects the command.
+    await fs.writeFile( // Overwrites the passing summary that beforeEach wrote.
+      suiteSummaryPath,
+      JSON.stringify({
+        counts: {
+          total: 1,
+          passed: 0,
+          failed: 1,
+        },
+        scenarios: [{ name: "approval turn tool followthrough", status: "fail" }],
+      }),
+      "utf8",
+    );
+    runQaSuiteFromRuntime.mockResolvedValueOnce({
+      watchUrl: "http://127.0.0.1:43124",
+      reportPath: suiteReportPath,
+      summaryPath: suiteSummaryPath,
+      scenarios: [{ name: "approval turn tool followthrough", status: "fail", steps: [] }],
+    });
+
+    await expect(
+      runQaSuiteCommand({
+        repoRoot: "/tmp/openclaw-repo",
+        preflight: true,
+      }),
+    ).rejects.toThrow("QA parity preflight failed with 1 failing scenario.");
+  });
+
+  it("keeps parity preflight exit code clear when --allow-failures is set", async () => { // Same failing summary as the throwing case, but allowFailures suppresses the error.
+    const priorExitCode = process.exitCode;
+    process.exitCode = undefined; // Start from a clean exit code so the assertion below is meaningful.
+    await fs.writeFile(
+      suiteSummaryPath,
+      JSON.stringify({
+        counts: {
+          total: 1,
+          passed: 0,
+          failed: 1,
+        },
+        scenarios: [{ name: "approval turn tool followthrough", status: "fail" }],
+      }),
+      "utf8",
+    );
+    runQaSuiteFromRuntime.mockResolvedValueOnce({
+      watchUrl: "http://127.0.0.1:43124",
+      reportPath: suiteReportPath,
+      summaryPath: suiteSummaryPath,
+      scenarios: [{ name: "approval turn tool followthrough", status: "fail", steps: [] }],
+    });
+
+    try {
+      await runQaSuiteCommand({
+        repoRoot: "/tmp/openclaw-repo",
+        preflight: true,
+        allowFailures: true,
+      });
+      expect(process.exitCode).toBeUndefined();
+    } finally {
+      process.exitCode = priorExitCode; // Restore the exit code even when an expectation throws.
+    }
+  });
+
+  it("rejects preflight on the multipass runner", async () => { // Preflight is only accepted with the host runner.
+    await expect(
+      runQaSuiteCommand({
+        repoRoot: "/tmp/openclaw-repo",
+        runner: "multipass",
+        preflight: true,
+      }),
+    ).rejects.toThrow("--preflight requires --runner host.");
+  });
+
  it("passes host suite CLI auth mode through", async () => {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
--- a/extensions/qa-lab/src/cli.runtime.ts
+++ b/extensions/qa-lab/src/cli.runtime.ts
@@ -30,7 +30,7 @@ import {
  type QaCredentialRecord,
 } from "./qa-credentials-admin.runtime.js";
 import { normalizeQaThinkingLevel, type QaThinkingLevel } from "./qa-gateway-config.js";
-import { normalizeQaTransportId } from "./qa-transport-registry.js";
+import { normalizeQaTransportId, type QaTransportId } from "./qa-transport-registry.js";
 import {
  defaultQaModelForMode,
  normalizeQaProviderMode,
@@ -41,6 +41,8 @@ import { readQaScenarioPack } from "./scenario-catalog.js";
 import { runQaSuiteFromRuntime } from "./suite-launch.runtime.js";
 import { readQaSuiteFailedScenarioCountFromSummary } from "./suite-summary.js";

+const QA_SUITE_INFRA_RETRY_LIMIT = 1;
+
 type InterruptibleServer = {
  baseUrl: string;
  stop(): Promise<void>;
@@ -152,6 +154,98 @@ async function readQaFailedScenarioCountFromSummary(summaryPath: string) {
  );
 }

+function isQaSuiteInfraRetryableError(error: unknown) { // Returns true when the formatted error text contains one of the retryable phrases below.
+  const message = formatErrorMessage(error).toLowerCase(); // Lower-cased so the substring checks are case-insensitive.
+  return ( // Phrases are specific on purpose: a bare "timed out" is not in the list.
+    message.includes("agent.wait timeout") ||
+    message.includes("qa cli timed out") ||
+    message.includes("readyz") ||
+    message.includes("gateway healthy") ||
+    message.includes("transport ready") ||
+    message.includes("waiting for qa-channel ready") || // Wording of the qa-channel readiness timeout error.
+    message.includes("econnreset") ||
+    message.includes("econnrefused") ||
+    message.includes("socket hang up") ||
+    message.includes("could not read qa summary json") || // Summary-artifact problems count as infra failures (presumably raised by the summary reader; confirm).
+    message.includes("could not parse qa summary json") ||
+    message.includes("did not include counts.failed or scenarios[].status") ||
+    message.includes("did not produce report artifact") // Matches the error thrown by assertQaSuiteArtifacts.
+  );
+}
+
+async function assertQaSuiteArtifacts(result: { reportPath: string; summaryPath: string }) { // Checks that a finished run left an accessible report and a summary the reader accepts.
+  try {
+    await fs.access(result.reportPath); // A failure here is rewrapped below with the report path in the message.
+  } catch (error) {
+    throw new Error(
+      `QA suite did not produce report artifact at ${result.reportPath}: ${formatErrorMessage(error)}`,
+      { cause: error }, // Keeps the original access error attached as the cause.
+    );
+  }
+  await readQaFailedScenarioCountFromSummary(result.summaryPath); // Return value is discarded; presumably called so an unusable summary throws here (confirm).
+}
+
+async function runQaSuiteFromRuntimeWithInfraRetry( // Runs the suite and repeats the run when a retryable infra error is thrown.
+  params: Parameters<typeof runQaSuiteFromRuntime>[0], // Passed unchanged to every attempt.
+  maxRetries = QA_SUITE_INFRA_RETRY_LIMIT, // Extra attempts allowed after the first run; the constant is 1.
+) {
+  for (let attempt = 0; attempt <= maxRetries; attempt += 1) { // attempt 0 is the initial run, so at most maxRetries + 1 runs happen.
+    try {
+      const result = await runQaSuiteFromRuntime(params);
+      await assertQaSuiteArtifacts(result); // Inside the try so artifact errors go through the same retry decision.
+      return result;
+    } catch (error) {
+      const retryable = isQaSuiteInfraRetryableError(error);
+      if (!retryable || attempt >= maxRetries) {
+        throw error; // Not retryable, or no retries left: rethrow the same error object.
+      }
+      process.stderr.write(
+        `[qa-suite] infra retry ${attempt + 1}/${maxRetries}: ${formatErrorMessage(error)}\n`,
+      );
+    }
+  }
+  throw new Error("unreachable qa suite retry state"); // Only reachable when the loop body never runs, e.g. a negative maxRetries.
+}
+
+async function runQaParityPreflight(params: { // Runs one fixed scenario through the retrying suite runner and throws if the summary reports failures.
+  repoRoot: string;
+  transportId: QaTransportId;
+  providerMode: QaProviderMode;
+  primaryModel?: string;
+  alternateModel?: string;
+  allowFailures?: boolean; // When true, failing scenarios return normally instead of throwing.
+}) {
+  const outputDir = path.join( // <repoRoot>/.artifacts/qa-e2e/preflight/suite-<current time in base 36>
+    params.repoRoot,
+    ".artifacts",
+    "qa-e2e",
+    "preflight",
+    `suite-${Date.now().toString(36)}`,
+  );
+  const result = await runQaSuiteFromRuntimeWithInfraRetry({
+    repoRoot: params.repoRoot,
+    outputDir,
+    transportId: params.transportId,
+    providerMode: params.providerMode,
+    primaryModel: params.primaryModel,
+    alternateModel: params.alternateModel,
+    scenarioIds: ["approval-turn-tool-followthrough"], // The only scenario the preflight runs.
+    concurrency: 1,
+  });
+  process.stdout.write(`QA parity preflight watch: ${result.watchUrl}\n`);
+  process.stdout.write(`QA parity preflight report: ${result.reportPath}\n`);
+  process.stdout.write(`QA parity preflight summary: ${result.summaryPath}\n`);
+  const failedScenarioCount = await readQaFailedScenarioCountFromSummary(result.summaryPath); // The failure count is read from the summary file, not from result.scenarios.
+  if (failedScenarioCount > 0) {
+    if (params.allowFailures === true) {
+      return; // Artifact paths were already printed; nothing is thrown.
+    }
+    throw new Error(
+      `QA parity preflight failed with ${failedScenarioCount} failing scenario${failedScenarioCount === 1 ? "" : "s"}.`,
+    );
+  }
+}
+
 function parseQaCliBackendAuthMode(value: string | undefined): QaCliBackendAuthMode | undefined {
  const normalized = value?.trim().toLowerCase();
  if (!normalized) {
@@ -365,6 +459,7 @@ export async function runQaSuiteCommand(opts: {
  cpus?: number;
  memory?: string;
  disk?: string;
+  preflight?: boolean;
 }) {
  const repoRoot = path.resolve(opts.repoRoot ?? process.cwd());
  const transportId = normalizeQaTransportId(opts.transportId);
@@ -379,6 +474,9 @@ export async function runQaSuiteCommand(opts: {
  }
  const providerMode = normalizeQaProviderMode(opts.providerMode);
  const claudeCliAuthMode = parseQaCliBackendAuthMode(opts.cliAuthMode);
+  if (opts.preflight === true && runner !== "host") {
+    throw new Error("--preflight requires --runner host.");
+  }
  if (
    runner === "host" &&
    (opts.image !== undefined ||
@@ -423,7 +521,18 @@ export async function runQaSuiteCommand(opts: {
    }
    return;
  }
-  const result = await runQaSuiteFromRuntime({
+  if (opts.preflight === true) {
+    await runQaParityPreflight({
+      repoRoot,
+      transportId,
+      providerMode,
+      primaryModel: opts.primaryModel,
+      alternateModel: opts.alternateModel,
+      allowFailures,
+    });
+    return;
+  }
+  const result = await runQaSuiteFromRuntimeWithInfraRetry({
    repoRoot,
    outputDir: resolveRepoRelativeOutputDir(repoRoot, opts.outputDir),
    transportId,
--- a/extensions/qa-lab/src/cli.ts
+++ b/extensions/qa-lab/src/cli.ts
@@ -45,6 +45,7 @@ async function runQaSuite(opts: {
  cpus?: number;
  memory?: string;
  disk?: string;
+  preflight?: boolean;
 }) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaSuiteCommand(opts);
@@ -239,6 +240,7 @@ export function registerQaLabCli(program: Command) {
    .option("--concurrency <count>", "Scenario worker concurrency", (value: string) =>
      Number(value),
    )
+    .option("--preflight", "Run a single-scenario bootstrap preflight and stop", false)
    .option(
      "--allow-failures",
      "Write artifacts without setting a failing exit code when scenarios fail",
@@ -268,6 +270,7 @@ export function registerQaLabCli(program: Command) {
        cpus?: number;
        memory?: string;
        disk?: string;
+        preflight?: boolean;
      }) => {
        await runQaSuite({
          repoRoot: opts.repoRoot,
@@ -287,6 +290,7 @@ export function registerQaLabCli(program: Command) {
          cpus: opts.cpus,
          memory: opts.memory,
          disk: opts.disk,
+          preflight: opts.preflight,
        });
      },
    );
--- a/extensions/qa-lab/src/qa-channel-transport.test.ts
+++ b/extensions/qa-lab/src/qa-channel-transport.test.ts
@@ -63,6 +63,38 @@ describe("qa channel transport", () => {
    expect(call).toHaveBeenCalledTimes(2);
  });

+  it("surfaces the last reported qa-channel account status on timeout", async () => { // The gateway always reports a stopped account with a restart pending, so waitReady must time out.
+    const transport = createQaChannelTransport(createQaBusState());
+    const call = vi.fn().mockResolvedValue({
+      channelAccounts: {
+        "qa-channel": [{ accountId: "default", running: false, restartPending: true }],
+      },
+    });
+
+    await expect(
+      transport.waitReady({
+        gateway: { call },
+        timeoutMs: 5, // Tiny timeout and poll interval keep the test fast.
+        pollIntervalMs: 1,
+      }),
+    ).rejects.toThrow(
+      'timed out after 5ms waiting for qa-channel ready; last status: {"accountId":"default","running":false,"restartPending":true}',
+    );
+  });
+
+  it("surfaces the last probe error on timeout", async () => { // Every status call rejects, so the timeout message must carry the probe error text.
+    const transport = createQaChannelTransport(createQaBusState());
+    const call = vi.fn().mockRejectedValue(new Error("channels.status exploded"));
+
+    await expect(
+      transport.waitReady({
+        gateway: { call },
+        timeoutMs: 5, // Tiny timeout and poll interval keep the test fast.
+        pollIntervalMs: 1,
+      }),
+    ).rejects.toThrow("last probe error: channels.status exploded");
+  });
+
  it("inherits the shared normalized message capabilities", async () => {
    const transport = createQaChannelTransport(createQaBusState());

--- a/extensions/qa-lab/src/qa-channel-transport.ts
+++ b/extensions/qa-lab/src/qa-channel-transport.ts
@@ -1,7 +1,9 @@
+import { setTimeout as sleep } from "node:timers/promises";
 import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
+import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
 import type { QaBusState } from "./bus-state.js";
 import { getQaProvider } from "./providers/index.js";
-import { QaStateBackedTransportAdapter, waitForQaTransportCondition } from "./qa-transport.js";
+import { QaStateBackedTransportAdapter } from "./qa-transport.js";
 import type {
  QaTransportActionName,
  QaTransportGatewayConfig,
@@ -20,33 +22,54 @@ async function waitForQaChannelReady(params: {
  timeoutMs?: number;
  pollIntervalMs?: number;
 }) {
-  await waitForQaTransportCondition(
-    async () => {
-      try {
-        const payload = (await params.gateway.call(
-          "channels.status",
-          { probe: false, timeoutMs: 2_000 },
-          { timeoutMs: 5_000 },
-        )) as {
-          channelAccounts?: Record<
-            string,
-            Array<{
-              accountId?: string;
-              running?: boolean;
-              restartPending?: boolean;
-            }>
-          >;
-        };
-        const accounts = payload.channelAccounts?.[QA_CHANNEL_ID] ?? [];
-        const account =
-          accounts.find((entry) => entry.accountId === QA_CHANNEL_ACCOUNT_ID) ?? accounts[0];
-        return account?.running && account.restartPending !== true ? true : undefined;
-      } catch {
-        return undefined;
+  const timeoutMs = params.timeoutMs ?? 45_000;
+  const pollIntervalMs = params.pollIntervalMs ?? 500;
+  const startedAt = Date.now();
+  let lastAccountStatus = "no qa-channel accounts reported";
+  let lastProbeError: string | null = null;
+
+  while (Date.now() - startedAt < timeoutMs) {
+    try {
+      const payload = (await params.gateway.call(
+        "channels.status",
+        { probe: false, timeoutMs: 2_000 },
+        { timeoutMs: 5_000 },
+      )) as {
+        channelAccounts?: Record<
+          string,
+          Array<{
+            accountId?: string;
+            running?: boolean;
+            restartPending?: boolean;
+          }>
+        >;
+      };
+      const accounts = payload.channelAccounts?.[QA_CHANNEL_ID] ?? [];
+      const account =
+        accounts.find((entry) => entry.accountId === QA_CHANNEL_ACCOUNT_ID) ?? accounts[0];
+      lastProbeError = null;
+      lastAccountStatus = account
+        ? JSON.stringify({
+            accountId: account.accountId ?? null,
+            running: account.running ?? null,
+            restartPending: account.restartPending ?? null,
+          })
+        : "no qa-channel accounts reported";
+      if (account?.running && account.restartPending !== true) {
+        return;
      }
-    },
-    params.timeoutMs ?? 45_000,
-    params.pollIntervalMs ?? 500,
+    } catch (error) {
+      lastProbeError = formatErrorMessage(error);
+    }
+    await sleep(pollIntervalMs);
+  }
+
+  throw new Error(
+    [
+      `timed out after ${timeoutMs}ms waiting for qa-channel ready`,
+      `last status: ${lastAccountStatus}`,
+      ...(lastProbeError ? [`last probe error: ${lastProbeError}`] : []),
+    ].join("; "),
  );
 }

--- a/extensions/qa-lab/src/suite.test.ts
+++ b/extensions/qa-lab/src/suite.test.ts
@@ -28,6 +28,23 @@ describe("qa suite", () => {
    expect(qaSuiteProgressTesting.shouldLogQaSuiteProgress({ CI: "false" })).toBe(false);
  });

+  it("resolves transport-ready timeout from params and env", () => { // Covers the default, a valid env value, an invalid env value, and an explicit argument.
+    expect(qaSuiteProgressTesting.resolveQaSuiteTransportReadyTimeoutMs(undefined, {})).toBe(
+      120_000, // Default when neither an argument nor the env var is given.
+    );
+    expect(
+      qaSuiteProgressTesting.resolveQaSuiteTransportReadyTimeoutMs(undefined, {
+        OPENCLAW_QA_TRANSPORT_READY_TIMEOUT_MS: "180000",
+      }),
+    ).toBe(180_000);
+    expect(
+      qaSuiteProgressTesting.resolveQaSuiteTransportReadyTimeoutMs(undefined, {
+        OPENCLAW_QA_TRANSPORT_READY_TIMEOUT_MS: "bad", // A non-numeric env value falls back to the default.
+      }),
+    ).toBe(120_000);
+    expect(qaSuiteProgressTesting.resolveQaSuiteTransportReadyTimeoutMs(90_000, {})).toBe(90_000);
+  });
+
  it("applies OPENCLAW_QA_SUITE_PROGRESS override and falls back on invalid values", () => {
    expect(
      qaSuiteProgressTesting.shouldLogQaSuiteProgress({
--- a/extensions/qa-lab/src/suite.ts
+++ b/extensions/qa-lab/src/suite.ts
@@ -83,6 +83,7 @@ export type QaSuiteRunParams = {
  startLab?: QaSuiteStartLabFn;
  concurrency?: number;
  controlUiEnabled?: boolean;
+  transportReadyTimeoutMs?: number;
 };

 function parseQaSuiteBooleanEnv(value: string | undefined): boolean | undefined {
@@ -107,6 +108,28 @@ function shouldLogQaSuiteProgress(env: NodeJS.ProcessEnv = process.env) {
  return parseQaSuiteBooleanEnv(env.CI) === true;
 }

+function resolveQaSuiteTransportReadyTimeoutMs( // Picks the transport-ready timeout in ms: explicit argument, then env var, then 120_000.
+  explicitTimeoutMs?: number, // Used when it is a finite number greater than 0.
+  env: NodeJS.ProcessEnv = process.env, // Injectable so callers can pass a custom environment.
+) {
+  if (
+    typeof explicitTimeoutMs === "number" &&
+    Number.isFinite(explicitTimeoutMs) &&
+    explicitTimeoutMs > 0 // NOTE(review): a value in (0, 1) passes this guard and floors to 0 below, while the env branch rejects anything below 1.
+  ) {
+    return Math.floor(explicitTimeoutMs);
+  }
+  const raw = env.OPENCLAW_QA_TRANSPORT_READY_TIMEOUT_MS;
+  if (!raw) {
+    return 120_000; // Env var unset or empty: use the default.
+  }
+  const parsed = Number(raw);
+  if (!Number.isFinite(parsed) || parsed < 1) {
+    return 120_000; // Non-numeric, non-finite, or below-1 env values fall back to the default.
+  }
+  return Math.floor(parsed); // Fractional env values are truncated to whole milliseconds.
+}
+
 function writeQaSuiteProgress(enabled: boolean, message: string) {
  if (!enabled) {
    return;
@@ -628,12 +651,15 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu

  let preserveGatewayRuntimeDir: string | undefined;
  try {
+    const transportReadyTimeoutMs = resolveQaSuiteTransportReadyTimeoutMs(
+      params?.transportReadyTimeoutMs,
+    );
    // The gateway child already waits for /readyz before returning, but the
    // selected transport can still be finishing account startup. Pay that
    // readiness cost once here so the first scenario does not race bootstrap.
-    await waitForTransportReady(env, 120_000).catch(async () => {
-      await waitForGatewayHealthy(env, 120_000);
-      await waitForTransportReady(env, 120_000);
+    await waitForTransportReady(env, transportReadyTimeoutMs).catch(async () => {
+      await waitForGatewayHealthy(env, transportReadyTimeoutMs);
+      await waitForTransportReady(env, transportReadyTimeoutMs);
    });
    await sleep(1_000);
    const scenarios: QaSuiteScenarioResult[] = [];
@@ -769,6 +795,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu

 export const qaSuiteProgressTesting = {
  parseQaSuiteBooleanEnv,
+  resolveQaSuiteTransportReadyTimeoutMs,
  sanitizeQaSuiteProgressValue,
  shouldLogQaSuiteProgress,
 };