qa-lab: harden live lane cleanup

This commit is contained in:
Gustavo Madeira Santana
2026-04-10 14:45:50 -04:00
parent 0d457fd14d
commit 1525a39cc7
6 changed files with 109 additions and 10 deletions

View File

@@ -59,10 +59,10 @@ pnpm openclaw qa matrix
```
That lane provisions a disposable Tuwunel homeserver in Docker, registers
temporary driver and SUT users, creates one private room, then runs the real
Matrix plugin inside a QA gateway child. The Matrix lane keeps `qa-channel`
available in the child config for shared qa-lab helpers, but the transport
under test is Matrix itself.
temporary driver, SUT, and observer users, creates one private room, then runs
the real Matrix plugin inside a QA gateway child. The live transport lane keeps
the child config scoped to the transport under test, so Matrix runs without
`qa-channel` in the child config.
For a disposable Linux VM lane without bringing Docker into the QA path, run:

View File

@@ -67,7 +67,7 @@ These commands sit beside the main test suites when you need QA-lab realism:
- Starts the Docker-backed QA site for operator-style QA work.
- `pnpm openclaw qa matrix`
- Runs the Matrix live QA lane against a disposable Docker-backed Tuwunel homeserver.
- Provisions two temporary Matrix users plus one private room, then starts a QA gateway child with the real Matrix plugin as the SUT transport.
- Provisions three temporary Matrix users (`driver`, `sut`, `observer`) plus one private room, then starts a QA gateway child with the real Matrix plugin as the SUT transport.
- Uses the pinned stable Tuwunel image `ghcr.io/matrix-construct/tuwunel:v1.5.1` by default. Override with `OPENCLAW_QA_MATRIX_TUWUNEL_IMAGE` when you need to test a different image.
- Writes a Matrix QA report, summary, and observed-events artifact under `.artifacts/qa-e2e/...`.

View File

@@ -87,4 +87,22 @@ describe("startQaLiveLaneGateway", () => {
await harness.stop();
expect(gatewayStop).toHaveBeenCalledTimes(1);
});
it("still stops the mock server when gateway shutdown fails", async () => {
  // First gateway stop() call rejects; cleanup must still reach the mock server.
  gatewayStop.mockRejectedValueOnce(new Error("gateway down"));
  const lane = await startQaLiveLaneGateway({
    repoRoot: "/tmp/openclaw-repo",
    qaBusBaseUrl: "http://127.0.0.1:43123",
    providerMode: "mock-openai",
    primaryModel: "mock-openai/gpt-5.4",
    alternateModel: "mock-openai/gpt-5.4-alt",
    controlUiEnabled: false,
  });
  // stop() surfaces the aggregated cleanup error instead of swallowing it.
  const expectedMessage =
    "failed to stop QA live lane resources:\ngateway stop failed: gateway down";
  await expect(lane.stop()).rejects.toThrow(expectedMessage);
  // Both resources were asked to stop exactly once despite the gateway failure.
  expect(mockStop).toHaveBeenCalledTimes(1);
  expect(gatewayStop).toHaveBeenCalledTimes(1);
});
});

View File

@@ -1,8 +1,31 @@
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
import { startQaGatewayChild, type QaCliBackendAuthMode } from "./gateway-child.js";
import { startQaMockOpenAiServer } from "./mock-openai-server.js";
import type { QaThinkingLevel } from "./qa-gateway-config.js";
/**
 * Tears down the QA live-lane gateway child and, when present, the mock
 * OpenAI provider. Both stops are always attempted even if the first one
 * throws; individual failures are collected and reported together as a
 * single error after every attempt has finished.
 *
 * @throws Error listing each failed stop, one per line, when any stop rejects.
 */
async function stopQaLiveLaneResources(resources: {
  gateway: Awaited<ReturnType<typeof startQaGatewayChild>>;
  mock: Awaited<ReturnType<typeof startQaMockOpenAiServer>> | null;
}) {
  const failures: string[] = [];

  // Run one stop call, recording its failure instead of propagating it so
  // a broken gateway cannot prevent the mock provider from shutting down.
  const attempt = async (label: string, stop: () => Promise<unknown>) => {
    try {
      await stop();
    } catch (error) {
      failures.push(`${label}: ${formatErrorMessage(error)}`);
    }
  };

  await attempt("gateway stop failed", () => resources.gateway.stop());

  const mock = resources.mock;
  if (mock) {
    await attempt("mock provider stop failed", () => mock.stop());
  }

  if (failures.length > 0) {
    throw new Error(`failed to stop QA live lane resources:\n${failures.join("\n")}`);
  }
}
export async function startQaLiveLaneGateway(params: {
repoRoot: string;
qaBusBaseUrl: string;
@@ -43,8 +66,7 @@ export async function startQaLiveLaneGateway(params: {
gateway,
mock,
async stop() {
await gateway.stop();
await mock?.stop();
await stopQaLiveLaneResources({ gateway, mock });
},
};
} catch (error) {

View File

@@ -360,6 +360,7 @@ export async function runMatrixQaLive(params: {
},
];
const scenarioResults: MatrixQaScenarioResult[] = [];
const cleanupErrors: string[] = [];
let gatewayHarness: Awaited<ReturnType<typeof startQaLiveLaneGateway>> | null = null;
let canaryFailed = false;
let canarySince: string | undefined;
@@ -449,8 +450,25 @@ export async function runMatrixQaLive(params: {
}
}
} finally {
await gatewayHarness?.stop().catch(() => {});
await harness.stop().catch(() => {});
if (gatewayHarness) {
try {
await gatewayHarness.stop();
} catch (error) {
cleanupErrors.push(`live gateway cleanup: ${formatErrorMessage(error)}`);
}
}
try {
await harness.stop();
} catch (error) {
cleanupErrors.push(`Matrix harness cleanup: ${formatErrorMessage(error)}`);
}
}
if (cleanupErrors.length > 0) {
checks.push({
name: "Matrix cleanup",
status: "fail",
details: cleanupErrors.join("\n"),
});
}
const finishedAtDate = new Date();
@@ -542,6 +560,18 @@ export async function runMatrixQaLive(params: {
].join("\n"),
);
}
if (cleanupErrors.length > 0) {
throw new Error(
[
"Matrix QA cleanup failed after artifacts were written.",
...cleanupErrors,
"Artifacts:",
`- report: ${reportPath}`,
`- summary: ${summaryPath}`,
`- observedEvents: ${observedEventsPath}`,
].join("\n"),
);
}
return {
observedEventsPath,

View File

@@ -72,6 +72,7 @@ type TelegramQaSummary = {
groupId: string;
startedAt: string;
finishedAt: string;
cleanupIssues: string[];
counts: {
total: number;
passed: number;
@@ -428,6 +429,7 @@ async function waitForTelegramChannelRunning(
}
function renderTelegramQaMarkdown(params: {
cleanupIssues: string[];
groupId: string;
startedAt: string;
finishedAt: string;
@@ -450,6 +452,14 @@ function renderTelegramQaMarkdown(params: {
lines.push(`- Details: ${scenario.details}`);
lines.push("");
}
if (params.cleanupIssues.length > 0) {
lines.push("## Cleanup");
lines.push("");
for (const issue of params.cleanupIssues) {
lines.push(`- ${issue}`);
}
lines.push("");
}
return lines.join("\n");
}
@@ -718,6 +728,7 @@ export async function runTelegramQaLive(params: {
});
const scenarioResults: TelegramQaScenarioResult[] = [];
const cleanupIssues: string[] = [];
let canaryFailure: string | null = null;
try {
await waitForTelegramChannelRunning(gatewayHarness.gateway, sutAccountId);
@@ -783,7 +794,11 @@ export async function runTelegramQaLive(params: {
}
}
} finally {
await gatewayHarness.stop();
try {
await gatewayHarness.stop();
} catch (error) {
cleanupIssues.push(`live gateway cleanup: ${formatErrorMessage(error)}`);
}
}
const finishedAt = new Date().toISOString();
@@ -791,6 +806,7 @@ export async function runTelegramQaLive(params: {
groupId: runtimeEnv.groupId,
startedAt,
finishedAt,
cleanupIssues,
counts: {
total: scenarioResults.length,
passed: scenarioResults.filter((entry) => entry.status === "pass").length,
@@ -804,6 +820,7 @@ export async function runTelegramQaLive(params: {
await fs.writeFile(
reportPath,
`${renderTelegramQaMarkdown({
cleanupIssues,
groupId: runtimeEnv.groupId,
startedAt,
finishedAt,
@@ -832,6 +849,18 @@ export async function runTelegramQaLive(params: {
`${canaryFailure}\nArtifacts:\n- report: ${reportPath}\n- summary: ${summaryPath}\n- observedMessages: ${observedMessagesPath}`,
);
}
if (cleanupIssues.length > 0) {
throw new Error(
[
"Telegram QA cleanup failed after artifacts were written.",
...cleanupIssues,
"Artifacts:",
`- report: ${reportPath}`,
`- summary: ${summaryPath}`,
`- observedMessages: ${observedMessagesPath}`,
].join("\n"),
);
}
return {
outputDir,