qa-lab: standardize live transport scenario coverage

2026-04-17 04:01:05 +00:00 · 2026-04-10 15:54:19 -04:00
parent 70379ded4a
commit 362446d890
8 changed files with 362 additions and 33 deletions
--- a/docs/concepts/qa-e2e-automation.md
+++ b/docs/concepts/qa-e2e-automation.md
@@ -64,6 +64,36 @@ the real Matrix plugin inside a QA gateway child. The live transport lane keeps
 the child config scoped to the transport under test, so Matrix runs without
 `qa-channel` in the child config.

+Live transport lanes now share one small, standard contract instead of each
+lane inventing its own scenario list shape:
+
+- Baseline contract:
+  - canary
+  - mention gating
+  - sender allowlist block
+  - top-level reply shape
+  - restart resume
+- Capability add-ons:
+  - thread follow-up
+  - thread isolation
+  - reaction observation
+  - help command
+
+Current coverage:
+
+- Matrix:
+  - baseline contract
+  - thread follow-up
+  - thread isolation
+  - reaction observation
+- Telegram:
+  - canary
+  - help command
+
+This keeps `qa-channel` as the broad product-behavior suite while Matrix,
+Telegram, and future live transports share one explicit transport-contract
+checklist.
+
 For a disposable Linux VM lane without bringing Docker into the QA path, run:

 ```bash
--- a/docs/help/testing.md
+++ b/docs/help/testing.md
@@ -70,6 +70,34 @@ These commands sit beside the main test suites when you need QA-lab realism:
  - Provisions three temporary Matrix users (`driver`, `sut`, `observer`) plus one private room, then starts a QA gateway child with the real Matrix plugin as the SUT transport.
  - Uses the pinned stable Tuwunel image `ghcr.io/matrix-construct/tuwunel:v1.5.1` by default. Override with `OPENCLAW_QA_MATRIX_TUWUNEL_IMAGE` when you need to test a different image.
  - Writes a Matrix QA report, summary, and observed-events artifact under `.artifacts/qa-e2e/...`.
+- `pnpm openclaw qa telegram`
+  - Runs the Telegram live QA lane against a real private group using the driver and SUT bot tokens from env.
+  - Writes a Telegram QA report, summary, and observed-messages artifact under `.artifacts/qa-e2e/...`.
+
+Live transport lanes share one standard contract so new transports do not drift:
+
+- Baseline:
+  - canary
+  - mention gating
+  - sender allowlist block
+  - top-level reply shape
+  - restart resume
+- Capability add-ons:
+  - thread follow-up
+  - thread isolation
+  - reaction observation
+  - help command
+
+Current lane coverage:
+
+- Matrix:
+  - baseline contract
+  - thread follow-up
+  - thread isolation
+  - reaction observation
+- Telegram:
+  - canary
+  - help command

 ## Test suites (what runs where)

--- a/extensions/qa-lab/src/live-transport-scenarios.test.ts
+++ b/extensions/qa-lab/src/live-transport-scenarios.test.ts
@@ -0,0 +1,76 @@
+import { describe, expect, it } from "vitest";
+import {
+  LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS,
+  collectLiveTransportStandardScenarioCoverage,
+  findMissingLiveTransportStandardScenarios,
+  selectLiveTransportScenarios,
+} from "./live-transport-scenarios.js";
+
+describe("live transport scenario helpers", () => {
+  it("keeps the repo-wide baseline contract ordered", () => {
+    expect(LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS).toEqual([
+      "canary",
+      "mention-gating",
+      "allowlist-block",
+      "top-level-reply-shape",
+      "restart-resume",
+    ]);
+  });
+
+  it("selects requested scenarios and reports unknown ids with the lane label", () => {
+    const definitions = [
+      { id: "alpha", timeoutMs: 1_000, title: "alpha" },
+      { id: "beta", timeoutMs: 1_000, title: "beta" },
+    ] as const;
+
+    expect(
+      selectLiveTransportScenarios({
+        ids: ["beta"],
+        laneLabel: "Demo",
+        scenarios: definitions,
+      }),
+    ).toEqual([definitions[1]]);
+
+    expect(() =>
+      selectLiveTransportScenarios({
+        ids: ["alpha", "missing"],
+        laneLabel: "Demo",
+        scenarios: definitions,
+      }),
+    ).toThrow("unknown Demo QA scenario id(s): missing");
+  });
+
+  it("dedupes always-on and scenario-backed standard coverage", () => {
+    const covered = collectLiveTransportStandardScenarioCoverage({
+      alwaysOnStandardScenarioIds: ["canary"],
+      scenarios: [
+        {
+          id: "scenario-1",
+          standardId: "mention-gating",
+          timeoutMs: 1_000,
+          title: "mention",
+        },
+        {
+          id: "scenario-2",
+          standardId: "mention-gating",
+          timeoutMs: 1_000,
+          title: "mention again",
+        },
+        {
+          id: "scenario-3",
+          standardId: "restart-resume",
+          timeoutMs: 1_000,
+          title: "restart",
+        },
+      ],
+    });
+
+    expect(covered).toEqual(["canary", "mention-gating", "restart-resume"]);
+    expect(
+      findMissingLiveTransportStandardScenarios({
+        coveredStandardScenarioIds: covered,
+        expectedStandardScenarioIds: LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS,
+      }),
+    ).toEqual(["allowlist-block", "top-level-reply-shape"]);
+  });
+});
--- a/extensions/qa-lab/src/live-transport-scenarios.ts
+++ b/extensions/qa-lab/src/live-transport-scenarios.ts
@@ -0,0 +1,149 @@
+export type LiveTransportStandardScenarioId =
+  | "canary"
+  | "mention-gating"
+  | "allowlist-block"
+  | "top-level-reply-shape"
+  | "restart-resume"
+  | "thread-follow-up"
+  | "thread-isolation"
+  | "reaction-observation"
+  | "help-command";
+
+export type LiveTransportScenarioDefinition<TId extends string = string> = {
+  id: TId;
+  standardId?: LiveTransportStandardScenarioId;
+  timeoutMs: number;
+  title: string;
+};
+
+export type LiveTransportStandardScenarioDefinition = {
+  description: string;
+  id: LiveTransportStandardScenarioId;
+  title: string;
+};
+
+export const LIVE_TRANSPORT_STANDARD_SCENARIOS: readonly LiveTransportStandardScenarioDefinition[] =
+  [
+    {
+      id: "canary",
+      title: "Transport canary",
+      description: "The lane can trigger one known-good reply on the real transport.",
+    },
+    {
+      id: "mention-gating",
+      title: "Mention gating",
+      description: "Messages without the required mention do not trigger a reply.",
+    },
+    {
+      id: "allowlist-block",
+      title: "Sender allowlist block",
+      description: "Non-allowlisted senders do not trigger a reply.",
+    },
+    {
+      id: "top-level-reply-shape",
+      title: "Top-level reply shape",
+      description: "Top-level replies stay top-level when the lane is configured that way.",
+    },
+    {
+      id: "restart-resume",
+      title: "Restart resume",
+      description: "The lane still responds after a gateway restart.",
+    },
+    {
+      id: "thread-follow-up",
+      title: "Thread follow-up",
+      description: "Threaded prompts receive threaded replies with the expected relation metadata.",
+    },
+    {
+      id: "thread-isolation",
+      title: "Thread isolation",
+      description: "Fresh top-level prompts stay out of prior threads.",
+    },
+    {
+      id: "reaction-observation",
+      title: "Reaction observation",
+      description: "Reaction events are observed and normalized correctly.",
+    },
+    {
+      id: "help-command",
+      title: "Help command",
+      description: "The transport-specific help command path replies successfully.",
+    },
+  ] as const;
+
+export const LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS: readonly LiveTransportStandardScenarioId[] =
+  [
+    "canary",
+    "mention-gating",
+    "allowlist-block",
+    "top-level-reply-shape",
+    "restart-resume",
+  ] as const;
+
+const LIVE_TRANSPORT_STANDARD_SCENARIO_ID_SET = new Set(
+  LIVE_TRANSPORT_STANDARD_SCENARIOS.map((scenario) => scenario.id),
+);
+
+/** Throws on the first id that is not one of `LIVE_TRANSPORT_STANDARD_SCENARIOS`. */
+function assertKnownStandardScenarioIds(ids: readonly LiveTransportStandardScenarioId[]) {
+  const unknownAt = ids.findIndex((id) => !LIVE_TRANSPORT_STANDARD_SCENARIO_ID_SET.has(id));
+  if (unknownAt >= 0) {
+    throw new Error(`unknown live transport standard scenario id: ${ids[unknownAt]}`);
+  }
+}
+
+/** Returns every scenario when no ids are given, else the requested ones in definition order. */
+export function selectLiveTransportScenarios<TDefinition extends { id: string }>(params: {
+  ids?: string[];
+  laneLabel: string;
+  scenarios: readonly TDefinition[];
+}) {
+  if (!params.ids || params.ids.length === 0) {
+    return [...params.scenarios];
+  }
+  // Unknown ids throw; Set lookups avoid rescanning the arrays for each id.
+  const requested = new Set(params.ids);
+  const defined = new Set(params.scenarios.map((scenario) => scenario.id));
+  const missingIds = [...requested].filter((id) => !defined.has(id));
+  if (missingIds.length > 0) {
+    throw new Error(`unknown ${params.laneLabel} QA scenario id(s): ${missingIds.join(", ")}`);
+  }
+  return params.scenarios.filter((scenario) => requested.has(scenario.id));
+}
+
+/**
+ * Lists the standard scenario ids a lane covers, in first-seen order and without repeats.
+ *
+ * Always-on ids come first, followed by each scenario's `standardId` in definition order;
+ * scenarios without a `standardId` contribute nothing.
+ *
+ * @returns A new array of the covered standard scenario ids.
+ * @throws Error if any id is missing from `LIVE_TRANSPORT_STANDARD_SCENARIOS`.
+ */
+export function collectLiveTransportStandardScenarioCoverage<TId extends string>(params: {
+  alwaysOnStandardScenarioIds?: readonly LiveTransportStandardScenarioId[];
+  scenarios: readonly LiveTransportScenarioDefinition<TId>[];
+}) {
+  const alwaysOn = params.alwaysOnStandardScenarioIds ?? [];
+  assertKnownStandardScenarioIds(alwaysOn);
+  // A Set iterates in insertion order, so it serves as the ordered, de-duplicated result.
+  const coverage = new Set<LiveTransportStandardScenarioId>(alwaysOn);
+  for (const { standardId } of params.scenarios) {
+    if (!standardId) {
+      continue;
+    }
+    assertKnownStandardScenarioIds([standardId]);
+    coverage.add(standardId);
+  }
+  return [...coverage];
+}
+
+export function findMissingLiveTransportStandardScenarios(params: {
+  coveredStandardScenarioIds: readonly LiveTransportStandardScenarioId[];
+  expectedStandardScenarioIds: readonly LiveTransportStandardScenarioId[];
+}) {
+  assertKnownStandardScenarioIds(params.coveredStandardScenarioIds); // throws on unknown ids
+  assertKnownStandardScenarioIds(params.expectedStandardScenarioIds);
+  const covered = new Set(params.coveredStandardScenarioIds); // lookup set for the filter below
+  return params.expectedStandardScenarioIds.filter((id) => !covered.has(id)); // expected order kept
+}
--- a/extensions/qa-lab/src/matrix-live-scenarios.test.ts
+++ b/extensions/qa-lab/src/matrix-live-scenarios.test.ts
@@ -1,4 +1,8 @@
 import { describe, expect, it } from "vitest";
+import {
+  LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS,
+  findMissingLiveTransportStandardScenarios,
+} from "./live-transport-scenarios.js";
 import { __testing as scenarioTesting } from "./matrix-live-scenarios.js";

 describe("matrix live qa scenarios", () => {
@@ -25,4 +29,23 @@ describe("matrix live qa scenarios", () => {
      scenarioTesting.findMatrixQaScenarios(["matrix-thread-follow-up", "typo-scenario"]),
    ).toThrow("unknown Matrix QA scenario id(s): typo-scenario");
  });
+
+  it("covers the baseline live transport contract plus Matrix-specific extras", () => {
+    expect(scenarioTesting.MATRIX_QA_STANDARD_SCENARIO_IDS).toEqual([
+      "canary",
+      "thread-follow-up",
+      "thread-isolation",
+      "top-level-reply-shape",
+      "reaction-observation",
+      "restart-resume",
+      "mention-gating",
+      "allowlist-block",
+    ]);
+    expect(
+      findMissingLiveTransportStandardScenarios({
+        coveredStandardScenarioIds: scenarioTesting.MATRIX_QA_STANDARD_SCENARIO_IDS,
+        expectedStandardScenarioIds: LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS,
+      }),
+    ).toEqual([]);
+  });
 });
--- a/extensions/qa-lab/src/matrix-live-scenarios.ts
+++ b/extensions/qa-lab/src/matrix-live-scenarios.ts
@@ -1,4 +1,9 @@
 import { randomUUID } from "node:crypto";
+import {
+  collectLiveTransportStandardScenarioCoverage,
+  selectLiveTransportScenarios,
+  type LiveTransportScenarioDefinition,
+} from "./live-transport-scenarios.js";
 import { createMatrixQaClient, type MatrixQaObservedEvent } from "./matrix-driver-client.js";

 export type MatrixQaScenarioId =
@@ -10,11 +15,7 @@ export type MatrixQaScenarioId =
  | "matrix-mention-gating"
  | "matrix-allowlist-block";

-export type MatrixQaScenarioDefinition = {
-  id: MatrixQaScenarioId;
-  timeoutMs: number;
-  title: string;
-};
+export type MatrixQaScenarioDefinition = LiveTransportScenarioDefinition<MatrixQaScenarioId>;

 export type MatrixQaReplyArtifact = {
  bodyPreview?: string;
@@ -78,54 +79,59 @@ const NO_REPLY_WINDOW_MS = 8_000;
 export const MATRIX_QA_SCENARIOS: MatrixQaScenarioDefinition[] = [
  {
    id: "matrix-thread-follow-up",
+    standardId: "thread-follow-up",
    timeoutMs: 60_000,
    title: "Matrix thread follow-up reply",
  },
  {
    id: "matrix-thread-isolation",
+    standardId: "thread-isolation",
    timeoutMs: 75_000,
    title: "Matrix top-level reply stays out of prior thread",
  },
  {
    id: "matrix-top-level-reply-shape",
+    standardId: "top-level-reply-shape",
    timeoutMs: 45_000,
    title: "Matrix top-level reply keeps replyToMode off",
  },
  {
    id: "matrix-reaction-notification",
+    standardId: "reaction-observation",
    timeoutMs: 45_000,
    title: "Matrix reactions on bot replies are observed",
  },
  {
    id: "matrix-restart-resume",
+    standardId: "restart-resume",
    timeoutMs: 60_000,
    title: "Matrix lane resumes cleanly after gateway restart",
  },
  {
    id: "matrix-mention-gating",
+    standardId: "mention-gating",
    timeoutMs: NO_REPLY_WINDOW_MS,
    title: "Matrix room message without mention does not trigger",
  },
  {
    id: "matrix-allowlist-block",
+    standardId: "allowlist-block",
    timeoutMs: NO_REPLY_WINDOW_MS,
    title: "Matrix allowlist blocks non-driver replies",
  },
 ];

+export const MATRIX_QA_STANDARD_SCENARIO_IDS = collectLiveTransportStandardScenarioCoverage({
+  alwaysOnStandardScenarioIds: ["canary"],
+  scenarios: MATRIX_QA_SCENARIOS,
+});
+
 export function findMatrixQaScenarios(ids?: string[]) {
-  if (!ids || ids.length === 0) {
-    return [...MATRIX_QA_SCENARIOS];
-  }
-  const requested = new Set(ids);
-  const selected = MATRIX_QA_SCENARIOS.filter((scenario) => ids.includes(scenario.id));
-  const missingIds = [...requested].filter(
-    (id) => !selected.some((scenario) => scenario.id === id),
-  );
-  if (missingIds.length > 0) {
-    throw new Error(`unknown Matrix QA scenario id(s): ${missingIds.join(", ")}`);
-  }
-  return selected;
+  return selectLiveTransportScenarios({
+    ids,
+    laneLabel: "Matrix",
+    scenarios: MATRIX_QA_SCENARIOS,
+  });
 }

 export function buildMentionPrompt(sutUserId: string, token: string) {
@@ -571,6 +577,7 @@ export async function runMatrixQaScenario(
 }

 export const __testing = {
+  MATRIX_QA_STANDARD_SCENARIO_IDS,
  buildMatrixReplyDetails,
  buildMentionPrompt,
  findMatrixQaScenarios,
--- a/extensions/qa-lab/src/telegram-live.runtime.test.ts
+++ b/extensions/qa-lab/src/telegram-live.runtime.test.ts
@@ -1,5 +1,9 @@
 import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
 import { afterEach, describe, expect, it, vi } from "vitest";
+import {
+  LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS,
+  findMissingLiveTransportStandardScenarios,
+} from "./live-transport-scenarios.js";
 import { __testing } from "./telegram-live.runtime.js";

 const fetchWithSsrFGuardMock = vi.hoisted(() =>
@@ -220,6 +224,16 @@ describe("telegram live qa runtime", () => {
    );
  });

+  it("tracks Telegram live coverage against the shared transport contract", () => {
+    expect(__testing.TELEGRAM_QA_STANDARD_SCENARIO_IDS).toEqual(["canary", "help-command"]);
+    expect(
+      findMissingLiveTransportStandardScenarios({
+        coveredStandardScenarioIds: __testing.TELEGRAM_QA_STANDARD_SCENARIO_IDS,
+        expectedStandardScenarioIds: LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS,
+      }),
+    ).toEqual(["mention-gating", "allowlist-block", "top-level-reply-shape", "restart-resume"]);
+  });
+
  it("adds an abort deadline to Telegram API requests", async () => {
    let signal: AbortSignal | undefined;
    vi.stubGlobal(
--- a/extensions/qa-lab/src/telegram-live.runtime.ts
+++ b/extensions/qa-lab/src/telegram-live.runtime.ts
@@ -6,6 +6,11 @@ import { fetchWithSsrFGuard } from "openclaw/plugin-sdk/ssrf-runtime";
 import { startQaGatewayChild } from "./gateway-child.js";
 import { startQaLiveLaneGateway } from "./live-gateway.runtime.js";
 import { appendLiveLaneIssue, buildLiveLaneArtifactsError } from "./live-lane-helpers.js";
+import {
+  collectLiveTransportStandardScenarioCoverage,
+  selectLiveTransportScenarios,
+  type LiveTransportScenarioDefinition,
+} from "./live-transport-scenarios.js";
 import {
  defaultQaModelForMode,
  normalizeQaProviderMode,
@@ -25,10 +30,7 @@ type TelegramBotIdentity = {
  username?: string;
 };

-type TelegramQaScenarioDefinition = {
-  id: "telegram-help-command";
-  title: string;
-  timeoutMs: number;
+type TelegramQaScenarioDefinition = LiveTransportScenarioDefinition<"telegram-help-command"> & {
  buildInput: (sutUsername: string) => string;
 };

@@ -157,12 +159,18 @@ type TelegramSendMessageResult = {
 const TELEGRAM_QA_SCENARIOS: TelegramQaScenarioDefinition[] = [
  {
    id: "telegram-help-command",
+    standardId: "help-command",
    title: "Telegram help command reply",
    timeoutMs: 45_000,
    buildInput: (sutUsername) => `/help@${sutUsername}`,
  },
 ];

+export const TELEGRAM_QA_STANDARD_SCENARIO_IDS = collectLiveTransportStandardScenarioCoverage({
+  alwaysOnStandardScenarioIds: ["canary"],
+  scenarios: TELEGRAM_QA_SCENARIOS,
+});
+
 const TELEGRAM_QA_ENV_KEYS = [
  "OPENCLAW_QA_TELEGRAM_GROUP_ID",
  "OPENCLAW_QA_TELEGRAM_DRIVER_BOT_TOKEN",
@@ -487,18 +495,11 @@ function buildObservedMessagesArtifact(params: {
 }

 function findScenario(ids?: string[]) {
-  if (!ids || ids.length === 0) {
-    return [...TELEGRAM_QA_SCENARIOS];
-  }
-  const requested = new Set(ids);
-  const selected = TELEGRAM_QA_SCENARIOS.filter((scenario) => ids.includes(scenario.id));
-  const missingIds = [...requested].filter(
-    (id) => !selected.some((scenario) => scenario.id === id),
-  );
-  if (missingIds.length > 0) {
-    throw new Error(`unknown Telegram QA scenario id(s): ${missingIds.join(", ")}`);
-  }
-  return selected;
+  return selectLiveTransportScenarios({
+    ids,
+    laneLabel: "Telegram",
+    scenarios: TELEGRAM_QA_SCENARIOS,
+  });
 }

 function classifyCanaryReply(params: {
@@ -879,6 +880,7 @@ export async function runTelegramQaLive(params: {

 export const __testing = {
  TELEGRAM_QA_SCENARIOS,
+  TELEGRAM_QA_STANDARD_SCENARIO_IDS,
  buildTelegramQaConfig,
  buildObservedMessagesArtifact,
  canaryFailureMessage,