qa-lab: standardize live transport scenario coverage

This commit is contained in:
Gustavo Madeira Santana
2026-04-10 15:54:19 -04:00
parent 70379ded4a
commit 362446d890
8 changed files with 362 additions and 33 deletions

View File

@@ -64,6 +64,36 @@ the real Matrix plugin inside a QA gateway child. The live transport lane keeps
the child config scoped to the transport under test, so Matrix runs without
`qa-channel` in the child config.
Live transport lanes now share one smaller contract instead of each inventing
their own scenario list shape:
- Baseline contract:
  - canary
  - mention gating
  - sender allowlist block
  - top-level reply shape
  - restart resume
- Capability add-ons:
  - thread follow-up
  - thread isolation
  - reaction observation
  - help command

Current coverage:

- Matrix:
  - baseline contract
  - thread follow-up
  - thread isolation
  - reaction observation
- Telegram:
  - canary
  - help command

This keeps `qa-channel` as the broad product-behavior suite while Matrix,
Telegram, and future live transports share one explicit transport-contract
checklist.
For a disposable Linux VM lane without bringing Docker into the QA path, run:
```bash

View File

@@ -70,6 +70,34 @@ These commands sit beside the main test suites when you need QA-lab realism:
- Provisions three temporary Matrix users (`driver`, `sut`, `observer`) plus one private room, then starts a QA gateway child with the real Matrix plugin as the SUT transport.
- Uses the pinned stable Tuwunel image `ghcr.io/matrix-construct/tuwunel:v1.5.1` by default. Override with `OPENCLAW_QA_MATRIX_TUWUNEL_IMAGE` when you need to test a different image.
- Writes a Matrix QA report, summary, and observed-events artifact under `.artifacts/qa-e2e/...`.
- `pnpm openclaw qa telegram`
- Runs the Telegram live QA lane against a real private group using the driver and SUT bot tokens from env.
- Writes a Telegram QA report, summary, and observed-messages artifact under `.artifacts/qa-e2e/...`.

Live transport lanes share one standard contract so new transports do not drift:

- Baseline contract:
  - canary
  - mention gating
  - sender allowlist block
  - top-level reply shape
  - restart resume
- Capability add-ons:
  - thread follow-up
  - thread isolation
  - reaction observation
  - help command

Current lane coverage:

- Matrix:
  - baseline contract
  - thread follow-up
  - thread isolation
  - reaction observation
- Telegram:
  - canary
  - help command

## Test suites (what runs where)

View File

@@ -0,0 +1,76 @@
import { describe, expect, it } from "vitest";
import {
LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS,
collectLiveTransportStandardScenarioCoverage,
findMissingLiveTransportStandardScenarios,
selectLiveTransportScenarios,
} from "./live-transport-scenarios.js";
// Unit coverage for the shared live-transport helpers: baseline ordering, scenario selection by
// id, and standard-coverage bookkeeping.
describe("live transport scenario helpers", () => {
// Pins both the membership and the order of the baseline contract ids.
it("keeps the repo-wide baseline contract ordered", () => {
expect(LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS).toEqual([
"canary",
"mention-gating",
"allowlist-block",
"top-level-reply-shape",
"restart-resume",
]);
});
it("selects requested scenarios and reports unknown ids with the lane label", () => {
const definitions = [
{ id: "alpha", timeoutMs: 1_000, title: "alpha" },
{ id: "beta", timeoutMs: 1_000, title: "beta" },
] as const;
// A known id returns the matching definition object itself.
expect(
selectLiveTransportScenarios({
ids: ["beta"],
laneLabel: "Demo",
scenarios: definitions,
}),
).toEqual([definitions[1]]);
// Mixing a known id with an unknown one still throws; only the unknown id is named, and the
// message carries the lane label.
expect(() =>
selectLiveTransportScenarios({
ids: ["alpha", "missing"],
laneLabel: "Demo",
scenarios: definitions,
}),
).toThrow("unknown Demo QA scenario id(s): missing");
});
it("dedupes always-on and scenario-backed standard coverage", () => {
// "mention-gating" is contributed by two scenarios but must appear once; the always-on id
// comes first, followed by scenario-backed ids in first-seen order.
const covered = collectLiveTransportStandardScenarioCoverage({
alwaysOnStandardScenarioIds: ["canary"],
scenarios: [
{
id: "scenario-1",
standardId: "mention-gating",
timeoutMs: 1_000,
title: "mention",
},
{
id: "scenario-2",
standardId: "mention-gating",
timeoutMs: 1_000,
title: "mention again",
},
{
id: "scenario-3",
standardId: "restart-resume",
timeoutMs: 1_000,
title: "restart",
},
],
});
expect(covered).toEqual(["canary", "mention-gating", "restart-resume"]);
// Missing ids are reported in the order of the expected (baseline) list.
expect(
findMissingLiveTransportStandardScenarios({
coveredStandardScenarioIds: covered,
expectedStandardScenarioIds: LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS,
}),
).toEqual(["allowlist-block", "top-level-reply-shape"]);
});
});

View File

@@ -0,0 +1,149 @@
/**
 * Identifiers of the standard scenarios in the shared live-transport contract.
 *
 * The first five members are the ids listed in `LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS`;
 * the remaining four are not part of that baseline list.
 */
export type LiveTransportStandardScenarioId =
| "canary"
| "mention-gating"
| "allowlist-block"
| "top-level-reply-shape"
| "restart-resume"
| "thread-follow-up"
| "thread-isolation"
| "reaction-observation"
| "help-command";
/**
 * Shape of one lane-specific scenario entry.
 *
 * @typeParam TId - String type of the lane's own scenario ids (defaults to `string`).
 */
export type LiveTransportScenarioDefinition<TId extends string = string> = {
// Lane-local scenario id.
id: TId;
// Standard contract scenario this entry counts toward, if any; read by
// collectLiveTransportStandardScenarioCoverage.
standardId?: LiveTransportStandardScenarioId;
// NOTE(review): presumably the per-scenario time budget in milliseconds; the consumer is not in
// this module, so confirm against the lane runner.
timeoutMs: number;
// Human-readable scenario name.
title: string;
};
/** Catalog entry describing one standard scenario of the shared live-transport contract. */
export type LiveTransportStandardScenarioDefinition = {
// One-sentence statement of what the scenario demonstrates.
description: string;
// Standard scenario id this entry describes.
id: LiveTransportStandardScenarioId;
// Short human-readable name.
title: string;
};
/**
 * Catalog of every standard live-transport scenario, with a title and description for each id.
 *
 * `LIVE_TRANSPORT_STANDARD_SCENARIO_ID_SET` is built from these entries, so an id that is missing
 * here is rejected by `assertKnownStandardScenarioIds`.
 */
export const LIVE_TRANSPORT_STANDARD_SCENARIOS: readonly LiveTransportStandardScenarioDefinition[] =
[
{
id: "canary",
title: "Transport canary",
description: "The lane can trigger one known-good reply on the real transport.",
},
{
id: "mention-gating",
title: "Mention gating",
description: "Messages without the required mention do not trigger a reply.",
},
{
id: "allowlist-block",
title: "Sender allowlist block",
description: "Non-allowlisted senders do not trigger a reply.",
},
{
id: "top-level-reply-shape",
title: "Top-level reply shape",
description: "Top-level replies stay top-level when the lane is configured that way.",
},
{
id: "restart-resume",
title: "Restart resume",
description: "The lane still responds after a gateway restart.",
},
{
id: "thread-follow-up",
title: "Thread follow-up",
description: "Threaded prompts receive threaded replies with the expected relation metadata.",
},
{
id: "thread-isolation",
title: "Thread isolation",
description: "Fresh top-level prompts stay out of prior threads.",
},
{
id: "reaction-observation",
title: "Reaction observation",
description: "Reaction events are observed and normalized correctly.",
},
{
id: "help-command",
title: "Help command",
description: "The transport-specific help command path replies successfully.",
},
] as const;
/**
 * Standard scenario ids that make up the baseline contract, in contract order.
 *
 * `findMissingLiveTransportStandardScenarios` reports gaps in the order of the expected list it is
 * given, so when this list is passed as the expected ids its order is the order of reported gaps.
 */
export const LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS: readonly LiveTransportStandardScenarioId[] =
[
"canary",
"mention-gating",
"allowlist-block",
"top-level-reply-shape",
"restart-resume",
] as const;
// Lookup of every id declared in LIVE_TRANSPORT_STANDARD_SCENARIOS; built once at module load and
// used by assertKnownStandardScenarioIds to validate ids at runtime.
const LIVE_TRANSPORT_STANDARD_SCENARIO_ID_SET = new Set(
LIVE_TRANSPORT_STANDARD_SCENARIOS.map((scenario) => scenario.id),
);
/**
 * Guards against standard scenario ids that are not declared in
 * `LIVE_TRANSPORT_STANDARD_SCENARIOS`.
 *
 * @param ids - Ids to validate, checked in order.
 * @throws Error naming the first id that is missing from the known-id set.
 */
function assertKnownStandardScenarioIds(ids: readonly LiveTransportStandardScenarioId[]): void {
  for (const candidate of ids) {
    if (LIVE_TRANSPORT_STANDARD_SCENARIO_ID_SET.has(candidate)) {
      continue;
    }
    // Fail on the first offender; later ids are not inspected.
    throw new Error(`unknown live transport standard scenario id: ${candidate}`);
  }
}
/**
 * Picks the scenario definitions a lane run should execute.
 *
 * When `ids` is omitted or empty, every scenario is returned in a fresh array. Otherwise the result
 * contains the scenarios whose `id` was requested, in the order of `params.scenarios` (not the
 * order of `params.ids`); duplicate requested ids have no extra effect.
 *
 * @param params.ids - Requested scenario ids; may be a readonly array.
 * @param params.laneLabel - Lane name interpolated into the error message.
 * @param params.scenarios - All scenarios the lane defines.
 * @returns A new array of the selected scenario definitions.
 * @throws Error listing every requested id that matches no scenario.
 */
export function selectLiveTransportScenarios<TDefinition extends { id: string }>(params: {
  ids?: readonly string[];
  laneLabel: string;
  scenarios: readonly TDefinition[];
}): TDefinition[] {
  const { ids, laneLabel, scenarios } = params;
  if (!ids || ids.length === 0) {
    return [...scenarios];
  }
  // Set lookups keep both passes linear instead of rescanning arrays per element.
  const requested = new Set(ids);
  const selected = scenarios.filter((scenario) => requested.has(scenario.id));
  const selectedIds = new Set<string>(selected.map((scenario) => scenario.id));
  // Iterating the Set reports each unknown id once, in first-requested order.
  const missingIds = [...requested].filter((id) => !selectedIds.has(id));
  if (missingIds.length > 0) {
    throw new Error(`unknown ${laneLabel} QA scenario id(s): ${missingIds.join(", ")}`);
  }
  return selected;
}
/**
 * Builds the de-duplicated list of standard scenario ids a lane covers.
 *
 * Always-on ids come first, followed by each scenario's `standardId` in scenario order; an id is
 * kept only at its first occurrence. Scenarios without a `standardId` contribute nothing.
 *
 * @param params.alwaysOnStandardScenarioIds - Standard ids covered regardless of the scenario list.
 * @param params.scenarios - Lane scenario definitions to scan for `standardId`.
 * @returns Covered standard ids in first-seen order.
 * @throws Error if any id is not a known standard scenario id.
 */
export function collectLiveTransportStandardScenarioCoverage<TId extends string>(params: {
  alwaysOnStandardScenarioIds?: readonly LiveTransportStandardScenarioId[];
  scenarios: readonly LiveTransportScenarioDefinition<TId>[];
}) {
  // A Set remembers first-insertion order, which gives ordering and de-duplication in one place.
  const covered = new Set<LiveTransportStandardScenarioId>();

  const alwaysOnIds = params.alwaysOnStandardScenarioIds ?? [];
  assertKnownStandardScenarioIds(alwaysOnIds);
  for (const alwaysOnId of alwaysOnIds) {
    covered.add(alwaysOnId);
  }

  for (const { standardId } of params.scenarios) {
    if (!standardId) {
      continue;
    }
    assertKnownStandardScenarioIds([standardId]);
    covered.add(standardId);
  }

  const coverage: LiveTransportStandardScenarioId[] = [...covered];
  return coverage;
}
/**
 * Lists the expected standard scenario ids that a lane does not cover.
 *
 * @param params.coveredStandardScenarioIds - Standard ids the lane covers.
 * @param params.expectedStandardScenarioIds - Standard ids the lane is measured against.
 * @returns The uncovered expected ids, in the order of the expected list.
 * @throws Error if either list contains an id that is not a known standard scenario id.
 */
export function findMissingLiveTransportStandardScenarios(params: {
  coveredStandardScenarioIds: readonly LiveTransportStandardScenarioId[];
  expectedStandardScenarioIds: readonly LiveTransportStandardScenarioId[];
}) {
  const { coveredStandardScenarioIds, expectedStandardScenarioIds } = params;
  assertKnownStandardScenarioIds(coveredStandardScenarioIds);
  assertKnownStandardScenarioIds(expectedStandardScenarioIds);

  const coveredLookup = new Set(coveredStandardScenarioIds);
  const missing: LiveTransportStandardScenarioId[] = [];
  for (const expectedId of expectedStandardScenarioIds) {
    if (!coveredLookup.has(expectedId)) {
      missing.push(expectedId);
    }
  }
  return missing;
}

View File

@@ -1,4 +1,8 @@
import { describe, expect, it } from "vitest";
import {
LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS,
findMissingLiveTransportStandardScenarios,
} from "./live-transport-scenarios.js";
import { __testing as scenarioTesting } from "./matrix-live-scenarios.js";
describe("matrix live qa scenarios", () => {
@@ -25,4 +29,23 @@ describe("matrix live qa scenarios", () => {
scenarioTesting.findMatrixQaScenarios(["matrix-thread-follow-up", "typo-scenario"]),
).toThrow("unknown Matrix QA scenario id(s): typo-scenario");
});
// Pins the Matrix lane's standard coverage. The expected order is the always-on "canary" followed
// by each scenario's standardId in MATRIX_QA_SCENARIOS order. The second assertion requires that
// no baseline contract id is missing.
it("covers the baseline live transport contract plus Matrix-specific extras", () => {
expect(scenarioTesting.MATRIX_QA_STANDARD_SCENARIO_IDS).toEqual([
"canary",
"thread-follow-up",
"thread-isolation",
"top-level-reply-shape",
"reaction-observation",
"restart-resume",
"mention-gating",
"allowlist-block",
]);
expect(
findMissingLiveTransportStandardScenarios({
coveredStandardScenarioIds: scenarioTesting.MATRIX_QA_STANDARD_SCENARIO_IDS,
expectedStandardScenarioIds: LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS,
}),
).toEqual([]);
});
});

View File

@@ -1,4 +1,9 @@
import { randomUUID } from "node:crypto";
import {
collectLiveTransportStandardScenarioCoverage,
selectLiveTransportScenarios,
type LiveTransportScenarioDefinition,
} from "./live-transport-scenarios.js";
import { createMatrixQaClient, type MatrixQaObservedEvent } from "./matrix-driver-client.js";
export type MatrixQaScenarioId =
@@ -10,11 +15,7 @@ export type MatrixQaScenarioId =
| "matrix-mention-gating"
| "matrix-allowlist-block";
/**
 * Matrix lane scenario definition: the shared live-transport scenario shape with `id` narrowed to
 * the Matrix scenario ids. Declared once as an alias so the lane cannot drift from the shared shape.
 */
export type MatrixQaScenarioDefinition = LiveTransportScenarioDefinition<MatrixQaScenarioId>;
export type MatrixQaReplyArtifact = {
bodyPreview?: string;
@@ -78,54 +79,59 @@ const NO_REPLY_WINDOW_MS = 8_000;
/**
 * Scenarios the Matrix live lane defines, each mapped to the standard contract id it covers.
 *
 * Order matters: `MATRIX_QA_STANDARD_SCENARIO_IDS` is derived from this array in order.
 */
export const MATRIX_QA_SCENARIOS: MatrixQaScenarioDefinition[] = [
{
id: "matrix-thread-follow-up",
standardId: "thread-follow-up",
timeoutMs: 60_000,
title: "Matrix thread follow-up reply",
},
{
id: "matrix-thread-isolation",
standardId: "thread-isolation",
timeoutMs: 75_000,
title: "Matrix top-level reply stays out of prior thread",
},
{
id: "matrix-top-level-reply-shape",
standardId: "top-level-reply-shape",
timeoutMs: 45_000,
title: "Matrix top-level reply keeps replyToMode off",
},
{
id: "matrix-reaction-notification",
standardId: "reaction-observation",
timeoutMs: 45_000,
title: "Matrix reactions on bot replies are observed",
},
{
id: "matrix-restart-resume",
standardId: "restart-resume",
timeoutMs: 60_000,
title: "Matrix lane resumes cleanly after gateway restart",
},
// The two scenarios below use the shared no-reply window constant as their timeout.
{
id: "matrix-mention-gating",
standardId: "mention-gating",
timeoutMs: NO_REPLY_WINDOW_MS,
title: "Matrix room message without mention does not trigger",
},
{
id: "matrix-allowlist-block",
standardId: "allowlist-block",
timeoutMs: NO_REPLY_WINDOW_MS,
title: "Matrix allowlist blocks non-driver replies",
},
];
/**
 * Standard contract ids covered by the Matrix lane, computed once at module load.
 *
 * "canary" is passed as always-on because no entry in `MATRIX_QA_SCENARIOS` carries that
 * standardId. NOTE(review): presumably the lane runs its canary outside the scenario list; confirm
 * against the Matrix lane runner.
 */
export const MATRIX_QA_STANDARD_SCENARIO_IDS = collectLiveTransportStandardScenarioCoverage({
alwaysOnStandardScenarioIds: ["canary"],
scenarios: MATRIX_QA_SCENARIOS,
});
/**
 * Resolves which Matrix scenarios to run for an optional list of requested ids.
 *
 * Delegates to the shared selector: no ids (or an empty list) yields a copy of every Matrix
 * scenario; otherwise the matching scenarios are returned in `MATRIX_QA_SCENARIOS` order.
 *
 * @param ids - Requested Matrix scenario ids.
 * @throws Error `unknown Matrix QA scenario id(s): …` listing each id that matches no scenario.
 */
export function findMatrixQaScenarios(ids?: string[]) {
  return selectLiveTransportScenarios({
    ids,
    laneLabel: "Matrix",
    scenarios: MATRIX_QA_SCENARIOS,
  });
}
export function buildMentionPrompt(sutUserId: string, token: string) {
@@ -571,6 +577,7 @@ export async function runMatrixQaScenario(
}
export const __testing = {
MATRIX_QA_STANDARD_SCENARIO_IDS,
buildMatrixReplyDetails,
buildMentionPrompt,
findMatrixQaScenarios,

View File

@@ -1,5 +1,9 @@
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
import { afterEach, describe, expect, it, vi } from "vitest";
import {
LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS,
findMissingLiveTransportStandardScenarios,
} from "./live-transport-scenarios.js";
import { __testing } from "./telegram-live.runtime.js";
const fetchWithSsrFGuardMock = vi.hoisted(() =>
@@ -220,6 +224,16 @@ describe("telegram live qa runtime", () => {
);
});
// Pins the Telegram lane's current standard coverage. The second assertion records the baseline
// contract ids Telegram does not cover yet, so it expects a non-empty gap list.
it("tracks Telegram live coverage against the shared transport contract", () => {
expect(__testing.TELEGRAM_QA_STANDARD_SCENARIO_IDS).toEqual(["canary", "help-command"]);
expect(
findMissingLiveTransportStandardScenarios({
coveredStandardScenarioIds: __testing.TELEGRAM_QA_STANDARD_SCENARIO_IDS,
expectedStandardScenarioIds: LIVE_TRANSPORT_BASELINE_STANDARD_SCENARIO_IDS,
}),
).toEqual(["mention-gating", "allowlist-block", "top-level-reply-shape", "restart-resume"]);
});
it("adds an abort deadline to Telegram API requests", async () => {
let signal: AbortSignal | undefined;
vi.stubGlobal(

View File

@@ -6,6 +6,11 @@ import { fetchWithSsrFGuard } from "openclaw/plugin-sdk/ssrf-runtime";
import { startQaGatewayChild } from "./gateway-child.js";
import { startQaLiveLaneGateway } from "./live-gateway.runtime.js";
import { appendLiveLaneIssue, buildLiveLaneArtifactsError } from "./live-lane-helpers.js";
import {
collectLiveTransportStandardScenarioCoverage,
selectLiveTransportScenarios,
type LiveTransportScenarioDefinition,
} from "./live-transport-scenarios.js";
import {
defaultQaModelForMode,
normalizeQaProviderMode,
@@ -25,10 +30,7 @@ type TelegramBotIdentity = {
username?: string;
};
/**
 * Telegram lane scenario definition: the shared live-transport scenario shape (with `id` fixed to
 * the single Telegram scenario id) plus a builder for the message text to send.
 */
type TelegramQaScenarioDefinition = LiveTransportScenarioDefinition<"telegram-help-command"> & {
  // Builds the scenario's input text from the SUT bot's username.
  buildInput: (sutUsername: string) => string;
};
@@ -157,12 +159,18 @@ type TelegramSendMessageResult = {
// Scenarios the Telegram live lane defines, each mapped to the standard contract id it covers.
const TELEGRAM_QA_SCENARIOS: TelegramQaScenarioDefinition[] = [
{
id: "telegram-help-command",
standardId: "help-command",
title: "Telegram help command reply",
timeoutMs: 45_000,
// Addresses the help command to the SUT bot by username.
buildInput: (sutUsername) => `/help@${sutUsername}`,
},
];
/**
 * Standard contract ids covered by the Telegram lane, computed once at module load.
 *
 * "canary" is passed as always-on because no entry in `TELEGRAM_QA_SCENARIOS` carries that
 * standardId. NOTE(review): presumably the lane runs its canary outside the scenario list; confirm
 * against the Telegram lane runner.
 */
export const TELEGRAM_QA_STANDARD_SCENARIO_IDS = collectLiveTransportStandardScenarioCoverage({
alwaysOnStandardScenarioIds: ["canary"],
scenarios: TELEGRAM_QA_SCENARIOS,
});
const TELEGRAM_QA_ENV_KEYS = [
"OPENCLAW_QA_TELEGRAM_GROUP_ID",
"OPENCLAW_QA_TELEGRAM_DRIVER_BOT_TOKEN",
@@ -487,18 +495,11 @@ function buildObservedMessagesArtifact(params: {
}
/**
 * Resolves which Telegram scenarios to run for an optional list of requested ids.
 *
 * Delegates to the shared selector: no ids (or an empty list) yields a copy of every Telegram
 * scenario; otherwise the matching scenarios are returned in `TELEGRAM_QA_SCENARIOS` order.
 *
 * @param ids - Requested Telegram scenario ids.
 * @throws Error `unknown Telegram QA scenario id(s): …` listing each id that matches no scenario.
 */
function findScenario(ids?: string[]) {
  return selectLiveTransportScenarios({
    ids,
    laneLabel: "Telegram",
    scenarios: TELEGRAM_QA_SCENARIOS,
  });
}
function classifyCanaryReply(params: {
@@ -879,6 +880,7 @@ export async function runTelegramQaLive(params: {
export const __testing = {
TELEGRAM_QA_SCENARIOS,
TELEGRAM_QA_STANDARD_SCENARIO_IDS,
buildTelegramQaConfig,
buildObservedMessagesArtifact,
canaryFailureMessage,