test(qa-matrix): isolate flaky beta scenarios

This commit is contained in:
Peter Steinberger
2026-04-15 06:09:18 +01:00
parent 2cc97989d3
commit 7fc5a18d89
6 changed files with 162 additions and 28 deletions

View File

@@ -39,9 +39,12 @@ export type MatrixQaScenarioDefinition = LiveTransportScenarioDefinition<MatrixQ
topology?: MatrixQaTopologySpec;
};
// Room keys: short string identifiers for the rooms that Matrix QA scenarios refer to.
// The block, membership, restart and homeserver keys are used in this file as the `key`
// of a single-group topology; scenario code resolves a key to a concrete room id through
// resolveMatrixQaScenarioRoomId.
export const MATRIX_QA_BLOCK_ROOM_KEY = "block";
export const MATRIX_QA_DRIVER_DM_ROOM_KEY = "driver-dm";
export const MATRIX_QA_DRIVER_DM_SHARED_ROOM_KEY = "driver-dm-shared";
export const MATRIX_QA_HOMESERVER_ROOM_KEY = "homeserver";
export const MATRIX_QA_MEMBERSHIP_ROOM_KEY = "membership";
export const MATRIX_QA_RESTART_ROOM_KEY = "restart";
export const MATRIX_QA_SECONDARY_ROOM_KEY = "secondary";
function buildMatrixQaDmTopology(
@@ -104,12 +107,30 @@ const MATRIX_QA_SECONDARY_ROOM_TOPOLOGY = buildMatrixQaSingleGroupTopology({
requireMention: true,
});
// Dedicated single-group room topologies, one per room key. Each is built with
// buildMatrixQaSingleGroupTopology from a key, a display name, and requireMention: true.

// Room referenced by the "matrix-room-block-streaming" scenario's `topology`.
const MATRIX_QA_BLOCK_ROOM_TOPOLOGY = buildMatrixQaSingleGroupTopology({
key: MATRIX_QA_BLOCK_ROOM_KEY,
name: "Matrix QA Block Streaming Room",
requireMention: true,
});
// NOTE(review): presumably attached to the membership-loss scenario; that entry is not
// visible in this view, so confirm against the scenario list.
const MATRIX_QA_MEMBERSHIP_ROOM_TOPOLOGY = buildMatrixQaSingleGroupTopology({
key: MATRIX_QA_MEMBERSHIP_ROOM_KEY,
name: "Matrix QA Membership Room",
requireMention: true,
});
// Room referenced by the scenario titled "Matrix lane resumes cleanly after gateway restart".
const MATRIX_QA_RESTART_ROOM_TOPOLOGY = buildMatrixQaSingleGroupTopology({
key: MATRIX_QA_RESTART_ROOM_KEY,
name: "Matrix QA Restart Room",
requireMention: true,
});
// Room referenced by the "matrix-homeserver-restart-resume" scenario's `topology`.
const MATRIX_QA_HOMESERVER_ROOM_TOPOLOGY = buildMatrixQaSingleGroupTopology({
key: MATRIX_QA_HOMESERVER_ROOM_KEY,
name: "Matrix QA Homeserver Restart Room",
requireMention: true,
});
export const MATRIX_QA_SCENARIOS: MatrixQaScenarioDefinition[] = [
{
id: "matrix-thread-follow-up",
@@ -149,7 +170,20 @@ export const MATRIX_QA_SCENARIOS: MatrixQaScenarioDefinition[] = [
id: "matrix-room-block-streaming",
timeoutMs: 45_000,
title: "Matrix block streaming preserves completed quiet preview blocks",
topology: MATRIX_QA_BLOCK_ROOM_TOPOLOGY,
configOverrides: {
agentDefaults: {
blockStreamingChunk: {
breakPreference: "newline",
maxChars: 48,
minChars: 1,
},
blockStreamingCoalesce: {
idleMs: 0,
maxChars: 48,
minChars: 1,
},
},
blockStreaming: true,
streaming: "quiet",
},
@@ -228,6 +262,7 @@ export const MATRIX_QA_SCENARIOS: MatrixQaScenarioDefinition[] = [
standardId: "restart-resume",
timeoutMs: 60_000,
title: "Matrix lane resumes cleanly after gateway restart",
topology: MATRIX_QA_RESTART_ROOM_TOPOLOGY,
},
{
id: "matrix-room-membership-loss",
@@ -239,6 +274,7 @@ export const MATRIX_QA_SCENARIOS: MatrixQaScenarioDefinition[] = [
id: "matrix-homeserver-restart-resume",
timeoutMs: 75_000,
title: "Matrix lane resumes after homeserver restart",
topology: MATRIX_QA_HOMESERVER_ROOM_TOPOLOGY,
},
{
id: "matrix-mention-gating",

View File

@@ -1,7 +1,10 @@
import { randomUUID } from "node:crypto";
import type { MatrixQaObservedEvent } from "../../substrate/events.js";
import {
MATRIX_QA_BLOCK_ROOM_KEY,
MATRIX_QA_HOMESERVER_ROOM_KEY,
MATRIX_QA_MEMBERSHIP_ROOM_KEY,
MATRIX_QA_RESTART_ROOM_KEY,
resolveMatrixQaScenarioRoomId,
} from "./scenario-catalog.js";
import {
@@ -218,30 +221,51 @@ export async function runRoomThreadReplyOverrideScenario(context: MatrixQaScenar
}
/**
 * Observer-allowlist override scenario: acting as the observer account, mention the SUT in
 * `context.roomId` and wait for a top-level reply from the SUT in that room.
 *
 * NOTE(review): this span reads like a diff rendered without +/- markers. The
 * `runTopLevelMentionScenario` call, `assertTopLevelReplyArtifact`, and the `result.*`
 * fields look like the previous implementation shown next to its replacement
 * (`primeMatrixQaActorCursor` / `sendTextMessage` / `waitForRoomEvent`). Confirm against the
 * actual file before relying on either version.
 *
 * @param context Scenario context; this function reads the observer access token and user
 *   id, base URL, room id, SUT user id, sync state, observed events and timeout from it.
 * @returns An execution record whose `artifacts` carry the observer user id, the trigger
 *   event id, the reply artifact, the token and the trigger body, and whose `details` is a
 *   newline-joined summary.
 */
export async function runObserverAllowlistOverrideScenario(context: MatrixQaScenarioContext) {
const result = await runTopLevelMentionScenario({
const { client, startSince } = await primeMatrixQaActorCursor({
accessToken: context.observerAccessToken,
actorId: "observer",
baseUrl: context.baseUrl,
observedEvents: context.observedEvents,
roomId: context.roomId,
syncState: context.syncState,
sutUserId: context.sutUserId,
timeoutMs: context.timeoutMs,
tokenPrefix: "MATRIX_QA_OBSERVER_ALLOWLIST",
});
assertTopLevelReplyArtifact("observer allowlist override reply", result.reply);
// Per-run marker: fixed prefix plus the first 8 characters of a random UUID, upper-cased.
const token = `MATRIX_QA_OBSERVER_ALLOWLIST_${randomUUID().slice(0, 8).toUpperCase()}`;
const body = buildMentionPrompt(context.sutUserId, token);
const driverEventId = await client.sendTextMessage({
body,
mentionUserIds: [context.sutUserId],
roomId: context.roomId,
});
// Accepts any non-empty m.room.message from the SUT in this room that has no relatesTo;
// the predicate does not require the body to contain the token.
const matched = await client.waitForRoomEvent({
observedEvents: context.observedEvents,
predicate: (event) =>
event.roomId === context.roomId &&
event.sender === context.sutUserId &&
event.type === "m.room.message" &&
event.relatesTo === undefined &&
typeof event.body === "string" &&
event.body.trim().length > 0,
roomId: context.roomId,
since: startSince,
timeoutMs: context.timeoutMs,
});
// Hand the sync position returned with the matched event back to the observer's cursor.
advanceMatrixQaActorCursor({
actorId: "observer",
syncState: context.syncState,
nextSince: matched.since,
startSince,
});
// NOTE(review): presumably the artifact records whether the body matched the token (the
// accompanying test expects `tokenMatched: false`) — confirm in buildMatrixReplyArtifact.
const reply = buildMatrixReplyArtifact(matched.event, token);
return {
artifacts: {
actorUserId: context.observerUserId,
driverEventId: result.driverEventId,
reply: result.reply,
token: result.token,
triggerBody: result.body,
driverEventId,
reply,
token,
triggerBody: body,
},
details: [
`trigger sender: ${context.observerUserId}`,
`driver event: ${result.driverEventId}`,
...buildMatrixReplyDetails("reply", result.reply),
`driver event: ${driverEventId}`,
...buildMatrixReplyDetails("reply", reply),
].join("\n"),
} satisfies MatrixQaScenarioExecution;
}
@@ -312,6 +336,7 @@ export async function runQuietStreamingPreviewScenario(context: MatrixQaScenario
}
export async function runBlockStreamingScenario(context: MatrixQaScenarioContext) {
const roomId = resolveMatrixQaScenarioRoomId(context, MATRIX_QA_BLOCK_ROOM_KEY);
const { client, startSince } = await primeMatrixQaActorCursor({
accessToken: context.driverAccessToken,
actorId: "driver",
@@ -324,27 +349,28 @@ export async function runBlockStreamingScenario(context: MatrixQaScenarioContext
const driverEventId = await client.sendTextMessage({
body: triggerBody,
mentionUserIds: [context.sutUserId],
roomId: context.roomId,
roomId,
});
const firstBlock = await client.waitForRoomEvent({
observedEvents: context.observedEvents,
predicate: (event) =>
event.roomId === context.roomId &&
event.roomId === roomId &&
event.sender === context.sutUserId &&
isMatrixQaMessageLikeKind(event.kind) &&
event.body === firstText,
roomId: context.roomId,
(event.body ?? "").includes(firstText) &&
!(event.body ?? "").includes(secondText),
roomId,
since: startSince,
timeoutMs: context.timeoutMs,
});
const secondBlock = await client.waitForRoomEvent({
observedEvents: context.observedEvents,
predicate: (event) =>
event.roomId === context.roomId &&
event.roomId === roomId &&
event.sender === context.sutUserId &&
isMatrixQaMessageLikeKind(event.kind) &&
event.body === secondText,
roomId: context.roomId,
(event.body ?? "").includes(secondText),
roomId,
since: firstBlock.since,
timeoutMs: context.timeoutMs,
});
@@ -364,10 +390,12 @@ export async function runBlockStreamingScenario(context: MatrixQaScenarioContext
blockEventIds: [firstBlock.event.eventId, secondBlock.event.eventId],
driverEventId,
reply: buildMatrixReplyArtifact(secondBlock.event, secondText),
roomId,
token: secondText,
triggerBody,
},
details: [
`room id: ${roomId}`,
`driver event: ${driverEventId}`,
`block one event: ${firstBlock.event.eventId}`,
`block two event: ${secondBlock.event.eventId}`,
@@ -592,12 +620,13 @@ export async function runHomeserverRestartResumeScenario(context: MatrixQaScenar
if (!context.interruptTransport) {
throw new Error("Matrix homeserver restart scenario requires a transport interruption hook");
}
const roomId = resolveMatrixQaScenarioRoomId(context, MATRIX_QA_HOMESERVER_ROOM_KEY);
await context.interruptTransport();
const resumed = await runDriverTopLevelMentionScenario({
baseUrl: context.baseUrl,
driverAccessToken: context.driverAccessToken,
observedEvents: context.observedEvents,
roomId: context.roomId,
roomId,
syncState: context.syncState,
sutUserId: context.sutUserId,
timeoutMs: context.timeoutMs,
@@ -608,10 +637,12 @@ export async function runHomeserverRestartResumeScenario(context: MatrixQaScenar
artifacts: {
driverEventId: resumed.driverEventId,
reply: resumed.reply,
roomId,
token: resumed.token,
transportInterruption: "homeserver-restart",
},
details: [
`room id: ${roomId}`,
"transport interruption: homeserver-restart",
`driver event: ${resumed.driverEventId}`,
...buildMatrixReplyDetails("reply", resumed.reply),
@@ -623,12 +654,13 @@ export async function runRestartResumeScenario(context: MatrixQaScenarioContext)
if (!context.restartGateway) {
throw new Error("Matrix restart scenario requires a gateway restart callback");
}
const roomId = resolveMatrixQaScenarioRoomId(context, MATRIX_QA_RESTART_ROOM_KEY);
await context.restartGateway();
const result = await runDriverTopLevelMentionScenario({
baseUrl: context.baseUrl,
driverAccessToken: context.driverAccessToken,
observedEvents: context.observedEvents,
roomId: context.roomId,
roomId,
syncState: context.syncState,
sutUserId: context.sutUserId,
timeoutMs: context.timeoutMs,
@@ -640,9 +672,11 @@ export async function runRestartResumeScenario(context: MatrixQaScenarioContext)
driverEventId: result.driverEventId,
reply: result.reply,
restartSignal: "SIGUSR1",
roomId,
token: result.token,
},
details: [
`room id: ${roomId}`,
"restart signal: SIGUSR1",
`post-restart driver event: ${result.driverEventId}`,
...buildMatrixReplyDetails("reply", result.reply),

View File

@@ -26,6 +26,7 @@ export type MatrixQaScenarioArtifacts = {
recoveredDriverEventId?: string;
recoveredReply?: MatrixQaReplyArtifact;
roomKey?: string;
roomId?: string;
restartSignal?: string;
rootEventId?: string;
threadDriverEventId?: string;

View File

@@ -336,10 +336,7 @@ describe("matrix live qa scenarios", () => {
eventId: "$sut-reply",
sender: "@sut:matrix-qa.test",
type: "m.room.message",
body: String(sendTextMessage.mock.calls[0]?.[0]?.body).replace(
"@sut:matrix-qa.test reply with only this exact marker: ",
"",
),
body: "observer sender accepted",
},
since: "observer-sync-next",
}));
@@ -380,6 +377,9 @@ describe("matrix live qa scenarios", () => {
artifacts: {
actorUserId: "@observer:matrix-qa.test",
driverEventId: "$observer-allow-trigger",
reply: {
tokenMatched: false,
},
},
});
@@ -707,7 +707,21 @@ describe("matrix live qa scenarios", () => {
topology: {
defaultRoomId: "!main:matrix-qa.test",
defaultRoomKey: "main",
rooms: [],
rooms: [
{
key: "block",
kind: "group",
memberRoles: ["driver", "observer", "sut"],
memberUserIds: [
"@driver:matrix-qa.test",
"@observer:matrix-qa.test",
"@sut:matrix-qa.test",
],
name: "Block",
requireMention: true,
roomId: "!block:matrix-qa.test",
},
],
},
}),
).resolves.toMatchObject({
@@ -720,7 +734,7 @@ describe("matrix live qa scenarios", () => {
expect(sendTextMessage).toHaveBeenCalledWith({
body: expect.stringContaining("Matrix block streaming QA check"),
mentionUserIds: ["@sut:matrix-qa.test"],
roomId: "!main:matrix-qa.test",
roomId: "!block:matrix-qa.test",
});
expect(waitForRoomEvent).toHaveBeenNthCalledWith(
2,

View File

@@ -86,6 +86,18 @@ describe("matrix qa config", () => {
overrides: {
autoJoin: "allowlist",
autoJoinAllowlist: [" !dm:matrix-qa.test ", "#ops:matrix-qa.test"],
agentDefaults: {
blockStreamingChunk: {
breakPreference: "newline",
maxChars: 48,
minChars: 1,
},
blockStreamingCoalesce: {
idleMs: 0,
maxChars: 48,
minChars: 1,
},
},
blockStreaming: true,
dm: {
sessionScope: "per-room",
@@ -108,6 +120,18 @@ describe("matrix qa config", () => {
topology,
});
expect(next.agents?.defaults).toMatchObject({
blockStreamingChunk: {
breakPreference: "newline",
maxChars: 48,
minChars: 1,
},
blockStreamingCoalesce: {
idleMs: 0,
maxChars: 48,
minChars: 1,
},
});
expect(next.channels?.matrix?.accounts?.sut).toMatchObject({
autoJoin: "allowlist",
autoJoinAllowlist: ["!dm:matrix-qa.test", "#ops:matrix-qa.test"],

View File

@@ -9,6 +9,19 @@ export type MatrixQaAutoJoinMode = "allowlist" | "always" | "off";
/** Accepted values for the Matrix QA `streaming` setting (scenario overrides use e.g. "quiet"). */
export type MatrixQaStreamingMode = "off" | "partial" | "quiet";
/** Role names for the accounts taking part in a QA run: driver, observer, or the system under test. */
export type MatrixQaActorRole = "driver" | "observer" | "sut";
/**
 * Optional agent-default overrides a QA scenario can supply through
 * `MatrixQaConfigOverrides.agentDefaults`. When present, buildMatrixQaConfig spreads this
 * object over `baseCfg.agents.defaults`, so any key set here replaces the base value
 * wholesale (a shallow merge, not a deep one).
 */
export type MatrixQaAgentDefaultsOverrides = {
// Chunking settings for block streaming; every field is optional.
blockStreamingChunk?: {
// Where a chunk may be split; limited to these three literals.
breakPreference?: "newline" | "paragraph" | "sentence";
// NOTE(review): presumably upper/lower character bounds per chunk — confirm with the consumer.
maxChars?: number;
minChars?: number;
};
// Coalescing settings for block streaming; every field is optional.
blockStreamingCoalesce?: {
// NOTE(review): presumably an idle window in milliseconds — confirm with the consumer.
idleMs?: number;
maxChars?: number;
minChars?: number;
};
};
export type MatrixQaGroupConfigOverrides = {
enabled?: boolean;
requireMention?: boolean;
@@ -23,6 +36,7 @@ export type MatrixQaDmConfigOverrides = {
};
export type MatrixQaConfigOverrides = {
agentDefaults?: MatrixQaAgentDefaultsOverrides;
autoJoin?: MatrixQaAutoJoinMode;
autoJoinAllowlist?: string[];
blockStreaming?: boolean;
@@ -352,6 +366,17 @@ export function buildMatrixQaConfig(
return {
...baseCfg,
...(params.overrides?.agentDefaults
? {
agents: {
...baseCfg.agents,
defaults: {
...baseCfg.agents?.defaults,
...params.overrides.agentDefaults,
},
},
}
: {}),
plugins: {
...baseCfg.plugins,
allow: pluginAllow,