qa-matrix: add streaming tool progress scenarios

This commit is contained in:
Gustavo Madeira Santana
2026-04-27 16:19:26 -04:00
parent 3132f4990c
commit b9fd13e8d7
9 changed files with 754 additions and 12 deletions

View File

@@ -82,6 +82,7 @@ The full scenario id list is the `MatrixQaScenarioId` union in `extensions/qa-ma
- threading — `matrix-thread-*`, `matrix-subagent-thread-spawn`
- top-level / DM / room — `matrix-top-level-reply-shape`, `matrix-room-*`, `matrix-dm-*`
- streaming and tool progress — `matrix-room-partial-streaming-preview`, `matrix-room-quiet-streaming-preview`, `matrix-room-tool-progress-*`, `matrix-room-block-streaming`
- media — `matrix-media-type-coverage`, `matrix-room-image-understanding-attachment`, `matrix-attachment-only-ignored`, `matrix-unsupported-media-safe`
- routing — `matrix-room-autojoin-invite`, `matrix-secondary-room-*`
- reactions — `matrix-reaction-*`

View File

@@ -20,7 +20,12 @@ export type MatrixQaScenarioId =
| "matrix-subagent-thread-spawn"
| "matrix-top-level-reply-shape"
| "matrix-room-thread-reply-override"
| "matrix-room-partial-streaming-preview"
| "matrix-room-quiet-streaming-preview"
| "matrix-room-tool-progress-preview"
| "matrix-room-tool-progress-preview-opt-out"
| "matrix-room-tool-progress-error"
| "matrix-room-tool-progress-mention-safety"
| "matrix-room-block-streaming"
| "matrix-room-image-understanding-attachment"
| "matrix-room-generated-image-delivery"
@@ -317,6 +322,14 @@ export const MATRIX_QA_SCENARIOS: MatrixQaScenarioDefinition[] = [
threadReplies: "always",
},
},
{
id: "matrix-room-partial-streaming-preview",
timeoutMs: 45_000,
title: "Matrix partial streaming emits text previews before finalizing",
configOverrides: {
streaming: "partial",
},
},
{
id: "matrix-room-quiet-streaming-preview",
timeoutMs: 45_000,
@@ -325,6 +338,47 @@ export const MATRIX_QA_SCENARIOS: MatrixQaScenarioDefinition[] = [
streaming: "quiet",
},
},
{
id: "matrix-room-tool-progress-preview",
timeoutMs: 60_000,
title: "Matrix streaming folds tool progress into the preview message",
configOverrides: {
streaming: "quiet",
toolProfile: "coding",
},
},
{
id: "matrix-room-tool-progress-preview-opt-out",
timeoutMs: 60_000,
title: "Matrix streaming can opt out of preview tool progress",
configOverrides: {
streaming: {
mode: "quiet",
preview: {
toolProgress: false,
},
},
toolProfile: "coding",
},
},
{
id: "matrix-room-tool-progress-error",
timeoutMs: 60_000,
title: "Matrix streaming finalizes previews after tool errors",
configOverrides: {
streaming: "quiet",
toolProfile: "coding",
},
},
{
id: "matrix-room-tool-progress-mention-safety",
timeoutMs: 60_000,
title: "Matrix streaming keeps tool-progress mentions inert",
configOverrides: {
streaming: "partial",
toolProfile: "coding",
},
},
{
id: "matrix-room-block-streaming",
timeoutMs: 75_000,

View File

@@ -15,8 +15,12 @@ import {
assertTopLevelReplyArtifact,
advanceMatrixQaActorCursor,
buildMatrixBlockStreamingPrompt,
buildMatrixPartialStreamingPrompt,
buildMatrixQuietStreamingPrompt,
buildMatrixQaToken,
buildMatrixToolProgressErrorPrompt,
buildMatrixToolProgressMentionSafetyPrompt,
buildMatrixToolProgressPrompt,
buildMatrixReplyArtifact,
buildMatrixReplyDetails,
buildMentionPrompt,
@@ -554,9 +558,34 @@ export async function runAllowlistHotReloadScenario(context: MatrixQaScenarioCon
}
/**
 * Runs the quiet-streaming preview scenario.
 *
 * Delegates to `runMatrixStreamingPreviewScenario`, expecting the preview to
 * arrive as a "notice" event and the final reply to equal a freshly generated
 * `MATRIX_QA_QUIET_STREAM_<id> preview complete` string.
 */
export async function runQuietStreamingPreviewScenario(context: MatrixQaScenarioContext) {
  // Eight upper-cased UUID characters make the expected reply unique per run.
  const runMarker = randomUUID().slice(0, 8).toUpperCase();
  const expectedReply = `MATRIX_QA_QUIET_STREAM_${runMarker} preview complete`;
  return runMatrixStreamingPreviewScenario(context, {
    triggerBodyBuilder: buildMatrixQuietStreamingPrompt,
    label: "quiet streaming",
    finalText: expectedReply,
    expectedPreviewKind: "notice",
  });
}
/**
 * Runs the partial-streaming preview scenario.
 *
 * Delegates to `runMatrixStreamingPreviewScenario`, expecting the preview to
 * arrive as a "message" event and the final reply to equal a freshly generated
 * `MATRIX_QA_PARTIAL_STREAM_<id> preview complete` string.
 */
export async function runPartialStreamingPreviewScenario(context: MatrixQaScenarioContext) {
  // Eight upper-cased UUID characters make the expected reply unique per run.
  const runMarker = randomUUID().slice(0, 8).toUpperCase();
  const expectedReply = `MATRIX_QA_PARTIAL_STREAM_${runMarker} preview complete`;
  return runMatrixStreamingPreviewScenario(context, {
    triggerBodyBuilder: buildMatrixPartialStreamingPrompt,
    label: "partial streaming",
    finalText: expectedReply,
    expectedPreviewKind: "message",
  });
}
async function runMatrixStreamingPreviewScenario(
context: MatrixQaScenarioContext,
params: {
expectedPreviewKind: MatrixQaObservedEvent["kind"];
finalText: string;
label: string;
triggerBodyBuilder: (sutUserId: string, finalText: string) => string;
},
) {
const { client, startSince } = await primeMatrixQaDriverScenarioClient(context);
const finalText = `MATRIX_QA_QUIET_STREAM_${randomUUID().slice(0, 8).toUpperCase()} preview complete`;
const triggerBody = buildMatrixQuietStreamingPrompt(context.sutUserId, finalText);
const triggerBody = params.triggerBodyBuilder(context.sutUserId, params.finalText);
const driverEventId = await client.sendTextMessage({
body: triggerBody,
mentionUserIds: [context.sutUserId],
@@ -567,7 +596,8 @@ export async function runQuietStreamingPreviewScenario(context: MatrixQaScenario
predicate: (event) =>
event.roomId === context.roomId &&
event.sender === context.sutUserId &&
event.kind === "notice",
event.kind === params.expectedPreviewKind &&
event.relatesTo === undefined,
roomId: context.roomId,
since: startSince,
timeoutMs: context.timeoutMs,
@@ -580,7 +610,7 @@ export async function runQuietStreamingPreviewScenario(context: MatrixQaScenario
isMatrixQaMessageLikeKind(event.kind) &&
event.relatesTo?.relType === "m.replace" &&
event.relatesTo.eventId === preview.event.eventId &&
event.body === finalText,
event.body === params.finalText,
roomId: context.roomId,
since: preview.since,
timeoutMs: context.timeoutMs,
@@ -591,23 +621,268 @@ export async function runQuietStreamingPreviewScenario(context: MatrixQaScenario
nextSince: finalized.since,
startSince,
});
const finalReply = buildMatrixReplyArtifact(finalized.event, params.finalText);
return {
artifacts: {
driverEventId,
previewFormattedBodyPreview: preview.event.formattedBody?.slice(0, 200),
previewBodyPreview: preview.event.body?.slice(0, 200),
previewEventId: preview.event.eventId,
previewMentions: preview.event.mentions,
reply: finalReply,
token: params.finalText,
triggerBody,
},
details: [
`driver event: ${driverEventId}`,
`scenario: ${params.label}`,
`preview event: ${preview.event.eventId}`,
`preview kind: ${preview.event.kind}`,
`preview body: ${preview.event.body ?? "<none>"}`,
`final reply relation: ${finalized.event.relatesTo?.relType ?? "<none>"}`,
`final reply target: ${finalized.event.relatesTo?.eventId ?? "<none>"}`,
...buildMatrixReplyDetails("final reply", finalReply),
].join("\n"),
} satisfies MatrixQaScenarioExecution;
}
/**
 * Collects "Working" messages that the SUT emitted outside the preview event.
 *
 * Only events at or after `startIndex` are inspected. An event is reported when
 * it was sent by `sutUserId`, has type `m.room.message`, its body contains the
 * word "Working" (case-insensitive), it is neither the preview nor the final
 * event, and it does not relate to the preview event.
 *
 * @returns The offending events in their observed order (empty when none).
 */
function findMatrixQaUnexpectedWorkingEvents(params: {
  events: MatrixQaObservedEvent[];
  finalEventId?: string;
  previewEventId?: string;
  startIndex: number;
  sutUserId: string;
}) {
  const { events, finalEventId, previewEventId, startIndex, sutUserId } = params;
  const leaked: MatrixQaObservedEvent[] = [];
  for (const candidate of events.slice(startIndex)) {
    const isSutMessage = candidate.sender === sutUserId && candidate.type === "m.room.message";
    if (!isSutMessage) {
      continue;
    }
    if (!/\bWorking\b/i.test(candidate.body ?? "")) {
      continue;
    }
    // The preview itself and the finalized reply are expected carriers.
    const isKnownEvent =
      candidate.eventId === previewEventId || candidate.eventId === finalEventId;
    if (isKnownEvent) {
      continue;
    }
    // Events relating to the preview are tolerated; anything else leaked.
    if (candidate.relatesTo?.eventId !== previewEventId) {
      leaked.push(candidate);
    }
  }
  return leaked;
}
/**
 * Asserts that a tool-progress preview event carries no live Matrix mentions.
 *
 * Three checks run in order:
 * 1. the event's `mentions` must not flag the room or list any user ids;
 * 2. the formatted body must not contain a `matrix.to` link;
 * 3. the formatted body must still show the mention-looking text
 *    (`@room`, `@alice:matrix-qa.test` or `!room:matrix-qa.test`) inside a
 *    `<code>` element.
 *
 * @throws Error describing the first check that fails.
 */
function assertMatrixQaToolProgressMentionsInert(event: MatrixQaObservedEvent) {
  const { mentions } = event;
  const html = event.formattedBody ?? "";
  const mentionedUserCount = mentions?.userIds?.length ?? 0;

  if (mentions?.room || mentionedUserCount > 0) {
    throw new Error(
      `Matrix tool-progress preview emitted active mentions: ${JSON.stringify(mentions)}`,
    );
  }

  const hasMatrixToLink = /matrix\.to/i.test(html);
  if (hasMatrixToLink) {
    throw new Error(
      `Matrix tool-progress preview linked Matrix mentions: ${event.formattedBody ?? "<none>"}`,
    );
  }

  const keepsMentionTextInCode =
    /<code>[^<]*(?:@room|@alice:matrix-qa\.test|!room:matrix-qa\.test)/i.test(html);
  if (!keepsMentionTextInCode) {
    throw new Error(
      `Matrix tool-progress preview did not preserve mention-looking text inside code: ${event.formattedBody ?? "<none>"}`,
    );
  }
}
/**
 * Drives one tool-progress streaming scenario from trigger to finalized reply.
 *
 * Steps, in order:
 * 1. send the trigger prompt (built by `triggerBodyBuilder`) mentioning the SUT;
 * 2. wait for the preview: a SUT event of `expectedPreviewKind` with no
 *    relation whose body contains "Working";
 * 3. unless the preview body already matches `progressPattern`, wait for an
 *    `m.replace` edit of the preview that contains "Working" and matches it;
 * 4. optionally assert the progress event's mentions are inert;
 * 5. wait for an `m.replace` edit of the preview whose body equals `finalText`;
 * 6. fail if any other SUT "Working" message was observed since the start.
 *
 * @param context Scenario context supplying room, SUT user, timeout and the
 *   observed-event list.
 * @param params Per-scenario expectations; `mentionSafety` enables step 4.
 * @returns Execution artifacts plus a newline-joined human-readable summary.
 * @throws Error when progress text leaked outside the preview event, or when
 *   the mention-safety assertion fails.
 */
async function runMatrixToolProgressScenario(
  context: MatrixQaScenarioContext,
  params: {
    expectedPreviewKind: MatrixQaObservedEvent["kind"];
    finalText: string;
    label: string;
    mentionSafety?: boolean;
    progressPattern: RegExp;
    triggerBodyBuilder: (sutUserId: string, finalText: string) => string;
  },
) {
  const workingMarker = /\bWorking\b/i;
  const isFromSutInRoom = (event: MatrixQaObservedEvent) =>
    event.roomId === context.roomId && event.sender === context.sutUserId;

  const { client, startSince } = await primeMatrixQaDriverScenarioClient(context);
  // Snapshot taken before sending so the leak check only inspects this run's events.
  const startObservedIndex = context.observedEvents.length;
  const triggerBody = params.triggerBodyBuilder(context.sutUserId, params.finalText);
  const driverEventId = await client.sendTextMessage({
    body: triggerBody,
    mentionUserIds: [context.sutUserId],
    roomId: context.roomId,
  });

  const preview = await client.waitForRoomEvent({
    observedEvents: context.observedEvents,
    predicate: (event) =>
      isFromSutInRoom(event) &&
      event.kind === params.expectedPreviewKind &&
      event.relatesTo === undefined &&
      workingMarker.test(event.body ?? ""),
    roomId: context.roomId,
    since: startSince,
    timeoutMs: context.timeoutMs,
  });
  const editsPreview = (event: MatrixQaObservedEvent) =>
    event.relatesTo?.relType === "m.replace" &&
    event.relatesTo.eventId === preview.event.eventId;

  // The first preview may already carry the expected progress line; only wait
  // for an edit when it does not.
  let progress = preview;
  if (!params.progressPattern.test(preview.event.body ?? "")) {
    progress = await client.waitForRoomEvent({
      observedEvents: context.observedEvents,
      predicate: (event) =>
        isFromSutInRoom(event) &&
        event.kind === params.expectedPreviewKind &&
        editsPreview(event) &&
        workingMarker.test(event.body ?? "") &&
        params.progressPattern.test(event.body ?? ""),
      roomId: context.roomId,
      since: preview.since,
      timeoutMs: context.timeoutMs,
    });
  }
  if (params.mentionSafety) {
    assertMatrixQaToolProgressMentionsInert(progress.event);
  }

  const finalized = await client.waitForRoomEvent({
    observedEvents: context.observedEvents,
    predicate: (event) =>
      isFromSutInRoom(event) &&
      isMatrixQaMessageLikeKind(event.kind) &&
      editsPreview(event) &&
      event.body === params.finalText,
    roomId: context.roomId,
    since: progress.since,
    timeoutMs: context.timeoutMs,
  });

  const unexpectedWorkingEvents = findMatrixQaUnexpectedWorkingEvents({
    events: context.observedEvents,
    finalEventId: finalized.event.eventId,
    previewEventId: preview.event.eventId,
    startIndex: startObservedIndex,
    sutUserId: context.sutUserId,
  });
  if (unexpectedWorkingEvents.length > 0) {
    throw new Error(
      `Matrix tool progress leaked outside preview event: ${unexpectedWorkingEvents.map((event) => `${event.eventId}:${event.body ?? ""}`).join("; ")}`,
    );
  }

  advanceMatrixQaActorCursor({
    actorId: "driver",
    syncState: context.syncState,
    nextSince: finalized.since,
    startSince,
  });

  const finalReply = buildMatrixReplyArtifact(finalized.event, params.finalText);
  const progressEvent = progress.event;
  const detailLines = [
    `driver event: ${driverEventId}`,
    `scenario: ${params.label}`,
    `preview event: ${preview.event.eventId}`,
    `preview kind: ${progressEvent.kind}`,
    `preview body: ${progressEvent.body ?? "<none>"}`,
    `preview mentions: ${JSON.stringify(progressEvent.mentions ?? {})}`,
    `final reply relation: ${finalized.event.relatesTo?.relType ?? "<none>"}`,
    `final reply target: ${finalized.event.relatesTo?.eventId ?? "<none>"}`,
    ...buildMatrixReplyDetails("final reply", finalReply),
  ];
  return {
    artifacts: {
      driverEventId,
      previewBodyPreview: progressEvent.body?.slice(0, 200),
      previewEventId: preview.event.eventId,
      previewFormattedBodyPreview: progressEvent.formattedBody?.slice(0, 200),
      previewMentions: progressEvent.mentions,
      reply: finalReply,
      token: params.finalText,
      triggerBody,
    },
    details: detailLines.join("\n"),
  } satisfies MatrixQaScenarioExecution;
}
/**
 * Runs the tool-progress preview scenario: expects a "notice" preview whose
 * body shows a `tool: read` progress line before the reply is finalized.
 */
export async function runToolProgressPreviewScenario(context: MatrixQaScenarioContext) {
  const finalText = buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS");
  return runMatrixToolProgressScenario(context, {
    triggerBodyBuilder: buildMatrixToolProgressPrompt,
    progressPattern: /\btool:\s*read\b/i,
    label: "tool progress preview",
    finalText,
    expectedPreviewKind: "notice",
  });
}
/**
 * Runs the tool-progress error scenario: expects a "notice" preview that
 * reports the read of `missing-matrix-tool-progress-target.txt` and is still
 * finalized with the expected reply.
 */
export async function runToolProgressErrorScenario(context: MatrixQaScenarioContext) {
  const finalText = buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS_ERROR");
  return runMatrixToolProgressScenario(context, {
    triggerBodyBuilder: buildMatrixToolProgressErrorPrompt,
    progressPattern: /read from missing-matrix-tool-progress-target\.txt/i,
    label: "tool progress error",
    finalText,
    expectedPreviewKind: "notice",
  });
}
/**
 * Runs the tool-progress mention-safety scenario: expects a "message" preview
 * whose progress line contains mention-looking text, and enables the
 * inert-mention assertion on that progress event.
 */
export async function runToolProgressMentionSafetyScenario(context: MatrixQaScenarioContext) {
  const finalText = buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS_MENTION_SAFE");
  return runMatrixToolProgressScenario(context, {
    triggerBodyBuilder: buildMatrixToolProgressMentionSafetyPrompt,
    progressPattern: /@room|@alice:matrix-qa\.test|!room:matrix-qa\.test/i,
    mentionSafety: true,
    label: "tool progress mention safety",
    finalText,
    expectedPreviewKind: "message",
  });
}
/**
 * Runs the tool-progress opt-out scenario.
 *
 * Sends the tool-progress trigger prompt, waits for any message-like SUT event
 * whose body equals the expected token, and then verifies that no other SUT
 * `m.room.message` observed since the scenario started begins with a
 * `Working...` progress list.
 *
 * This scenario never waits for a preview event, so the result carries no
 * preview artifacts; only the driver event, the final reply, the token and the
 * trigger body are reported.
 *
 * @param context Scenario context supplying room, SUT user, timeout and the
 *   observed-event list.
 * @returns Execution artifacts plus a newline-joined human-readable summary.
 * @throws Error when a preview progress event was emitted despite the opt-out.
 */
export async function runToolProgressPreviewOptOutScenario(context: MatrixQaScenarioContext) {
  const { client, startSince } = await primeMatrixQaDriverScenarioClient(context);
  // Snapshot taken before sending so only this run's events are inspected below.
  const startObservedIndex = context.observedEvents.length;
  const finalText = buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS_OPTOUT");
  const triggerBody = buildMatrixToolProgressPrompt(context.sutUserId, finalText);
  const driverEventId = await client.sendTextMessage({
    body: triggerBody,
    mentionUserIds: [context.sutUserId],
    roomId: context.roomId,
  });
  // No relation is required here: with progress disabled the reply is not
  // assumed to be an edit of an earlier preview.
  const finalized = await client.waitForRoomEvent({
    observedEvents: context.observedEvents,
    predicate: (event) =>
      event.roomId === context.roomId &&
      event.sender === context.sutUserId &&
      isMatrixQaMessageLikeKind(event.kind) &&
      event.body === finalText,
    roomId: context.roomId,
    since: startSince,
    timeoutMs: context.timeoutMs,
  });
  const unexpectedPreviewProgressEvents = context.observedEvents
    .slice(startObservedIndex)
    .filter(
      (event) =>
        event.sender === context.sutUserId &&
        event.type === "m.room.message" &&
        event.eventId !== finalized.event.eventId &&
        /^Working\.\.\.\n-/i.test(event.body ?? ""),
    );
  if (unexpectedPreviewProgressEvents.length > 0) {
    throw new Error(
      `Matrix tool-progress opt-out still emitted preview progress: ${unexpectedPreviewProgressEvents.map((event) => `${event.eventId}:${event.body ?? ""}`).join("; ")}`,
    );
  }
  advanceMatrixQaActorCursor({
    actorId: "driver",
    syncState: context.syncState,
    nextSince: finalized.since,
    startSince,
  });
  const finalReply = buildMatrixReplyArtifact(finalized.event, finalText);
  return {
    artifacts: {
      driverEventId,
      reply: finalReply,
      token: finalText,
      triggerBody,
    },
    details: [
      `driver event: ${driverEventId}`,
      `final reply relation: ${finalized.event.relatesTo?.relType ?? "<none>"}`,
      `final reply target: ${finalized.event.relatesTo?.eventId ?? "<none>"}`,
      "scenario: tool progress preview opt-out",
      "preview progress events: 0",
      ...buildMatrixReplyDetails("final reply", finalReply),
    ].join("\n"),
  } satisfies MatrixQaScenarioExecution;
}

View File

@@ -81,6 +81,31 @@ export function buildMatrixQuietStreamingPrompt(sutUserId: string, text: string)
return `${sutUserId} Quiet streaming QA check: reply exactly \`${text}\`.`;
}
/**
 * Builds the driver prompt for the partial-streaming check.
 *
 * @param sutUserId User id placed at the start of the prompt.
 * @param text Exact reply the prompt asks for, wrapped in backticks.
 */
export function buildMatrixPartialStreamingPrompt(sutUserId: string, text: string) {
  const instruction = `Partial streaming QA check: reply exactly \`${text}\`.`;
  return [sutUserId, instruction].join(" ");
}
/**
 * Builds the driver prompt for the tool-progress check: asks for a read of
 * `QA_KICKOFF_TASK.md` followed by an exact reply.
 *
 * @param sutUserId User id placed at the start of the prompt.
 * @param text Exact reply the prompt asks for, wrapped in backticks.
 */
export function buildMatrixToolProgressPrompt(sutUserId: string, text: string) {
  const readStep = `${sutUserId} Tool progress QA check: read \`QA_KICKOFF_TASK.md\` before answering.`;
  const replyStep = `After the read completes, reply exactly \`${text}\`.`;
  return `${readStep} ${replyStep}`;
}
/**
 * Builds the driver prompt for the tool-progress error check: asks for a read
 * of `missing-matrix-tool-progress-target.txt` and an exact reply once that
 * read has failed.
 *
 * @param sutUserId User id placed at the start of the prompt.
 * @param text Exact reply the prompt asks for, wrapped in backticks.
 */
export function buildMatrixToolProgressErrorPrompt(sutUserId: string, text: string) {
  const readStep = `${sutUserId} Tool progress error QA check: read \`missing-matrix-tool-progress-target.txt\` before answering.`;
  const replyStep = `After the read fails, reply exactly \`${text}\`.`;
  return `${readStep} ${replyStep}`;
}
/**
 * Builds the driver prompt for the mention-safety check: asks for a read of a
 * file whose name embeds `@room`, `@alice:matrix-qa.test` and
 * `!room:matrix-qa.test`, followed by an exact reply.
 *
 * @param sutUserId User id placed at the start of the prompt.
 * @param text Exact reply the prompt asks for, wrapped in backticks.
 */
export function buildMatrixToolProgressMentionSafetyPrompt(sutUserId: string, text: string) {
  const readStep = `${sutUserId} Tool progress QA check: read \`matrix-progress-@room-@alice:matrix-qa.test-!room:matrix-qa.test.txt\` before answering.`;
  const replyStep = `After the read completes, reply exactly \`${text}\`.`;
  return `${readStep} ${replyStep}`;
}
export function buildMatrixBlockStreamingPrompt(
sutUserId: string,
firstText: string,

View File

@@ -77,6 +77,7 @@ import {
runMatrixQaCanary,
runMembershipLossScenario,
runObserverAllowlistOverrideScenario,
runPartialStreamingPreviewScenario,
runQuietStreamingPreviewScenario,
runReactionThreadedScenario,
runRoomAutoJoinInviteScenario,
@@ -86,6 +87,10 @@ import {
runThreadIsolationScenario,
runThreadNestedReplyShapeScenario,
runThreadRootPreservationScenario,
runToolProgressErrorScenario,
runToolProgressMentionSafetyScenario,
runToolProgressPreviewOptOutScenario,
runToolProgressPreviewScenario,
runTopLevelReplyShapeScenario,
} from "./scenario-runtime-room.js";
import {
@@ -203,8 +208,18 @@ export async function runMatrixQaScenario(
return await runTopLevelReplyShapeScenario(context);
case "matrix-room-thread-reply-override":
return await runRoomThreadReplyOverrideScenario(context);
case "matrix-room-partial-streaming-preview":
return await runPartialStreamingPreviewScenario(context);
case "matrix-room-quiet-streaming-preview":
return await runQuietStreamingPreviewScenario(context);
case "matrix-room-tool-progress-preview":
return await runToolProgressPreviewScenario(context);
case "matrix-room-tool-progress-preview-opt-out":
return await runToolProgressPreviewOptOutScenario(context);
case "matrix-room-tool-progress-error":
return await runToolProgressErrorScenario(context);
case "matrix-room-tool-progress-mention-safety":
return await runToolProgressMentionSafetyScenario(context);
case "matrix-room-block-streaming":
return await runBlockStreamingScenario(context);
case "matrix-room-image-understanding-attachment":

View File

@@ -93,6 +93,8 @@ export type MatrixQaScenarioArtifacts = {
noticeEventId?: string;
previewBodyPreview?: string;
previewEventId?: string;
previewFormattedBodyPreview?: string;
previewMentions?: MatrixQaObservedEvent["mentions"];
blockEventIds?: string[];
bootstrapActor?: "driver" | "observer" | "sut";
bootstrapErrorPreview?: string;

View File

@@ -84,6 +84,48 @@ function matrixQaScenarioContext(): MatrixQaScenarioContext {
};
}
/**
 * Builds an observed `m.room.message` fixture sent by the SUT in the main QA
 * room. Callers must supply `body`, `eventId` and `kind`; any field given in
 * `overrides` replaces the corresponding default.
 */
function matrixQaMessageEvent(
  overrides: Partial<MatrixQaObservedEvent> &
    Pick<MatrixQaObservedEvent, "body" | "eventId" | "kind">,
): MatrixQaObservedEvent {
  const defaults = {
    roomId: "!main:matrix-qa.test",
    sender: "@sut:matrix-qa.test",
    type: "m.room.message",
  } as const;
  // Overrides are spread last so they win over the defaults.
  return { ...defaults, ...overrides };
}
/**
 * Extracts the backtick-quoted text following "reply exactly" from a prompt.
 *
 * @param body Prompt value; it is stringified before matching.
 * @param fallback Returned when no directive is present.
 */
function readMatrixQaReplyDirective(body: unknown, fallback: string) {
  const directive = String(body).match(/reply exactly `([^`]+)`/);
  if (directive === null) {
    return fallback;
  }
  return directive[1] ?? fallback;
}
/**
 * Installs a mocked Matrix QA client whose `waitForRoomEvent` resolves the
 * given events one call at a time, in order.
 *
 * An entry's `event` may be a factory; it is invoked only when that wait is
 * served and receives the `sendTextMessage` mock, so a scripted event can echo
 * data from the prompt that was sent.
 *
 * @returns The three mocks so tests can assert on their calls.
 */
function mockMatrixQaRoomClient(params: {
  driverEventId: string;
  events: Array<{
    event:
      | MatrixQaObservedEvent
      | ((client: { sendTextMessage: ReturnType<typeof vi.fn> }) => MatrixQaObservedEvent);
    since: string;
  }>;
}) {
  const primeRoom = vi.fn().mockResolvedValue("driver-sync-start");
  const sendTextMessage = vi.fn().mockResolvedValue(params.driverEventId);
  const waitForRoomEvent = vi.fn();
  params.events.forEach((scripted) => {
    waitForRoomEvent.mockImplementationOnce(async () => {
      const source = scripted.event;
      if (typeof source === "function") {
        return { event: source({ sendTextMessage }), since: scripted.since };
      }
      return { event: source, since: scripted.since };
    });
  });
  const mocks = { primeRoom, sendTextMessage, waitForRoomEvent };
  createMatrixQaClient.mockReturnValue({ ...mocks });
  return mocks;
}
function mockMatrixQaCliAccount(params: {
accessToken: string;
deviceId: string;
@@ -165,7 +207,12 @@ describe("matrix live qa scenarios", () => {
"matrix-subagent-thread-spawn",
"matrix-top-level-reply-shape",
"matrix-room-thread-reply-override",
"matrix-room-partial-streaming-preview",
"matrix-room-quiet-streaming-preview",
"matrix-room-tool-progress-preview",
"matrix-room-tool-progress-preview-opt-out",
"matrix-room-tool-progress-error",
"matrix-room-tool-progress-mention-safety",
"matrix-room-block-streaming",
"matrix-room-image-understanding-attachment",
"matrix-room-generated-image-delivery",
@@ -2243,6 +2290,269 @@ describe("matrix live qa scenarios", () => {
);
});
it("captures partial preview text messages before the finalized Matrix reply", async () => {
  const previewId = "$partial-preview";
  const finalId = "$partial-final";
  const triggerId = "$partial-stream-trigger";
  // Used only if the sent prompt carries no "reply exactly" directive.
  const fallbackReply = "MATRIX_QA_PARTIAL_STREAM_PREVIEW_COMPLETE";

  const previewEvent = matrixQaMessageEvent({
    body: "partial preview",
    eventId: previewId,
    kind: "message",
  });
  const { sendTextMessage } = mockMatrixQaRoomClient({
    driverEventId: triggerId,
    events: [
      { event: previewEvent, since: "driver-sync-preview" },
      {
        // Built lazily so the final body echoes the token from the sent prompt.
        event: (client) =>
          matrixQaMessageEvent({
            body: readMatrixQaReplyDirective(
              client.sendTextMessage.mock.calls[0]?.[0]?.body,
              fallbackReply,
            ),
            eventId: finalId,
            kind: "message",
            relatesTo: { eventId: previewId, relType: "m.replace" },
          }),
        since: "driver-sync-next",
      },
    ],
  });

  const scenario = MATRIX_QA_SCENARIOS.find(
    ({ id }) => id === "matrix-room-partial-streaming-preview",
  );
  expect(scenario).toBeDefined();

  const execution = await runMatrixQaScenario(scenario!, matrixQaScenarioContext());
  expect(execution).toMatchObject({
    artifacts: {
      driverEventId: triggerId,
      previewEventId: previewId,
      reply: { eventId: finalId },
    },
  });
  expect(sendTextMessage).toHaveBeenCalledWith({
    body: expect.stringContaining("Partial streaming QA check"),
    mentionUserIds: ["@sut:matrix-qa.test"],
    roomId: "!main:matrix-qa.test",
  });
});
it("captures Matrix tool progress inside the quiet preview before finalizing", async () => {
  const previewId = "$tool-progress-preview";
  const finalId = "$tool-progress-final";
  const triggerId = "$tool-progress-trigger";
  const progressBody = "Working...\n- `tool: read`";

  // The preview already contains the progress line, so only two waits are scripted.
  const previewEvent = matrixQaMessageEvent({
    body: progressBody,
    eventId: previewId,
    kind: "notice",
  });
  mockMatrixQaRoomClient({
    driverEventId: triggerId,
    events: [
      { event: previewEvent, since: "driver-sync-preview" },
      {
        event: (client) =>
          matrixQaMessageEvent({
            body: readMatrixQaReplyDirective(
              client.sendTextMessage.mock.calls[0]?.[0]?.body,
              "MATRIX_QA_TOOL_PROGRESS_FIXED",
            ),
            eventId: finalId,
            kind: "notice",
            relatesTo: { eventId: previewId, relType: "m.replace" },
          }),
        since: "driver-sync-next",
      },
    ],
  });

  const scenario = MATRIX_QA_SCENARIOS.find(
    ({ id }) => id === "matrix-room-tool-progress-preview",
  );
  expect(scenario).toBeDefined();

  const execution = await runMatrixQaScenario(scenario!, matrixQaScenarioContext());
  expect(execution).toMatchObject({
    artifacts: {
      driverEventId: triggerId,
      previewBodyPreview: progressBody,
      previewEventId: previewId,
      reply: { eventId: finalId },
    },
  });
});
it("keeps Matrix tool progress opt-out from creating Working previews", async () => {
  const finalId = "$tool-progress-optout-final";
  const triggerId = "$tool-progress-optout-trigger";

  // Only the final reply is scripted: the opt-out scenario must not wait for a preview.
  const { waitForRoomEvent } = mockMatrixQaRoomClient({
    driverEventId: triggerId,
    events: [
      {
        event: (client) =>
          matrixQaMessageEvent({
            body: readMatrixQaReplyDirective(
              client.sendTextMessage.mock.calls[0]?.[0]?.body,
              "MATRIX_QA_TOOL_PROGRESS_OPTOUT_FIXED",
            ),
            eventId: finalId,
            kind: "message",
          }),
        since: "driver-sync-next",
      },
    ],
  });

  const scenario = MATRIX_QA_SCENARIOS.find(
    ({ id }) => id === "matrix-room-tool-progress-preview-opt-out",
  );
  expect(scenario).toBeDefined();

  const execution = await runMatrixQaScenario(scenario!, matrixQaScenarioContext());
  expect(execution).toMatchObject({
    artifacts: {
      driverEventId: triggerId,
      reply: { eventId: finalId },
    },
  });
  expect(waitForRoomEvent).toHaveBeenCalledTimes(1);
});
it("finalizes Matrix tool progress previews after tool errors", async () => {
  const previewId = "$tool-progress-error-preview";
  const finalId = "$tool-progress-error-final";
  const triggerId = "$tool-progress-error-trigger";
  const progressBody = "Working...\n- `read from missing-matrix-tool-progress-target.txt`";

  const previewEvent = matrixQaMessageEvent({
    body: progressBody,
    eventId: previewId,
    kind: "notice",
  });
  const { sendTextMessage } = mockMatrixQaRoomClient({
    driverEventId: triggerId,
    events: [
      { event: previewEvent, since: "driver-sync-preview" },
      {
        event: (client) =>
          matrixQaMessageEvent({
            body: readMatrixQaReplyDirective(
              client.sendTextMessage.mock.calls[0]?.[0]?.body,
              "MATRIX_QA_TOOL_PROGRESS_ERROR_FIXED",
            ),
            eventId: finalId,
            kind: "notice",
            relatesTo: { eventId: previewId, relType: "m.replace" },
          }),
        since: "driver-sync-next",
      },
    ],
  });

  const scenario = MATRIX_QA_SCENARIOS.find(
    ({ id }) => id === "matrix-room-tool-progress-error",
  );
  expect(scenario).toBeDefined();

  const execution = await runMatrixQaScenario(scenario!, matrixQaScenarioContext());
  expect(execution).toMatchObject({
    artifacts: {
      driverEventId: triggerId,
      previewBodyPreview: progressBody,
      previewEventId: previewId,
      reply: {
        eventId: finalId,
        relatesTo: { eventId: previewId, relType: "m.replace" },
      },
    },
  });
  expect(sendTextMessage).toHaveBeenCalledWith({
    body: expect.stringContaining("Tool progress error QA check"),
    mentionUserIds: ["@sut:matrix-qa.test"],
    roomId: "!main:matrix-qa.test",
  });
});
it("keeps Matrix-looking tool progress mentions inert in partial previews", async () => {
  const previewId = "$tool-progress-mention-preview";
  const editId = "$tool-progress-mention-edit";
  const finalId = "$tool-progress-mention-final";
  const triggerId = "$tool-progress-mention-trigger";
  const previewRelation = { eventId: previewId, relType: "m.replace" };

  // The first preview lacks the mention-looking text, forcing the scenario to
  // wait for the edit that carries it.
  const previewEvent = matrixQaMessageEvent({
    body: "Working...\n- `tool: read`",
    eventId: previewId,
    kind: "message",
  });
  const progressEdit = matrixQaMessageEvent({
    body: "Working...\n- `tool: read`\n- `read from matrix-progress-@room-@alice:matrix-qa.test-!room:matrix-qa.test.txt`",
    eventId: editId,
    formattedBody:
      "Working...<br><ul><li><code>read from matrix-progress-@room-@alice:matrix-qa.test-!room:matrix-qa.test.txt</code></li></ul>",
    kind: "message",
    mentions: {},
    relatesTo: { ...previewRelation },
  });
  mockMatrixQaRoomClient({
    driverEventId: triggerId,
    events: [
      { event: previewEvent, since: "driver-sync-preview" },
      { event: progressEdit, since: "driver-sync-progress" },
      {
        event: (client) =>
          matrixQaMessageEvent({
            body: readMatrixQaReplyDirective(
              client.sendTextMessage.mock.calls[0]?.[0]?.body,
              "MATRIX_QA_TOOL_PROGRESS_MENTION_SAFE_FIXED",
            ),
            eventId: finalId,
            kind: "message",
            relatesTo: { ...previewRelation },
          }),
        since: "driver-sync-next",
      },
    ],
  });

  const scenario = MATRIX_QA_SCENARIOS.find(
    ({ id }) => id === "matrix-room-tool-progress-mention-safety",
  );
  expect(scenario).toBeDefined();

  const execution = await runMatrixQaScenario(scenario!, matrixQaScenarioContext());
  expect(execution).toMatchObject({
    artifacts: {
      driverEventId: triggerId,
      previewEventId: previewId,
      previewMentions: {},
      reply: { eventId: finalId },
    },
  });
});
it("preserves separate finalized block events when Matrix block streaming is enabled", async () => {
const primeRoom = vi.fn().mockResolvedValue("driver-sync-start");
const sendTextMessage = vi.fn().mockResolvedValue("$block-stream-trigger");

View File

@@ -254,11 +254,39 @@ describe("matrix qa config", () => {
},
replyToMode: "off",
streaming: "partial",
streamingPreviewToolProgress: true,
threadBindings: {},
threadReplies: "inbound",
});
expect(summarizeMatrixQaConfigSnapshot(snapshot)).toContain("autoJoin=allowlist");
expect(summarizeMatrixQaConfigSnapshot(snapshot)).toContain("streaming=partial");
expect(summarizeMatrixQaConfigSnapshot(snapshot)).toContain(
"streaming.preview.toolProgress=true",
);
});
it("builds Matrix QA config snapshots from structured streaming overrides", () => {
  // Object-form override: mode and preview tool progress are set together.
  const structuredStreaming = {
    mode: "quiet",
    preview: { toolProgress: false },
  } as const;
  const snapshot = buildMatrixQaConfigSnapshot({
    driverUserId: "@driver:matrix-qa.test",
    observerUserId: "@observer:matrix-qa.test",
    overrides: { streaming: structuredStreaming },
    sutUserId: "@sut:matrix-qa.test",
    topology,
  });

  expect(snapshot.streaming).toBe("quiet");
  expect(snapshot.streamingPreviewToolProgress).toBe(false);

  const summary = summarizeMatrixQaConfigSnapshot(snapshot);
  expect(summary).toContain("streaming=quiet");
  expect(summary).toContain("streaming.preview.toolProgress=false");
});
it("resolves role-based Matrix sender allowlist overrides", () => {

View File

@@ -9,6 +9,13 @@ export type MatrixQaAutoJoinMode = "allowlist" | "always" | "off";
export type MatrixQaStreamingMode = "off" | "partial" | "quiet";
export type MatrixQaActorRole = "driver" | "observer" | "sut";
/**
 * Structured (object) form of the `streaming` config override, accepted
 * alongside the plain mode string and boolean forms.
 */
export type MatrixQaStreamingConfig = {
/** Requested streaming mode; the mode resolver returns "off" unless this is "partial" or "quiet". */
mode?: MatrixQaStreamingMode;
preview?: {
/** Tool-progress flag for streaming previews; resolves to `true` when omitted. */
toolProgress?: boolean;
};
};
export type MatrixQaAgentDefaultsOverrides = {
blockStreamingChunk?: {
breakPreference?: "newline" | "paragraph" | "sentence";
@@ -62,7 +69,7 @@ export type MatrixQaConfigOverrides = {
groupsByKey?: Record<string, MatrixQaGroupConfigOverrides>;
replyToMode?: MatrixQaReplyToMode;
startupVerification?: "if-unverified" | "off";
streaming?: "off" | "partial" | "quiet" | boolean;
streaming?: MatrixQaStreamingMode | MatrixQaStreamingConfig | boolean;
threadBindings?: MatrixQaThreadBindingsConfigOverrides;
threadReplies?: MatrixQaThreadRepliesMode;
toolProfile?: "coding" | "messaging" | "minimal";
@@ -86,6 +93,7 @@ export type MatrixQaConfigSnapshot = {
replyToMode: MatrixQaReplyToMode;
startupVerification?: "if-unverified" | "off";
streaming: MatrixQaStreamingMode;
streamingPreviewToolProgress: boolean;
threadBindings: MatrixQaThreadBindingsConfigOverrides;
threadReplies: MatrixQaThreadRepliesMode;
};
@@ -205,9 +213,29 @@ function resolveMatrixQaStreamingMode(
if (value === "quiet") {
return "quiet";
}
if (isMatrixQaStreamingConfig(value)) {
if (value.mode === "partial" || value.mode === "quiet") {
return value.mode;
}
}
return "off";
}
/**
 * Type guard: true when the `streaming` override is given in its structured
 * object form rather than as a mode string, a boolean, or not at all.
 */
function isMatrixQaStreamingConfig(
  value: MatrixQaConfigOverrides["streaming"],
): value is MatrixQaStreamingConfig {
  // `typeof null` is "object", so null is ruled out explicitly; arrays are objects too.
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
/**
 * Resolves the preview tool-progress flag from the `streaming` override.
 *
 * @returns The explicit `preview.toolProgress` value when the override is in
 *   structured form and sets it; otherwise `true`.
 */
function resolveMatrixQaStreamingPreviewToolProgress(
  value: MatrixQaConfigOverrides["streaming"],
): boolean {
  const explicitChoice = isMatrixQaStreamingConfig(value)
    ? value.preview?.toolProgress
    : undefined;
  return explicitChoice ?? true;
}
function resolveMatrixQaAutoJoinAllowlist(params: { overrides?: MatrixQaConfigOverrides }) {
if (params.overrides?.autoJoin !== "allowlist") {
return [];
@@ -352,6 +380,9 @@ export function buildMatrixQaConfigSnapshot(params: {
replyToMode: params.overrides?.replyToMode ?? "off",
startupVerification: params.overrides?.startupVerification,
streaming: resolveMatrixQaStreamingMode(params.overrides?.streaming),
streamingPreviewToolProgress: resolveMatrixQaStreamingPreviewToolProgress(
params.overrides?.streaming,
),
threadBindings: { ...params.overrides?.threadBindings },
threadReplies: params.overrides?.threadReplies ?? "inbound",
};
@@ -366,6 +397,7 @@ export function summarizeMatrixQaConfigSnapshot(snapshot: MatrixQaConfigSnapshot
`dm.sessionScope=${snapshot.dm.sessionScope}`,
`dm.threadReplies=${snapshot.dm.threadReplies}`,
`streaming=${snapshot.streaming}`,
`streaming.preview.toolProgress=${formatMatrixQaBoolean(snapshot.streamingPreviewToolProgress)}`,
`blockStreaming=${formatMatrixQaBoolean(snapshot.blockStreaming)}`,
`autoJoin=${snapshot.autoJoin}`,
`encryption=${formatMatrixQaBoolean(snapshot.encryption)}`,