From b9fd13e8d7304fd5d35f93bee1dd460484fcc94d Mon Sep 17 00:00:00 2001 From: Gustavo Madeira Santana Date: Mon, 27 Apr 2026 16:19:26 -0400 Subject: [PATCH] qa-matrix: add streaming tool progress scenarios --- docs/concepts/qa-matrix.md | 1 + .../src/runners/contract/scenario-catalog.ts | 54 +++ .../runners/contract/scenario-runtime-room.ts | 297 ++++++++++++++++- .../contract/scenario-runtime-shared.ts | 25 ++ .../src/runners/contract/scenario-runtime.ts | 15 + .../src/runners/contract/scenario-types.ts | 2 + .../src/runners/contract/scenarios.test.ts | 310 ++++++++++++++++++ .../qa-matrix/src/substrate/config.test.ts | 28 ++ extensions/qa-matrix/src/substrate/config.ts | 34 +- 9 files changed, 754 insertions(+), 12 deletions(-) diff --git a/docs/concepts/qa-matrix.md b/docs/concepts/qa-matrix.md index 2f931787527..b2899fa1033 100644 --- a/docs/concepts/qa-matrix.md +++ b/docs/concepts/qa-matrix.md @@ -82,6 +82,7 @@ The full scenario id list is the `MatrixQaScenarioId` union in `extensions/qa-ma - threading — `matrix-thread-*`, `matrix-subagent-thread-spawn` - top-level / DM / room — `matrix-top-level-reply-shape`, `matrix-room-*`, `matrix-dm-*` +- streaming and tool progress — `matrix-room-partial-streaming-preview`, `matrix-room-quiet-streaming-preview`, `matrix-room-tool-progress-*`, `matrix-room-block-streaming` - media — `matrix-media-type-coverage`, `matrix-room-image-understanding-attachment`, `matrix-attachment-only-ignored`, `matrix-unsupported-media-safe` - routing — `matrix-room-autojoin-invite`, `matrix-secondary-room-*` - reactions — `matrix-reaction-*` diff --git a/extensions/qa-matrix/src/runners/contract/scenario-catalog.ts b/extensions/qa-matrix/src/runners/contract/scenario-catalog.ts index e51846b2018..195935a424a 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-catalog.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-catalog.ts @@ -20,7 +20,12 @@ export type MatrixQaScenarioId = | "matrix-subagent-thread-spawn" | "matrix-top-level-reply-shape" | "matrix-room-thread-reply-override" + | "matrix-room-partial-streaming-preview" | "matrix-room-quiet-streaming-preview" + | "matrix-room-tool-progress-preview" + | "matrix-room-tool-progress-preview-opt-out" + | "matrix-room-tool-progress-error" + | "matrix-room-tool-progress-mention-safety" | "matrix-room-block-streaming" | "matrix-room-image-understanding-attachment" | "matrix-room-generated-image-delivery" @@ -317,6 +322,14 @@ export const MATRIX_QA_SCENARIOS: MatrixQaScenarioDefinition[] = [ threadReplies: "always", }, }, + { + id: "matrix-room-partial-streaming-preview", + timeoutMs: 45_000, + title: "Matrix partial streaming emits text previews before finalizing", + configOverrides: { + streaming: "partial", + }, + }, { id: "matrix-room-quiet-streaming-preview", timeoutMs: 45_000, @@ -325,6 +338,47 @@ export const MATRIX_QA_SCENARIOS: MatrixQaScenarioDefinition[] = [ streaming: "quiet", }, }, + { + id: "matrix-room-tool-progress-preview", + timeoutMs: 60_000, + title: "Matrix streaming folds tool progress into the preview message", + configOverrides: { + streaming: "quiet", + toolProfile: "coding", + }, + }, + { + id: "matrix-room-tool-progress-preview-opt-out", + timeoutMs: 60_000, + title: "Matrix streaming can opt out of preview tool progress", + configOverrides: { + streaming: { + mode: "quiet", + preview: { + toolProgress: false, + }, + }, + toolProfile: "coding", + }, + }, + { + id: "matrix-room-tool-progress-error", + timeoutMs: 60_000, + title: "Matrix streaming finalizes previews after tool errors", + configOverrides: { + streaming: "quiet", + toolProfile: "coding", + }, + }, + { + id: "matrix-room-tool-progress-mention-safety", + timeoutMs: 60_000, + title: "Matrix streaming keeps tool-progress mentions inert", + configOverrides: { + streaming: "partial", + toolProfile: "coding", + }, + }, { id: "matrix-room-block-streaming", timeoutMs: 75_000, diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts index 57102a28596..5725fd8f82f 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts @@ -15,8 +15,12 @@ import { assertTopLevelReplyArtifact, advanceMatrixQaActorCursor, buildMatrixBlockStreamingPrompt, + buildMatrixPartialStreamingPrompt, buildMatrixQuietStreamingPrompt, buildMatrixQaToken, + buildMatrixToolProgressErrorPrompt, + buildMatrixToolProgressMentionSafetyPrompt, + buildMatrixToolProgressPrompt, buildMatrixReplyArtifact, buildMatrixReplyDetails, buildMentionPrompt, @@ -554,9 +558,34 @@ export async function runAllowlistHotReloadScenario(context: MatrixQaScenarioCon } export async function runQuietStreamingPreviewScenario(context: MatrixQaScenarioContext) { + return runMatrixStreamingPreviewScenario(context, { + expectedPreviewKind: "notice", + finalText: `MATRIX_QA_QUIET_STREAM_${randomUUID().slice(0, 8).toUpperCase()} preview complete`, + label: "quiet streaming", + triggerBodyBuilder: buildMatrixQuietStreamingPrompt, + }); +} + +export async function runPartialStreamingPreviewScenario(context: MatrixQaScenarioContext) { + return runMatrixStreamingPreviewScenario(context, { + expectedPreviewKind: "message", + finalText: `MATRIX_QA_PARTIAL_STREAM_${randomUUID().slice(0, 8).toUpperCase()} preview complete`, + label: "partial streaming", + triggerBodyBuilder: buildMatrixPartialStreamingPrompt, + }); +} + +async function runMatrixStreamingPreviewScenario( + context: MatrixQaScenarioContext, + params: { + expectedPreviewKind: MatrixQaObservedEvent["kind"]; + finalText: string; + label: string; + triggerBodyBuilder: (sutUserId: string, finalText: string) => string; + }, +) { const { client, startSince } = await primeMatrixQaDriverScenarioClient(context); - const finalText = `MATRIX_QA_QUIET_STREAM_${randomUUID().slice(0, 8).toUpperCase()} preview complete`; - const triggerBody = buildMatrixQuietStreamingPrompt(context.sutUserId, finalText); + const triggerBody = params.triggerBodyBuilder(context.sutUserId, params.finalText); const driverEventId = await client.sendTextMessage({ body: triggerBody, mentionUserIds: [context.sutUserId], @@ -567,7 +596,8 @@ export async function runQuietStreamingPreviewScenario(context: MatrixQaScenario predicate: (event) => event.roomId === context.roomId && event.sender === context.sutUserId && - event.kind === "notice", + event.kind === params.expectedPreviewKind && + event.relatesTo === undefined, roomId: context.roomId, since: startSince, timeoutMs: context.timeoutMs, @@ -580,7 +610,7 @@ export async function runQuietStreamingPreviewScenario(context: MatrixQaScenario isMatrixQaMessageLikeKind(event.kind) && event.relatesTo?.relType === "m.replace" && event.relatesTo.eventId === preview.event.eventId && - event.body === finalText, + event.body === params.finalText, roomId: context.roomId, since: preview.since, timeoutMs: context.timeoutMs, @@ -591,23 +621,268 @@ export async function runQuietStreamingPreviewScenario(context: MatrixQaScenario nextSince: finalized.since, startSince, }); + const finalReply = buildMatrixReplyArtifact(finalized.event, params.finalText); + return { + artifacts: { + driverEventId, + previewFormattedBodyPreview: preview.event.formattedBody?.slice(0, 200), + previewBodyPreview: preview.event.body?.slice(0, 200), + previewEventId: preview.event.eventId, + previewMentions: preview.event.mentions, + reply: finalReply, + token: params.finalText, + triggerBody, + }, + details: [ + `driver event: ${driverEventId}`, + `scenario: ${params.label}`, + `preview event: ${preview.event.eventId}`, + `preview kind: ${preview.event.kind}`, + `preview body: ${preview.event.body ?? ""}`, + `final reply relation: ${finalized.event.relatesTo?.relType ?? ""}`, + `final reply target: ${finalized.event.relatesTo?.eventId ?? ""}`, + ...buildMatrixReplyDetails("final reply", finalReply), + ].join("\n"), + } satisfies MatrixQaScenarioExecution; +} + +function findMatrixQaUnexpectedWorkingEvents(params: { + events: MatrixQaObservedEvent[]; + finalEventId?: string; + previewEventId?: string; + startIndex: number; + sutUserId: string; +}) { + return params.events.slice(params.startIndex).filter((event) => { + if (event.sender !== params.sutUserId || event.type !== "m.room.message") { + return false; + } + if (!/\bWorking\b/i.test(event.body ?? "")) { + return false; + } + if (event.eventId === params.previewEventId || event.eventId === params.finalEventId) { + return false; + } + return event.relatesTo?.eventId !== params.previewEventId; + }); +} + +function assertMatrixQaToolProgressMentionsInert(event: MatrixQaObservedEvent) { + const mentions = event.mentions; + if (mentions?.room || (mentions?.userIds?.length ?? 0) > 0) { + throw new Error( + `Matrix tool-progress preview emitted active mentions: ${JSON.stringify(mentions)}`, + ); + } + if (/matrix\.to/i.test(event.formattedBody ?? "")) { + throw new Error( + `Matrix tool-progress preview linked Matrix mentions: ${event.formattedBody ?? ""}`, + ); + } + if ( + !/[^<]*(?:@room|@alice:matrix-qa\.test|!room:matrix-qa\.test)/i.test( + event.formattedBody ?? "", + ) + ) { + throw new Error( + `Matrix tool-progress preview did not preserve mention-looking text inside code: ${event.formattedBody ?? ""}`, + ); + } +} + +async function runMatrixToolProgressScenario( + context: MatrixQaScenarioContext, + params: { + expectedPreviewKind: MatrixQaObservedEvent["kind"]; + finalText: string; + label: string; + mentionSafety?: boolean; + progressPattern: RegExp; + triggerBodyBuilder: (sutUserId: string, finalText: string) => string; + }, +) { + const { client, startSince } = await primeMatrixQaDriverScenarioClient(context); + const startObservedIndex = context.observedEvents.length; + const triggerBody = params.triggerBodyBuilder(context.sutUserId, params.finalText); + const driverEventId = await client.sendTextMessage({ + body: triggerBody, + mentionUserIds: [context.sutUserId], + roomId: context.roomId, + }); + const preview = await client.waitForRoomEvent({ + observedEvents: context.observedEvents, + predicate: (event) => + event.roomId === context.roomId && + event.sender === context.sutUserId && + event.kind === params.expectedPreviewKind && + event.relatesTo === undefined && + /\bWorking\b/i.test(event.body ?? ""), + roomId: context.roomId, + since: startSince, + timeoutMs: context.timeoutMs, + }); + const progress = params.progressPattern.test(preview.event.body ?? "") + ? preview + : await client.waitForRoomEvent({ + observedEvents: context.observedEvents, + predicate: (event) => + event.roomId === context.roomId && + event.sender === context.sutUserId && + event.kind === params.expectedPreviewKind && + event.relatesTo?.relType === "m.replace" && + event.relatesTo.eventId === preview.event.eventId && + /\bWorking\b/i.test(event.body ?? "") && + params.progressPattern.test(event.body ?? ""), + roomId: context.roomId, + since: preview.since, + timeoutMs: context.timeoutMs, + }); + + if (params.mentionSafety) { + assertMatrixQaToolProgressMentionsInert(progress.event); + } + + const finalized = await client.waitForRoomEvent({ + observedEvents: context.observedEvents, + predicate: (event) => + event.roomId === context.roomId && + event.sender === context.sutUserId && + isMatrixQaMessageLikeKind(event.kind) && + event.relatesTo?.relType === "m.replace" && + event.relatesTo.eventId === preview.event.eventId && + event.body === params.finalText, + roomId: context.roomId, + since: progress.since, + timeoutMs: context.timeoutMs, + }); + const unexpectedWorkingEvents = findMatrixQaUnexpectedWorkingEvents({ + events: context.observedEvents, + finalEventId: finalized.event.eventId, + previewEventId: preview.event.eventId, + startIndex: startObservedIndex, + sutUserId: context.sutUserId, + }); + if (unexpectedWorkingEvents.length > 0) { + throw new Error( + `Matrix tool progress leaked outside preview event: ${unexpectedWorkingEvents.map((event) => `${event.eventId}:${event.body ?? ""}`).join("; ")}`, + ); + } + advanceMatrixQaActorCursor({ + actorId: "driver", + syncState: context.syncState, + nextSince: finalized.since, + startSince, + }); + const finalReply = buildMatrixReplyArtifact(finalized.event, params.finalText); + return { + artifacts: { + driverEventId, + previewBodyPreview: progress.event.body?.slice(0, 200), + previewEventId: preview.event.eventId, + previewFormattedBodyPreview: progress.event.formattedBody?.slice(0, 200), + previewMentions: progress.event.mentions, + reply: finalReply, + token: params.finalText, + triggerBody, + }, + details: [ + `driver event: ${driverEventId}`, + `scenario: ${params.label}`, + `preview event: ${preview.event.eventId}`, + `preview kind: ${progress.event.kind}`, + `preview body: ${progress.event.body ?? ""}`, + `preview mentions: ${JSON.stringify(progress.event.mentions ?? {})}`, + `final reply relation: ${finalized.event.relatesTo?.relType ?? ""}`, + `final reply target: ${finalized.event.relatesTo?.eventId ?? ""}`, + ...buildMatrixReplyDetails("final reply", finalReply), + ].join("\n"), + } satisfies MatrixQaScenarioExecution; +} + +export async function runToolProgressPreviewScenario(context: MatrixQaScenarioContext) { + return runMatrixToolProgressScenario(context, { + expectedPreviewKind: "notice", + finalText: buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS"), + label: "tool progress preview", + progressPattern: /\btool:\s*read\b/i, + triggerBodyBuilder: buildMatrixToolProgressPrompt, + }); +} + +export async function runToolProgressErrorScenario(context: MatrixQaScenarioContext) { + return runMatrixToolProgressScenario(context, { + expectedPreviewKind: "notice", + finalText: buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS_ERROR"), + label: "tool progress error", + progressPattern: /read from missing-matrix-tool-progress-target\.txt/i, + triggerBodyBuilder: buildMatrixToolProgressErrorPrompt, + }); +} + +export async function runToolProgressMentionSafetyScenario(context: MatrixQaScenarioContext) { + return runMatrixToolProgressScenario(context, { + expectedPreviewKind: "message", + finalText: buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS_MENTION_SAFE"), + label: "tool progress mention safety", + mentionSafety: true, + progressPattern: /@room|@alice:matrix-qa\.test|!room:matrix-qa\.test/i, + triggerBodyBuilder: buildMatrixToolProgressMentionSafetyPrompt, + }); +} + +export async function runToolProgressPreviewOptOutScenario(context: MatrixQaScenarioContext) { + const { client, startSince } = await primeMatrixQaDriverScenarioClient(context); + const startObservedIndex = context.observedEvents.length; + const finalText = buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS_OPTOUT"); + const triggerBody = buildMatrixToolProgressPrompt(context.sutUserId, finalText); + const driverEventId = await client.sendTextMessage({ + body: triggerBody, + mentionUserIds: [context.sutUserId], + roomId: context.roomId, + }); + const finalized = await client.waitForRoomEvent({ + observedEvents: context.observedEvents, + predicate: (event) => + event.roomId === context.roomId && + event.sender === context.sutUserId && + isMatrixQaMessageLikeKind(event.kind) && + event.body === finalText, + roomId: context.roomId, + since: startSince, + timeoutMs: context.timeoutMs, + }); + const unexpectedPreviewProgressEvents = context.observedEvents + .slice(startObservedIndex) + .filter( + (event) => + event.sender === context.sutUserId && + event.type === "m.room.message" && + event.eventId !== finalized.event.eventId && + /^Working\.\.\.\n-/i.test(event.body ?? ""), + ); + if (unexpectedPreviewProgressEvents.length > 0) { + throw new Error( + `Matrix tool-progress opt-out still emitted preview progress: ${unexpectedPreviewProgressEvents.map((event) => `${event.eventId}:${event.body ?? ""}`).join("; ")}`, + ); + } + advanceMatrixQaActorCursor({ + actorId: "driver", + syncState: context.syncState, + nextSince: finalized.since, + startSince, + }); const finalReply = buildMatrixReplyArtifact(finalized.event, finalText); return { artifacts: { driverEventId, - previewBodyPreview: preview.event.body?.slice(0, 200), - previewEventId: preview.event.eventId, reply: finalReply, token: finalText, triggerBody, }, details: [ `driver event: ${driverEventId}`, - `preview event: ${preview.event.eventId}`, - `preview kind: ${preview.event.kind}`, - `preview body: ${preview.event.body ?? ""}`, - `final reply relation: ${finalized.event.relatesTo?.relType ?? ""}`, - `final reply target: ${finalized.event.relatesTo?.eventId ?? ""}`, + "scenario: tool progress preview opt-out", + "preview progress events: 0", ...buildMatrixReplyDetails("final reply", finalReply), ].join("\n"), } satisfies MatrixQaScenarioExecution; diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime-shared.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime-shared.ts index a655ccde885..0a595203c36 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime-shared.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime-shared.ts @@ -81,6 +81,31 @@ export function buildMatrixQuietStreamingPrompt(sutUserId: string, text: string) return `${sutUserId} Quiet streaming QA check: reply exactly \`${text}\`.`; } +export function buildMatrixPartialStreamingPrompt(sutUserId: string, text: string) { + return `${sutUserId} Partial streaming QA check: reply exactly \`${text}\`.`; +} + +export function buildMatrixToolProgressPrompt(sutUserId: string, text: string) { + return [ + `${sutUserId} Tool progress QA check: read \`QA_KICKOFF_TASK.md\` before answering.`, + `After the read completes, reply exactly \`${text}\`.`, + ].join(" "); +} + +export function buildMatrixToolProgressErrorPrompt(sutUserId: string, text: string) { + return [ + `${sutUserId} Tool progress error QA check: read \`missing-matrix-tool-progress-target.txt\` before answering.`, + `After the read fails, reply exactly \`${text}\`.`, + ].join(" "); +} + +export function buildMatrixToolProgressMentionSafetyPrompt(sutUserId: string, text: string) { + return [ + `${sutUserId} Tool progress QA check: read \`matrix-progress-@room-@alice:matrix-qa.test-!room:matrix-qa.test.txt\` before answering.`, + `After the read completes, reply exactly \`${text}\`.`, + ].join(" "); +} + export function buildMatrixBlockStreamingPrompt( sutUserId: string, firstText: string, diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime.ts index ca41bca453a..a67d21b56ce 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime.ts @@ -77,6 +77,7 @@ import { runMatrixQaCanary, runMembershipLossScenario, runObserverAllowlistOverrideScenario, + runPartialStreamingPreviewScenario, runQuietStreamingPreviewScenario, runReactionThreadedScenario, runRoomAutoJoinInviteScenario, @@ -86,6 +87,10 @@ import { runThreadIsolationScenario, runThreadNestedReplyShapeScenario, runThreadRootPreservationScenario, + runToolProgressErrorScenario, + runToolProgressMentionSafetyScenario, + runToolProgressPreviewOptOutScenario, + runToolProgressPreviewScenario, runTopLevelReplyShapeScenario, } from "./scenario-runtime-room.js"; import { @@ -203,8 +208,18 @@ export async function runMatrixQaScenario( return await runTopLevelReplyShapeScenario(context); case "matrix-room-thread-reply-override": return await runRoomThreadReplyOverrideScenario(context); + case "matrix-room-partial-streaming-preview": + return await runPartialStreamingPreviewScenario(context); case "matrix-room-quiet-streaming-preview": return await runQuietStreamingPreviewScenario(context); + case "matrix-room-tool-progress-preview": + return await runToolProgressPreviewScenario(context); + case "matrix-room-tool-progress-preview-opt-out": + return await runToolProgressPreviewOptOutScenario(context); + case "matrix-room-tool-progress-error": + return await runToolProgressErrorScenario(context); + case "matrix-room-tool-progress-mention-safety": + return await runToolProgressMentionSafetyScenario(context); case "matrix-room-block-streaming": return await runBlockStreamingScenario(context); case "matrix-room-image-understanding-attachment": diff --git a/extensions/qa-matrix/src/runners/contract/scenario-types.ts b/extensions/qa-matrix/src/runners/contract/scenario-types.ts index 1e1215c3b57..a9176ede86b 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-types.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-types.ts @@ -93,6 +93,8 @@ export type MatrixQaScenarioArtifacts = { noticeEventId?: string; previewBodyPreview?: string; previewEventId?: string; + previewFormattedBodyPreview?: string; + previewMentions?: MatrixQaObservedEvent["mentions"]; blockEventIds?: string[]; bootstrapActor?: "driver" | "observer" | "sut"; bootstrapErrorPreview?: string; diff --git a/extensions/qa-matrix/src/runners/contract/scenarios.test.ts b/extensions/qa-matrix/src/runners/contract/scenarios.test.ts index 951e93201fe..5e1caa825a4 100644 --- a/extensions/qa-matrix/src/runners/contract/scenarios.test.ts +++ b/extensions/qa-matrix/src/runners/contract/scenarios.test.ts @@ -84,6 +84,48 @@ function matrixQaScenarioContext(): MatrixQaScenarioContext { }; } +function matrixQaMessageEvent( + overrides: Partial & + Pick, +): MatrixQaObservedEvent { + return { + roomId: "!main:matrix-qa.test", + sender: "@sut:matrix-qa.test", + type: "m.room.message", + ...overrides, + }; +} + +function readMatrixQaReplyDirective(body: unknown, fallback: string) { + return /reply exactly `([^`]+)`/.exec(String(body))?.[1] ?? fallback; +} + +function mockMatrixQaRoomClient(params: { + driverEventId: string; + events: Array<{ + event: + | MatrixQaObservedEvent + | ((client: { sendTextMessage: ReturnType }) => MatrixQaObservedEvent); + since: string; + }>; +}) { + const primeRoom = vi.fn().mockResolvedValue("driver-sync-start"); + const sendTextMessage = vi.fn().mockResolvedValue(params.driverEventId); + const waitForRoomEvent = vi.fn(); + for (const entry of params.events) { + waitForRoomEvent.mockImplementationOnce(async () => ({ + event: typeof entry.event === "function" ? entry.event({ sendTextMessage }) : entry.event, + since: entry.since, + })); + } + createMatrixQaClient.mockReturnValue({ + primeRoom, + sendTextMessage, + waitForRoomEvent, + }); + return { primeRoom, sendTextMessage, waitForRoomEvent }; +} + function mockMatrixQaCliAccount(params: { accessToken: string; deviceId: string; @@ -165,7 +207,12 @@ describe("matrix live qa scenarios", () => { "matrix-subagent-thread-spawn", "matrix-top-level-reply-shape", "matrix-room-thread-reply-override", + "matrix-room-partial-streaming-preview", "matrix-room-quiet-streaming-preview", + "matrix-room-tool-progress-preview", + "matrix-room-tool-progress-preview-opt-out", + "matrix-room-tool-progress-error", + "matrix-room-tool-progress-mention-safety", "matrix-room-block-streaming", "matrix-room-image-understanding-attachment", "matrix-room-generated-image-delivery", @@ -2243,6 +2290,269 @@ describe("matrix live qa scenarios", () => { ); }); + it("captures partial preview text messages before the finalized Matrix reply", async () => { + const previewEventId = "$partial-preview"; + const fallbackFinalText = "MATRIX_QA_PARTIAL_STREAM_PREVIEW_COMPLETE"; + const { sendTextMessage } = mockMatrixQaRoomClient({ + driverEventId: "$partial-stream-trigger", + events: [ + { + event: matrixQaMessageEvent({ + kind: "message", + eventId: previewEventId, + body: "partial preview", + }), + since: "driver-sync-preview", + }, + { + event: ({ sendTextMessage }) => + matrixQaMessageEvent({ + kind: "message", + eventId: "$partial-final", + body: readMatrixQaReplyDirective( + sendTextMessage.mock.calls[0]?.[0]?.body, + fallbackFinalText, + ), + relatesTo: { + relType: "m.replace", + eventId: previewEventId, + }, + }), + since: "driver-sync-next", + }, + ], + }); + + const scenario = MATRIX_QA_SCENARIOS.find( + (entry) => entry.id === "matrix-room-partial-streaming-preview", + ); + expect(scenario).toBeDefined(); + + await expect(runMatrixQaScenario(scenario!, matrixQaScenarioContext())).resolves.toMatchObject({ + artifacts: { + driverEventId: "$partial-stream-trigger", + previewEventId: "$partial-preview", + reply: { + eventId: "$partial-final", + }, + }, + }); + + expect(sendTextMessage).toHaveBeenCalledWith({ + body: expect.stringContaining("Partial streaming QA check"), + mentionUserIds: ["@sut:matrix-qa.test"], + roomId: "!main:matrix-qa.test", + }); + }); + + it("captures Matrix tool progress inside the quiet preview before finalizing", async () => { + const previewEventId = "$tool-progress-preview"; + mockMatrixQaRoomClient({ + driverEventId: "$tool-progress-trigger", + events: [ + { + event: matrixQaMessageEvent({ + kind: "notice", + eventId: previewEventId, + body: "Working...\n- `tool: read`", + }), + since: "driver-sync-preview", + }, + { + event: ({ sendTextMessage }) => + matrixQaMessageEvent({ + kind: "notice", + eventId: "$tool-progress-final", + body: readMatrixQaReplyDirective( + sendTextMessage.mock.calls[0]?.[0]?.body, + "MATRIX_QA_TOOL_PROGRESS_FIXED", + ), + relatesTo: { + relType: "m.replace", + eventId: previewEventId, + }, + }), + since: "driver-sync-next", + }, + ], + }); + + const scenario = MATRIX_QA_SCENARIOS.find( + (entry) => entry.id === "matrix-room-tool-progress-preview", + ); + expect(scenario).toBeDefined(); + + await expect(runMatrixQaScenario(scenario!, matrixQaScenarioContext())).resolves.toMatchObject({ + artifacts: { + driverEventId: "$tool-progress-trigger", + previewBodyPreview: "Working...\n- `tool: read`", + previewEventId: "$tool-progress-preview", + reply: { + eventId: "$tool-progress-final", + }, + }, + }); + }); + + it("keeps Matrix tool progress opt-out from creating Working previews", async () => { + const { waitForRoomEvent } = mockMatrixQaRoomClient({ + driverEventId: "$tool-progress-optout-trigger", + events: [ + { + event: ({ sendTextMessage }) => + matrixQaMessageEvent({ + kind: "message", + eventId: "$tool-progress-optout-final", + body: readMatrixQaReplyDirective( + sendTextMessage.mock.calls[0]?.[0]?.body, + "MATRIX_QA_TOOL_PROGRESS_OPTOUT_FIXED", + ), + }), + since: "driver-sync-next", + }, + ], + }); + + const scenario = MATRIX_QA_SCENARIOS.find( + (entry) => entry.id === "matrix-room-tool-progress-preview-opt-out", + ); + expect(scenario).toBeDefined(); + + await expect(runMatrixQaScenario(scenario!, matrixQaScenarioContext())).resolves.toMatchObject({ + artifacts: { + driverEventId: "$tool-progress-optout-trigger", + reply: { + eventId: "$tool-progress-optout-final", + }, + }, + }); + + expect(waitForRoomEvent).toHaveBeenCalledTimes(1); + }); + + it("finalizes Matrix tool progress previews after tool errors", async () => { + const previewEventId = "$tool-progress-error-preview"; + const { sendTextMessage } = mockMatrixQaRoomClient({ + driverEventId: "$tool-progress-error-trigger", + events: [ + { + event: matrixQaMessageEvent({ + kind: "notice", + eventId: previewEventId, + body: "Working...\n- `read from missing-matrix-tool-progress-target.txt`", + }), + since: "driver-sync-preview", + }, + { + event: ({ sendTextMessage }) => + matrixQaMessageEvent({ + kind: "notice", + eventId: "$tool-progress-error-final", + body: readMatrixQaReplyDirective( + sendTextMessage.mock.calls[0]?.[0]?.body, + "MATRIX_QA_TOOL_PROGRESS_ERROR_FIXED", + ), + relatesTo: { + relType: "m.replace", + eventId: previewEventId, + }, + }), + since: "driver-sync-next", + }, + ], + }); + + const scenario = MATRIX_QA_SCENARIOS.find( + (entry) => entry.id === "matrix-room-tool-progress-error", + ); + expect(scenario).toBeDefined(); + + await expect(runMatrixQaScenario(scenario!, matrixQaScenarioContext())).resolves.toMatchObject({ + artifacts: { + driverEventId: "$tool-progress-error-trigger", + previewBodyPreview: "Working...\n- `read from missing-matrix-tool-progress-target.txt`", + previewEventId: "$tool-progress-error-preview", + reply: { + eventId: "$tool-progress-error-final", + relatesTo: { + eventId: "$tool-progress-error-preview", + relType: "m.replace", + }, + }, + }, + }); + + expect(sendTextMessage).toHaveBeenCalledWith({ + body: expect.stringContaining("Tool progress error QA check"), + mentionUserIds: ["@sut:matrix-qa.test"], + roomId: "!main:matrix-qa.test", + }); + }); + + it("keeps Matrix-looking tool progress mentions inert in partial previews", async () => { + const previewEventId = "$tool-progress-mention-preview"; + mockMatrixQaRoomClient({ + driverEventId: "$tool-progress-mention-trigger", + events: [ + { + event: matrixQaMessageEvent({ + kind: "message", + eventId: previewEventId, + body: "Working...\n- `tool: read`", + }), + since: "driver-sync-preview", + }, + { + event: matrixQaMessageEvent({ + kind: "message", + eventId: "$tool-progress-mention-edit", + body: "Working...\n- `tool: read`\n- `read from matrix-progress-@room-@alice:matrix-qa.test-!room:matrix-qa.test.txt`", + formattedBody: + "Working...
  • read from matrix-progress-@room-@alice:matrix-qa.test-!room:matrix-qa.test.txt
", + mentions: {}, + relatesTo: { + relType: "m.replace", + eventId: previewEventId, + }, + }), + since: "driver-sync-progress", + }, + { + event: ({ sendTextMessage }) => + matrixQaMessageEvent({ + kind: "message", + eventId: "$tool-progress-mention-final", + body: readMatrixQaReplyDirective( + sendTextMessage.mock.calls[0]?.[0]?.body, + "MATRIX_QA_TOOL_PROGRESS_MENTION_SAFE_FIXED", + ), + relatesTo: { + relType: "m.replace", + eventId: previewEventId, + }, + }), + since: "driver-sync-next", + }, + ], + }); + + const scenario = MATRIX_QA_SCENARIOS.find( + (entry) => entry.id === "matrix-room-tool-progress-mention-safety", + ); + expect(scenario).toBeDefined(); + + await expect(runMatrixQaScenario(scenario!, matrixQaScenarioContext())).resolves.toMatchObject({ + artifacts: { + driverEventId: "$tool-progress-mention-trigger", + previewEventId: "$tool-progress-mention-preview", + previewMentions: {}, + reply: { + eventId: "$tool-progress-mention-final", + }, + }, + }); + }); + it("preserves separate finalized block events when Matrix block streaming is enabled", async () => { const primeRoom = vi.fn().mockResolvedValue("driver-sync-start"); const sendTextMessage = vi.fn().mockResolvedValue("$block-stream-trigger"); diff --git a/extensions/qa-matrix/src/substrate/config.test.ts b/extensions/qa-matrix/src/substrate/config.test.ts index 4423ead4df4..34f06089469 100644 --- a/extensions/qa-matrix/src/substrate/config.test.ts +++ b/extensions/qa-matrix/src/substrate/config.test.ts @@ -254,11 +254,39 @@ describe("matrix qa config", () => { }, replyToMode: "off", streaming: "partial", + streamingPreviewToolProgress: true, threadBindings: {}, threadReplies: "inbound", }); expect(summarizeMatrixQaConfigSnapshot(snapshot)).toContain("autoJoin=allowlist"); expect(summarizeMatrixQaConfigSnapshot(snapshot)).toContain("streaming=partial"); + expect(summarizeMatrixQaConfigSnapshot(snapshot)).toContain( + "streaming.preview.toolProgress=true", + ); + }); + + it("builds Matrix QA config snapshots from structured streaming overrides", () => { + const snapshot = buildMatrixQaConfigSnapshot({ + driverUserId: "@driver:matrix-qa.test", + observerUserId: "@observer:matrix-qa.test", + overrides: { + streaming: { + mode: "quiet", + preview: { + toolProgress: false, + }, + }, + }, + sutUserId: "@sut:matrix-qa.test", + topology, + }); + + expect(snapshot.streaming).toBe("quiet"); + expect(snapshot.streamingPreviewToolProgress).toBe(false); + expect(summarizeMatrixQaConfigSnapshot(snapshot)).toContain("streaming=quiet"); + expect(summarizeMatrixQaConfigSnapshot(snapshot)).toContain( + "streaming.preview.toolProgress=false", + ); }); it("resolves role-based Matrix sender allowlist overrides", () => { diff --git a/extensions/qa-matrix/src/substrate/config.ts b/extensions/qa-matrix/src/substrate/config.ts index af5019a2584..7fb5eff47e4 100644 --- a/extensions/qa-matrix/src/substrate/config.ts +++ b/extensions/qa-matrix/src/substrate/config.ts @@ -9,6 +9,13 @@ export type MatrixQaAutoJoinMode = "allowlist" | "always" | "off"; export type MatrixQaStreamingMode = "off" | "partial" | "quiet"; export type MatrixQaActorRole = "driver" | "observer" | "sut"; +export type MatrixQaStreamingConfig = { + mode?: MatrixQaStreamingMode; + preview?: { + toolProgress?: boolean; + }; +}; + export type MatrixQaAgentDefaultsOverrides = { blockStreamingChunk?: { breakPreference?: "newline" | "paragraph" | "sentence"; @@ -62,7 +69,7 @@ export type MatrixQaConfigOverrides = { groupsByKey?: Record; replyToMode?: MatrixQaReplyToMode; startupVerification?: "if-unverified" | "off"; - streaming?: "off" | "partial" | "quiet" | boolean; + streaming?: MatrixQaStreamingMode | MatrixQaStreamingConfig | boolean; threadBindings?: MatrixQaThreadBindingsConfigOverrides; threadReplies?: MatrixQaThreadRepliesMode; toolProfile?: "coding" | "messaging" | "minimal"; @@ -86,6 +93,7 @@ export type MatrixQaConfigSnapshot = { replyToMode: MatrixQaReplyToMode; startupVerification?: "if-unverified" | "off"; streaming: MatrixQaStreamingMode; + streamingPreviewToolProgress: boolean; threadBindings: MatrixQaThreadBindingsConfigOverrides; threadReplies: MatrixQaThreadRepliesMode; }; @@ -205,9 +213,29 @@ function resolveMatrixQaStreamingMode( if (value === "quiet") { return "quiet"; } + if (isMatrixQaStreamingConfig(value)) { + if (value.mode === "partial" || value.mode === "quiet") { + return value.mode; + } + } return "off"; } +function isMatrixQaStreamingConfig( + value: MatrixQaConfigOverrides["streaming"], +): value is MatrixQaStreamingConfig { + return Boolean(value && typeof value === "object" && !Array.isArray(value)); +} + +function resolveMatrixQaStreamingPreviewToolProgress( + value: MatrixQaConfigOverrides["streaming"], +): boolean { + if (!isMatrixQaStreamingConfig(value)) { + return true; + } + return value.preview?.toolProgress ?? true; +} + function resolveMatrixQaAutoJoinAllowlist(params: { overrides?: MatrixQaConfigOverrides }) { if (params.overrides?.autoJoin !== "allowlist") { return []; @@ -352,6 +380,9 @@ export function buildMatrixQaConfigSnapshot(params: { replyToMode: params.overrides?.replyToMode ?? "off", startupVerification: params.overrides?.startupVerification, streaming: resolveMatrixQaStreamingMode(params.overrides?.streaming), + streamingPreviewToolProgress: resolveMatrixQaStreamingPreviewToolProgress( + params.overrides?.streaming, + ), threadBindings: { ...params.overrides?.threadBindings }, threadReplies: params.overrides?.threadReplies ?? "inbound", }; @@ -366,6 +397,7 @@ export function summarizeMatrixQaConfigSnapshot(snapshot: MatrixQaConfigSnapshot `dm.sessionScope=${snapshot.dm.sessionScope}`, `dm.threadReplies=${snapshot.dm.threadReplies}`, `streaming=${snapshot.streaming}`, + `streaming.preview.toolProgress=${formatMatrixQaBoolean(snapshot.streamingPreviewToolProgress)}`, `blockStreaming=${formatMatrixQaBoolean(snapshot.blockStreaming)}`, `autoJoin=${snapshot.autoJoin}`, `encryption=${formatMatrixQaBoolean(snapshot.encryption)}`,