From 8a1e2202734476d79b462bbdf66e6291c26104d7 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Sun, 3 May 2026 16:04:51 -0700 Subject: [PATCH] test(qa): relax Matrix tool progress matching --- .../runners/contract/scenario-runtime-room.ts | 113 +++++++++++++++--- .../src/runners/contract/scenarios.test.ts | 105 ++++++++++++++++ 2 files changed, 203 insertions(+), 15 deletions(-) diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts index 5725fd8f82f..bfadbedca6b 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts @@ -690,12 +690,77 @@ function assertMatrixQaToolProgressMentionsInert(event: MatrixQaObservedEvent) { } } +function hasMatrixQaToolProgressPreviewLine(body: string | undefined) { + return Boolean( + body?.split(/\r?\n/).some((line) => /^\s*[-*•]\s+`?[^`\s][^`]*`?\s*$/u.test(line)), + ); +} + +function truncateMatrixQaToolProgressBody(body: string | undefined) { + if (!body) { + return ""; + } + return body.length <= 240 ? body : `${body.slice(0, 237)}...`; +} + +function describeMatrixQaToolProgressCandidate(event: MatrixQaObservedEvent) { + const relation = event.relatesTo?.relType + ? `${event.relatesTo.relType}:${event.relatesTo.eventId ?? ""}` + : ""; + return [ + `${event.eventId} kind=${event.kind}`, + `relation=${relation}`, + `body=${JSON.stringify(truncateMatrixQaToolProgressBody(event.body))}`, + ].join(" "); +} + +function buildMatrixQaToolProgressTimeoutMessage(params: { + cause: unknown; + events: MatrixQaObservedEvent[]; + expectedPreviewKind: MatrixQaObservedEvent["kind"]; + previewEventId: string; + roomId: string; + startIndex: number; + sutUserId: string; +}) { + const candidates = params.events + .slice(params.startIndex) + .filter((event) => { + if ( + event.roomId !== params.roomId || + event.sender !== params.sutUserId || + event.type !== "m.room.message" || + event.kind !== params.expectedPreviewKind + ) { + return false; + } + return ( + event.eventId === params.previewEventId || + event.relatesTo?.eventId === params.previewEventId || + /\bWorking\b/i.test(event.body ?? "") + ); + }) + .slice(-8); + const candidateDetails = + candidates.length === 0 + ? ["observed preview candidates: "] + : ["observed preview candidates:", ...candidates.map(describeMatrixQaToolProgressCandidate)]; + return [ + params.cause instanceof Error + ? params.cause.message + : `Matrix tool progress wait failed: ${String(params.cause)}`, + `preview event: ${params.previewEventId}`, + ...candidateDetails, + ].join("\n"); +} + async function runMatrixToolProgressScenario( context: MatrixQaScenarioContext, params: { expectedPreviewKind: MatrixQaObservedEvent["kind"]; finalText: string; label: string; + allowGenericProgressLine?: boolean; mentionSafety?: boolean; progressPattern: RegExp; triggerBodyBuilder: (sutUserId: string, finalText: string) => string; @@ -721,22 +786,39 @@ async function runMatrixToolProgressScenario( since: startSince, timeoutMs: context.timeoutMs, }); - const progress = params.progressPattern.test(preview.event.body ?? "") + const matchesExpectedProgress = (body: string | undefined) => + params.progressPattern.test(body ?? "") || + (params.allowGenericProgressLine === true && hasMatrixQaToolProgressPreviewLine(body)); + const progress = matchesExpectedProgress(preview.event.body) ? preview - : await client.waitForRoomEvent({ - observedEvents: context.observedEvents, - predicate: (event) => - event.roomId === context.roomId && - event.sender === context.sutUserId && - event.kind === params.expectedPreviewKind && - event.relatesTo?.relType === "m.replace" && - event.relatesTo.eventId === preview.event.eventId && - /\bWorking\b/i.test(event.body ?? "") && - params.progressPattern.test(event.body ?? ""), - roomId: context.roomId, - since: preview.since, - timeoutMs: context.timeoutMs, - }); + : await client + .waitForRoomEvent({ + observedEvents: context.observedEvents, + predicate: (event) => + event.roomId === context.roomId && + event.sender === context.sutUserId && + event.kind === params.expectedPreviewKind && + event.relatesTo?.relType === "m.replace" && + event.relatesTo.eventId === preview.event.eventId && + /\bWorking\b/i.test(event.body ?? "") && + matchesExpectedProgress(event.body), + roomId: context.roomId, + since: preview.since, + timeoutMs: context.timeoutMs, + }) + .catch((err: unknown) => { + throw new Error( + buildMatrixQaToolProgressTimeoutMessage({ + cause: err, + events: context.observedEvents, + expectedPreviewKind: params.expectedPreviewKind, + previewEventId: preview.event.eventId, + roomId: context.roomId, + startIndex: startObservedIndex, + sutUserId: context.sutUserId, + }), + ); + }); if (params.mentionSafety) { assertMatrixQaToolProgressMentionsInert(progress.event); @@ -804,6 +886,7 @@ export async function runToolProgressPreviewScenario(context: MatrixQaScenarioCo expectedPreviewKind: "notice", finalText: buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS"), label: "tool progress preview", + allowGenericProgressLine: true, progressPattern: /\btool:\s*read\b/i, triggerBodyBuilder: buildMatrixToolProgressPrompt, }); diff --git a/extensions/qa-matrix/src/runners/contract/scenarios.test.ts b/extensions/qa-matrix/src/runners/contract/scenarios.test.ts index f7e99537d5b..8475c01d7d7 100644 --- a/extensions/qa-matrix/src/runners/contract/scenarios.test.ts +++ b/extensions/qa-matrix/src/runners/contract/scenarios.test.ts @@ -2554,6 +2554,111 @@ describe("matrix live qa scenarios", () => { }); }); + it("accepts non-read Matrix tool progress lines in quiet previews", async () => { + const previewEventId = "$tool-progress-generic-preview"; + mockMatrixQaRoomClient({ + driverEventId: "$tool-progress-generic-trigger", + events: [ + { + event: matrixQaMessageEvent({ + kind: "notice", + eventId: previewEventId, + body: "Working...", + }), + since: "driver-sync-preview", + }, + { + event: matrixQaMessageEvent({ + kind: "notice", + eventId: "$tool-progress-generic-update", + body: "Working...\n- `tool: exec_command`", + relatesTo: { + relType: "m.replace", + eventId: previewEventId, + }, + }), + since: "driver-sync-progress", + }, + { + event: ({ sendTextMessage }) => + matrixQaMessageEvent({ + kind: "notice", + eventId: "$tool-progress-generic-final", + body: readMatrixQaReplyDirective( + sendTextMessage.mock.calls[0]?.[0]?.body, + "MATRIX_QA_TOOL_PROGRESS_FIXED", + ), + relatesTo: { + relType: "m.replace", + eventId: previewEventId, + }, + }), + since: "driver-sync-next", + }, + ], + }); + + const scenario = MATRIX_QA_SCENARIOS.find( + (entry) => entry.id === "matrix-room-tool-progress-preview", + ); + expect(scenario).toBeDefined(); + + await expect(runMatrixQaScenario(scenario!, matrixQaScenarioContext())).resolves.toMatchObject({ + artifacts: { + driverEventId: "$tool-progress-generic-trigger", + previewBodyPreview: "Working...\n- `tool: exec_command`", + previewEventId: "$tool-progress-generic-preview", + reply: { + eventId: "$tool-progress-generic-final", + }, + }, + }); + }); + + it("reports Matrix tool progress preview candidates when the progress wait times out", async () => { + const previewEvent = matrixQaMessageEvent({ + kind: "notice", + eventId: "$tool-progress-timeout-preview", + body: "Working...", + }); + const updateEvent = matrixQaMessageEvent({ + kind: "notice", + eventId: "$tool-progress-timeout-update", + body: "Working...\nstill deciding", + relatesTo: { + relType: "m.replace", + eventId: previewEvent.eventId, + }, + }); + const context = matrixQaScenarioContext(); + const primeRoom = vi.fn().mockResolvedValue("driver-sync-start"); + const sendTextMessage = vi.fn().mockResolvedValue("$tool-progress-timeout-trigger"); + const waitForRoomEvent = vi + .fn() + .mockImplementationOnce(async () => { + context.observedEvents.push(previewEvent); + return { event: previewEvent, since: "driver-sync-preview" }; + }) + .mockImplementationOnce(async () => { + context.observedEvents.push(updateEvent); + throw new Error("timed out after 8000ms waiting for Matrix room event"); + }); + createMatrixQaClient.mockReturnValue({ + primeRoom, + sendTextMessage, + waitForRoomEvent, + }); + + const scenario = MATRIX_QA_SCENARIOS.find( + (entry) => entry.id === "matrix-room-tool-progress-preview", + ); + expect(scenario).toBeDefined(); + + await expect(runMatrixQaScenario(scenario!, context)).rejects.toThrow( + /observed preview candidates:[\s\S]*\$tool-progress-timeout-update/, + ); + }); + it("keeps Matrix tool progress opt-out from creating Working previews", async () => { const { waitForRoomEvent } = mockMatrixQaRoomClient({ driverEventId: "$tool-progress-optout-trigger",