test(qa): relax Matrix tool progress matching

This commit is contained in:
Vincent Koc
2026-05-03 16:04:51 -07:00
parent 250be27f64
commit 8a1e220273
2 changed files with 203 additions and 15 deletions

View File

@@ -690,12 +690,77 @@ function assertMatrixQaToolProgressMentionsInert(event: MatrixQaObservedEvent) {
}
}
/**
 * Reports whether any line of `body` looks like a bulleted progress entry.
 *
 * A matching line is optional leading whitespace, a `-` or `*` marker, at
 * least one whitespace character, and then non-empty text that contains no
 * backticks other than an optional one at each end. Trailing whitespace is
 * allowed.
 *
 * @param body - Message body to scan; a missing body never matches.
 * @returns `true` when at least one line matches, otherwise `false`.
 */
function hasMatrixQaToolProgressPreviewLine(body: string | undefined) {
  if (body == null) {
    return false;
  }
  // Split on LF or CRLF so each line is tested on its own.
  for (const line of body.split(/\r?\n/)) {
    if (/^\s*[-*]\s+`?[^`\s][^`]*`?\s*$/u.test(line)) {
      return true;
    }
  }
  return false;
}
/**
 * Shortens a message body for use in diagnostic output.
 *
 * @param body - Raw message body, possibly missing.
 * @returns `"<none>"` for a missing or empty body; the body unchanged when its
 *   length is at most 240; otherwise its first 237 characters followed by
 *   `"..."`, for a total length of 240.
 */
function truncateMatrixQaToolProgressBody(body: string | undefined) {
  if (body == null || body.length === 0) {
    return "<none>";
  }
  const maxLength = 240;
  const ellipsis = "...";
  if (body.length > maxLength) {
    return body.slice(0, maxLength - ellipsis.length) + ellipsis;
  }
  return body;
}
/**
 * Renders one observed event as a single space-separated diagnostic line.
 *
 * The line holds the event id and kind, the relation as `relType:eventId`
 * (or `<none>` when the event has no relation type), and the truncated body
 * as a JSON string literal.
 *
 * @param event - Observed event to describe.
 * @returns The formatted one-line description.
 */
function describeMatrixQaToolProgressCandidate(event: MatrixQaObservedEvent) {
  const relatesTo = event.relatesTo;
  let relation = "<none>";
  if (relatesTo?.relType) {
    // A relation type without a target id still gets reported, with a placeholder id.
    relation = `${relatesTo.relType}:${relatesTo.eventId ?? "<none>"}`;
  }
  const quotedBody = JSON.stringify(truncateMatrixQaToolProgressBody(event.body));
  return `${event.eventId} kind=${event.kind} relation=${relation} body=${quotedBody}`;
}
/**
 * Builds the multi-line failure text for a failed tool-progress wait.
 *
 * The text starts with the cause (an `Error`'s message, or a generic prefix
 * plus the stringified value), then the preview event id, then up to the last
 * eight candidate events observed from `startIndex` onward.
 *
 * A candidate is an `m.room.message` in `roomId` sent by `sutUserId` with the
 * expected kind that either is the preview event, relates to it, or has a body
 * containing the word "Working" (case-insensitive).
 *
 * @param params - Failure cause plus the observed events and the identifiers
 *   used to select candidates.
 * @returns Newline-joined diagnostic message.
 */
function buildMatrixQaToolProgressTimeoutMessage(params: {
  cause: unknown;
  events: MatrixQaObservedEvent[];
  expectedPreviewKind: MatrixQaObservedEvent["kind"];
  previewEventId: string;
  roomId: string;
  startIndex: number;
  sutUserId: string;
}) {
  const { cause, events, expectedPreviewKind, previewEventId, roomId, startIndex, sutUserId } =
    params;

  const isPreviewCandidate = (event: MatrixQaObservedEvent): boolean => {
    const isExpectedSutMessage =
      event.roomId === roomId &&
      event.sender === sutUserId &&
      event.type === "m.room.message" &&
      event.kind === expectedPreviewKind;
    if (!isExpectedSutMessage) {
      return false;
    }
    if (event.eventId === previewEventId) {
      return true;
    }
    if (event.relatesTo?.eventId === previewEventId) {
      return true;
    }
    return /\bWorking\b/i.test(event.body ?? "");
  };

  // Only events recorded since the scenario started are considered; keep the
  // most recent eight so the message stays bounded.
  const recentCandidates = events.slice(startIndex).filter(isPreviewCandidate).slice(-8);
  const candidateLines: string[] = [];
  if (recentCandidates.length === 0) {
    candidateLines.push("observed preview candidates: <none>");
  } else {
    candidateLines.push("observed preview candidates:");
    for (const candidate of recentCandidates) {
      candidateLines.push(describeMatrixQaToolProgressCandidate(candidate));
    }
  }

  const headline =
    cause instanceof Error ? cause.message : `Matrix tool progress wait failed: ${String(cause)}`;
  return [headline, `preview event: ${previewEventId}`, ...candidateLines].join("\n");
}
async function runMatrixToolProgressScenario(
context: MatrixQaScenarioContext,
params: {
expectedPreviewKind: MatrixQaObservedEvent["kind"];
finalText: string;
label: string;
allowGenericProgressLine?: boolean;
mentionSafety?: boolean;
progressPattern: RegExp;
triggerBodyBuilder: (sutUserId: string, finalText: string) => string;
@@ -721,22 +786,39 @@ async function runMatrixToolProgressScenario(
since: startSince,
timeoutMs: context.timeoutMs,
});
const progress = params.progressPattern.test(preview.event.body ?? "")
const matchesExpectedProgress = (body: string | undefined) =>
params.progressPattern.test(body ?? "") ||
(params.allowGenericProgressLine === true && hasMatrixQaToolProgressPreviewLine(body));
const progress = matchesExpectedProgress(preview.event.body)
? preview
: await client.waitForRoomEvent({
observedEvents: context.observedEvents,
predicate: (event) =>
event.roomId === context.roomId &&
event.sender === context.sutUserId &&
event.kind === params.expectedPreviewKind &&
event.relatesTo?.relType === "m.replace" &&
event.relatesTo.eventId === preview.event.eventId &&
/\bWorking\b/i.test(event.body ?? "") &&
params.progressPattern.test(event.body ?? ""),
roomId: context.roomId,
since: preview.since,
timeoutMs: context.timeoutMs,
});
: await client
.waitForRoomEvent({
observedEvents: context.observedEvents,
predicate: (event) =>
event.roomId === context.roomId &&
event.sender === context.sutUserId &&
event.kind === params.expectedPreviewKind &&
event.relatesTo?.relType === "m.replace" &&
event.relatesTo.eventId === preview.event.eventId &&
/\bWorking\b/i.test(event.body ?? "") &&
matchesExpectedProgress(event.body),
roomId: context.roomId,
since: preview.since,
timeoutMs: context.timeoutMs,
})
.catch((err: unknown) => {
throw new Error(
buildMatrixQaToolProgressTimeoutMessage({
cause: err,
events: context.observedEvents,
expectedPreviewKind: params.expectedPreviewKind,
previewEventId: preview.event.eventId,
roomId: context.roomId,
startIndex: startObservedIndex,
sutUserId: context.sutUserId,
}),
);
});
if (params.mentionSafety) {
assertMatrixQaToolProgressMentionsInert(progress.event);
@@ -804,6 +886,7 @@ export async function runToolProgressPreviewScenario(context: MatrixQaScenarioCo
expectedPreviewKind: "notice",
finalText: buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS"),
label: "tool progress preview",
allowGenericProgressLine: true,
progressPattern: /\btool:\s*read\b/i,
triggerBodyBuilder: buildMatrixToolProgressPrompt,
});

View File

@@ -2554,6 +2554,111 @@ describe("matrix live qa scenarios", () => {
});
});
// Checks that the "matrix-room-tool-progress-preview" scenario resolves when
// the preview edit lists a tool other than `read` (here `exec_command`) as a
// bulleted, backtick-wrapped line under "Working...".
it("accepts non-read Matrix tool progress lines in quiet previews", async () => {
const previewEventId = "$tool-progress-generic-preview";
// NOTE(review): presumably the mocked client hands these events out in array
// order; confirm against mockMatrixQaRoomClient.
mockMatrixQaRoomClient({
driverEventId: "$tool-progress-generic-trigger",
events: [
// Initial preview notice with no tool line yet.
{
event: matrixQaMessageEvent({
kind: "notice",
eventId: previewEventId,
body: "Working...",
}),
since: "driver-sync-preview",
},
// Edit (m.replace) of the preview that adds the non-read tool progress line.
{
event: matrixQaMessageEvent({
kind: "notice",
eventId: "$tool-progress-generic-update",
body: "Working...\n- `tool: exec_command`",
relatesTo: {
relType: "m.replace",
eventId: previewEventId,
},
}),
since: "driver-sync-progress",
},
// Final edit of the preview; its body is derived from the body of the first
// sendTextMessage call, so it is built lazily from the mock's recorded calls.
{
event: ({ sendTextMessage }) =>
matrixQaMessageEvent({
kind: "notice",
eventId: "$tool-progress-generic-final",
body: readMatrixQaReplyDirective(
sendTextMessage.mock.calls[0]?.[0]?.body,
"MATRIX_QA_TOOL_PROGRESS_FIXED",
),
relatesTo: {
relType: "m.replace",
eventId: previewEventId,
},
}),
since: "driver-sync-next",
},
],
});
const scenario = MATRIX_QA_SCENARIOS.find(
(entry) => entry.id === "matrix-room-tool-progress-preview",
);
// Guard for the non-null assertion on `scenario` below.
expect(scenario).toBeDefined();
// The reported preview body must be the edited body containing the
// exec_command line, and the reply must be the final edit.
await expect(runMatrixQaScenario(scenario!, matrixQaScenarioContext())).resolves.toMatchObject({
artifacts: {
driverEventId: "$tool-progress-generic-trigger",
previewBodyPreview: "Working...\n- `tool: exec_command`",
previewEventId: "$tool-progress-generic-preview",
reply: {
eventId: "$tool-progress-generic-final",
},
},
});
});
// Checks that when the second waitForRoomEvent call rejects, the scenario's
// rejection message lists the observed preview candidates, including the edit
// event that was recorded before the rejection.
it("reports Matrix tool progress preview candidates when the progress wait times out", async () => {
const previewEvent = matrixQaMessageEvent({
kind: "notice",
eventId: "$tool-progress-timeout-preview",
body: "Working...",
});
// Edit of the preview whose second line is plain text, not a bulleted tool line.
const updateEvent = matrixQaMessageEvent({
kind: "notice",
eventId: "$tool-progress-timeout-update",
body: "Working...\nstill deciding",
relatesTo: {
relType: "m.replace",
eventId: previewEvent.eventId,
},
});
const context = matrixQaScenarioContext();
const primeRoom = vi.fn().mockResolvedValue("driver-sync-start");
const sendTextMessage = vi.fn().mockResolvedValue("$tool-progress-timeout-trigger");
const waitForRoomEvent = vi
.fn()
// First call: record the preview in the shared observed-events list and return it.
.mockImplementationOnce(async () => {
context.observedEvents.push(previewEvent);
return { event: previewEvent, since: "driver-sync-preview" };
})
// Second call: record the edit first, then reject, so the edit is already in
// context.observedEvents when the failure is handled.
.mockImplementationOnce(async () => {
context.observedEvents.push(updateEvent);
throw new Error("timed out after 8000ms waiting for Matrix room event");
});
createMatrixQaClient.mockReturnValue({
primeRoom,
sendTextMessage,
waitForRoomEvent,
});
const scenario = MATRIX_QA_SCENARIOS.find(
(entry) => entry.id === "matrix-room-tool-progress-preview",
);
// Guard for the non-null assertion on `scenario` below.
expect(scenario).toBeDefined();
// The rejection message must contain the candidates header followed (possibly
// on a later line) by the edit event's id.
await expect(runMatrixQaScenario(scenario!, context)).rejects.toThrow(
/observed preview candidates:[\s\S]*\$tool-progress-timeout-update/,
);
});
it("keeps Matrix tool progress opt-out from creating Working previews", async () => {
const { waitForRoomEvent } = mockMatrixQaRoomClient({
driverEventId: "$tool-progress-optout-trigger",