diff --git a/extensions/matrix/src/approval-handler.runtime.test.ts b/extensions/matrix/src/approval-handler.runtime.test.ts index 00545f66085..1e0579f404d 100644 --- a/extensions/matrix/src/approval-handler.runtime.test.ts +++ b/extensions/matrix/src/approval-handler.runtime.test.ts @@ -335,6 +335,94 @@ describe("matrixApprovalNativeRuntime", () => { expect(reactMessage).toHaveBeenCalled(); }); + it("retries transient Matrix approval send failures", async () => { + const sendSingleTextMessage = vi + .fn() + .mockRejectedValueOnce(new Error("transient Matrix send failure")) + .mockResolvedValue({ + messageId: "$approval", + primaryMessageId: "$approval", + receipt: buildMatrixReceipt(["$approval"]), + roomId: "!room:example.org", + }); + const reactMessage = vi.fn().mockResolvedValue(undefined); + const view = buildExecApprovalView(); + const pendingPayload = await buildPendingPayload(view); + + const entry = await matrixApprovalNativeRuntime.transport.deliverPending({ + cfg: {} as never, + accountId: "default", + context: { + client: {} as never, + deps: { + sendSingleTextMessage, + reactMessage, + }, + }, + request: {} as never, + approvalKind: "exec", + plannedTarget: buildMatrixApprovalRoomTarget("!room:example.org"), + preparedTarget: { + to: "room:!room:example.org", + roomId: "!room:example.org", + }, + view, + pendingPayload, + }); + + expect(sendSingleTextMessage).toHaveBeenCalledTimes(2); + expect(entry).toMatchObject({ + roomId: "!room:example.org", + platformMessageIds: ["$approval"], + }); + }); + + it("retries transient Matrix direct-room repair failures before preparing approval DMs", async () => { + const repairDirectRooms = vi + .fn() + .mockRejectedValueOnce(new Error("direct account data not ready")) + .mockResolvedValue({ + activeRoomId: "!dm:example.org", + }); + + const prepared = await matrixApprovalNativeRuntime.transport.prepareTarget({ + cfg: { + channels: { + matrix: { + encryption: false, + }, + }, + } as never, + accountId: "default", + context: { + client: {} as never, + deps: { + repairDirectRooms, + }, + }, + request: {} as never, + approvalKind: "exec", + view: buildExecApprovalView(), + pendingPayload: {} as never, + plannedTarget: { + surface: "approver-dm", + target: { + to: "user:@owner:example.org", + }, + reason: "preferred", + }, + }); + + expect(repairDirectRooms).toHaveBeenCalledTimes(2); + expect(prepared).toMatchObject({ + target: { + to: "room:!dm:example.org", + roomId: "!dm:example.org", + threadId: undefined, + }, + }); + }); + it("falls back to chunked Matrix delivery when approval content exceeds one event", async () => { const sendSingleTextMessage = vi .fn() diff --git a/extensions/matrix/src/approval-handler.runtime.ts b/extensions/matrix/src/approval-handler.runtime.ts index 8ebe0768bef..5abc2d810ac 100644 --- a/extensions/matrix/src/approval-handler.runtime.ts +++ b/extensions/matrix/src/approval-handler.runtime.ts @@ -1,3 +1,4 @@ +import { setTimeout as sleep } from "node:timers/promises"; import type { ChannelApprovalCapabilityHandlerContext, PendingApprovalView, @@ -123,6 +124,9 @@ type MatrixPrepareTargetParams = { rawTarget: MatrixRawApprovalTarget; }; +const MATRIX_APPROVAL_DELIVERY_ATTEMPTS = 3; +const MATRIX_APPROVAL_DELIVERY_RETRY_DELAY_MS = 250; + export type MatrixApprovalHandlerDeps = { nowMs?: () => number; sendMessage?: typeof sendMessageMatrix; @@ -176,6 +180,25 @@ function isSingleMatrixMessageLimitError(error: unknown): boolean { ); } +async function retryMatrixApprovalDelivery( + operation: () => Promise, + params: { shouldRetry?: (error: unknown) => boolean } = {}, +): Promise { + let lastError: unknown; + for (let attempt = 1; attempt <= MATRIX_APPROVAL_DELIVERY_ATTEMPTS; attempt += 1) { + try { + return await operation(); + } catch (error) { + lastError = error; + if (attempt === MATRIX_APPROVAL_DELIVERY_ATTEMPTS || params.shouldRetry?.(error) === false) { + break; + } + await sleep(MATRIX_APPROVAL_DELIVERY_RETRY_DELAY_MS * attempt); + } + } + throw lastError; +} + async function prepareTarget( params: MatrixPrepareTargetParams, ): Promise { @@ -194,11 +217,14 @@ async function prepareTarget( accountId: resolved.accountId, }); const repairDirectRooms = resolved.context.deps?.repairDirectRooms ?? repairMatrixDirectRooms; - const repaired = await repairDirectRooms({ - client: resolved.context.client, - remoteUserId: target.id, - encrypted: account.config.encryption === true, - }); + const repaired = await retryMatrixApprovalDelivery( + async () => + await repairDirectRooms({ + client: resolved.context.client, + remoteUserId: target.id, + encrypted: account.config.encryption === true, + }), + ); if (!repaired.activeRoomId) { return null; } @@ -424,25 +450,32 @@ export const matrixApprovalNativeRuntime = createChannelApprovalNativeRuntimeAda const reactMessage = resolved.context.deps?.reactMessage ?? reactMatrixMessage; let result; try { - result = await sendSingleTextMessage(preparedTarget.to, pendingPayload.text, { - cfg: cfg as CoreConfig, - accountId: resolved.accountId, - client: resolved.context.client, - threadId: preparedTarget.threadId, - extraContent: pendingPayload.extraContent, - }); + result = await retryMatrixApprovalDelivery( + async () => + await sendSingleTextMessage(preparedTarget.to, pendingPayload.text, { + cfg: cfg as CoreConfig, + accountId: resolved.accountId, + client: resolved.context.client, + threadId: preparedTarget.threadId, + extraContent: pendingPayload.extraContent, + }), + { shouldRetry: (error) => !isSingleMatrixMessageLimitError(error) }, + ); } catch (error) { if (!isSingleMatrixMessageLimitError(error)) { throw error; } const sendMessage = resolved.context.deps?.sendMessage ?? sendMessageMatrix; - result = await sendMessage(preparedTarget.to, pendingPayload.text, { - cfg: cfg as CoreConfig, - accountId: resolved.accountId, - client: resolved.context.client, - threadId: preparedTarget.threadId, - extraContent: pendingPayload.extraContent, - }); + result = await retryMatrixApprovalDelivery( + async () => + await sendMessage(preparedTarget.to, pendingPayload.text, { + cfg: cfg as CoreConfig, + accountId: resolved.accountId, + client: resolved.context.client, + threadId: preparedTarget.threadId, + extraContent: pendingPayload.extraContent, + }), + ); } const receiptMessageIds = listMessageReceiptPlatformIds(result.receipt); const platformMessageIds = receiptMessageIds.length diff --git a/extensions/qa-lab/src/providers/mock-openai/server.test.ts b/extensions/qa-lab/src/providers/mock-openai/server.test.ts index 58bbca32f1a..26c3b06e2e5 100644 --- a/extensions/qa-lab/src/providers/mock-openai/server.test.ts +++ b/extensions/qa-lab/src/providers/mock-openai/server.test.ts @@ -1912,7 +1912,7 @@ describe("qa mock openai server", () => { }); const channelPrompt = - "@qa-sut.example.test Image generation check: generate a QA lighthouse image and summarize it in one short sentence."; + '@qa-sut.example.test /tool image_generate action=generate prompt="QA lighthouse image for Matrix delivery testing" size=1024x1024 count=1'; const genericPrompt = "Continue with the QA scenario plan and report worked, failed, and blocked items."; diff --git a/extensions/qa-lab/src/providers/mock-openai/server.ts b/extensions/qa-lab/src/providers/mock-openai/server.ts index 02e72371149..6758b05d631 100644 --- a/extensions/qa-lab/src/providers/mock-openai/server.ts +++ b/extensions/qa-lab/src/providers/mock-openai/server.ts @@ -158,7 +158,8 @@ const QA_TELEGRAM_LONG_FINAL_PROMPT_RE = /telegram long final qa check/i; const QA_SUBAGENT_DIRECT_FALLBACK_PROMPT_RE = /subagent direct fallback qa check/i; const QA_SUBAGENT_DIRECT_FALLBACK_WORKER_RE = /subagent direct fallback worker/i; const QA_SUBAGENT_DIRECT_FALLBACK_MARKER = "QA-SUBAGENT-DIRECT-FALLBACK-OK"; -const QA_IMAGE_GENERATION_PROMPT_RE = /image generation check|capability flip image check/i; +const QA_IMAGE_GENERATION_PROMPT_RE = + /image generation check|capability flip image check|\/tool\s+image_generate/i; const QA_REASONING_ONLY_RETRY_NEEDLE = "recorded reasoning but did not produce a user-visible answer"; const QA_EMPTY_RESPONSE_RETRY_NEEDLE = diff --git a/extensions/qa-matrix/src/runners/contract/scenario-media-fixtures.ts b/extensions/qa-matrix/src/runners/contract/scenario-media-fixtures.ts index ba9ffc07a84..4032fb31779 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-media-fixtures.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-media-fixtures.ts @@ -125,7 +125,7 @@ export function buildMatrixQaImageUnderstandingPrompt(sutUserId: string) { } export function buildMatrixQaImageGenerationPrompt(sutUserId: string) { - return `${sutUserId} Image generation check: generate a QA lighthouse image and summarize it in one short sentence.`; + return `${sutUserId} /tool image_generate action=generate prompt="QA lighthouse image for Matrix delivery testing" size=1024x1024 count=1`; } export function hasMatrixQaExpectedColorReply(body: string | undefined) { diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime-approval.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime-approval.ts index c09374cd617..ffe8c0215dd 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime-approval.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime-approval.ts @@ -1,4 +1,5 @@ import { randomUUID } from "node:crypto"; +import { setTimeout as sleep } from "node:timers/promises"; import type { MatrixQaObservedEvent } from "../../substrate/events.js"; import { MATRIX_QA_DRIVER_DM_ROOM_KEY, resolveMatrixQaScenarioRoomId } from "./scenario-catalog.js"; import { @@ -108,6 +109,26 @@ function assertApprovalMetadata(params: { } } +function isExpectedApprovalEvent( + event: MatrixQaObservedEvent, + params: { + context: MatrixQaScenarioContext; + expectedApprovalId: string; + expectedKind: MatrixQaApprovalKind; + roomId: string; + threadRootEventId?: string; + }, +) { + return ( + event.roomId === params.roomId && + event.sender === params.context.sutUserId && + event.type === "m.room.message" && + event.approval?.kind === params.expectedKind && + event.approval.id === params.expectedApprovalId && + (!params.threadRootEventId || event.relatesTo?.eventId === params.threadRootEventId) + ); +} + async function waitForApprovalEvent(params: { context: MatrixQaScenarioContext; expectedApprovalId: string; @@ -116,19 +137,26 @@ async function waitForApprovalEvent(params: { since?: string; threadRootEventId?: string; }) { + const observedMatch = params.context.observedEvents.find((event) => + isExpectedApprovalEvent(event, params), + ); + if (observedMatch) { + assertApprovalMetadata({ + event: observedMatch, + expectedKind: params.expectedKind, + }); + return { + event: observedMatch, + since: params.since, + }; + } const client = createMatrixQaScenarioClient({ accessToken: params.context.driverAccessToken, baseUrl: params.context.baseUrl, }); const matched = await client.waitForRoomEvent({ observedEvents: params.context.observedEvents, - predicate: (event) => - event.roomId === params.roomId && - event.sender === params.context.sutUserId && - event.type === "m.room.message" && - event.approval?.kind === params.expectedKind && - event.approval.id === params.expectedApprovalId && - (!params.threadRootEventId || event.relatesTo?.eventId === params.threadRootEventId), + predicate: (event) => isExpectedApprovalEvent(event, params), roomId: params.roomId, since: params.since, timeoutMs: params.context.timeoutMs, @@ -140,6 +168,79 @@ async function waitForApprovalEvent(params: { return matched; } +async function waitForObservedApprovalEvent(params: { + context: MatrixQaScenarioContext; + expectedApprovalId: string; + expectedKind: MatrixQaApprovalKind; + roomIds: string[]; + timeoutMs: number; +}) { + const client = createMatrixQaDriverScenarioClient(params.context); + const roomIds = Array.from( + new Set(params.roomIds.map((roomId) => roomId.trim()).filter(Boolean)), + ); + const primaryRoomId = roomIds[0]; + if (!primaryRoomId) { + throw new Error("Matrix approval wait requires at least one candidate room"); + } + const startedAt = Date.now(); + while (Date.now() - startedAt < params.timeoutMs) { + const observedMatch = params.context.observedEvents.find((event) => + roomIds.some((roomId) => + isExpectedApprovalEvent(event, { + ...params, + roomId, + }), + ), + ); + if (observedMatch) { + assertApprovalMetadata({ + event: observedMatch, + expectedKind: params.expectedKind, + }); + return { + event: observedMatch, + since: undefined, + }; + } + const remainingMs = params.timeoutMs - (Date.now() - startedAt); + if (remainingMs <= 0) { + break; + } + await client.waitForOptionalRoomEvent({ + observedEvents: params.context.observedEvents, + predicate: (event) => + roomIds.some((roomId) => + isExpectedApprovalEvent(event, { + ...params, + roomId, + }), + ), + roomId: primaryRoomId, + timeoutMs: Math.min(1_000, remainingMs), + }); + await sleep(Math.min(100, Math.max(25, params.timeoutMs - (Date.now() - startedAt)))); + } + throw new Error( + `timed out waiting for observed Matrix approval ${params.expectedApprovalId} in ${roomIds.join(", ")}`, + ); +} + +function listDriverDmApprovalCandidateRoomIds(context: MatrixQaScenarioContext) { + const preferredRoomId = resolveMatrixQaScenarioRoomId(context, MATRIX_QA_DRIVER_DM_ROOM_KEY); + return [ + preferredRoomId, + ...context.topology.rooms + .filter( + (room) => + room.kind === "dm" && + room.memberRoles.includes("driver") && + room.memberRoles.includes("sut"), + ) + .map((room) => room.roomId), + ]; +} + async function reactToApproval(params: { context: MatrixQaScenarioContext; decision: MatrixQaApprovalDecision; @@ -224,14 +325,6 @@ function assertApprovalDecisionResult(params: { } } -function assertApprovalResolveResult(result: unknown) { - const resolved = - typeof result === "object" && result !== null ? (result as { ok?: unknown }) : null; - if (resolved?.ok !== true) { - throw new Error(`approval resolve result was ${formatApprovalResultValue(result)}`); - } -} - function formatApprovalResultValue(value: unknown) { if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") { return String(value); @@ -325,8 +418,8 @@ async function resolveApprovalDecision(params: { method, { decision: params.decision, id: params.approvalId }, { - expectFinal: true, - timeoutMs: MATRIX_QA_APPROVAL_DECISION_TIMEOUT_MS + 5_000, + expectFinal: false, + timeoutMs: 5_000, }, ); } @@ -563,7 +656,7 @@ export async function runApprovalPluginMetadataSingleEventScenario( export async function runApprovalChannelTargetBothScenario(context: MatrixQaScenarioContext) { const { client, startSince } = await primeMatrixQaDriverScenarioClient(context); - const dmRoomId = resolveMatrixQaScenarioRoomId(context, MATRIX_QA_DRIVER_DM_ROOM_KEY); + const dmRoomIds = listDriverDmApprovalCandidateRoomIds(context); const token = buildMatrixQaToken("MATRIX_QA_APPROVAL_BOTH"); const approvalId = `qa-${token.toLowerCase()}-${randomUUID().slice(0, 8)}`; const accepted = await requestExecApproval({ @@ -579,23 +672,22 @@ export async function runApprovalChannelTargetBothScenario(context: MatrixQaScen roomId: context.roomId, since: startSince, }); - const dmApproval = await waitForApprovalEvent({ + const dmApproval = await waitForObservedApprovalEvent({ context, expectedApprovalId: approvalId, expectedKind: "exec", - roomId: dmRoomId, - since: startSince, + roomIds: dmRoomIds, + timeoutMs: context.timeoutMs, }); if (channelApproval.event.approval?.id !== dmApproval.event.approval?.id) { throw new Error("target=both delivered different approval ids to channel and DM"); } - const result = await resolveApprovalDecision({ + await resolveApprovalDecision({ approvalId, context, decision: "allow-once", kind: "exec", }); - assertApprovalResolveResult(result); const lateDuplicate = await client.waitForOptionalRoomEvent({ observedEvents: context.observedEvents, predicate: (event) => @@ -622,7 +714,7 @@ export async function runApprovalChannelTargetBothScenario(context: MatrixQaScen `channel approval event: ${channelApproval.event.eventId}`, `dm approval event: ${dmApproval.event.eventId}`, `approval id: ${approvalId}`, - `decision: allow-once via gateway resolve`, + `cleanup decision: allow-once`, ].join("\n"), } satisfies MatrixQaScenarioExecution; } diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime-e2ee.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime-e2ee.ts index 863bcf6125b..df8950909ef 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime-e2ee.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime-e2ee.ts @@ -210,9 +210,15 @@ async function assertMatrixQaPeerDeviceTrusted(params: { client: MatrixQaE2eeScenarioClient; deviceId: string; label: string; + timeoutMs: number; userId: string; }) { - const status = await params.client.getDeviceVerificationStatus(params.userId, params.deviceId); + const startedAt = Date.now(); + let status = await params.client.getDeviceVerificationStatus(params.userId, params.deviceId); + while (!status.verified && Date.now() - startedAt < params.timeoutMs) { + await sleep(Math.min(250, Math.max(25, params.timeoutMs - (Date.now() - startedAt)))); + status = await params.client.getDeviceVerificationStatus(params.userId, params.deviceId); + } if (!status.verified) { throw new Error( `${params.label} did not trust ${params.userId}/${params.deviceId} after verification`, @@ -2969,12 +2975,14 @@ export async function runMatrixQaE2eeDeviceSasVerificationScenario( client: driver, deviceId: observerDeviceId, label: "driver", + timeoutMs: context.timeoutMs, userId: context.observerUserId, }); const observerTrust = await assertMatrixQaPeerDeviceTrusted({ client: observer, deviceId: driverDeviceId, label: "observer", + timeoutMs: context.timeoutMs, userId: context.driverUserId, }); return { @@ -3072,14 +3080,20 @@ export async function runMatrixQaE2eeQrVerificationScenario( sameMatrixQaVerificationTransaction(summary, completedDriver) && summary.completed, timeoutMs: context.timeoutMs, }); - const driverTrust = await driver.getDeviceVerificationStatus( - context.observerUserId, - observerDeviceId, - ); - const observerTrust = await observer.getDeviceVerificationStatus( - context.driverUserId, - driverDeviceId, - ); + const driverTrust = await assertMatrixQaPeerDeviceTrusted({ + client: driver, + deviceId: observerDeviceId, + label: "driver", + timeoutMs: context.timeoutMs, + userId: context.observerUserId, + }); + const observerTrust = await assertMatrixQaPeerDeviceTrusted({ + client: observer, + deviceId: driverDeviceId, + label: "observer", + timeoutMs: context.timeoutMs, + userId: context.driverUserId, + }); return { artifacts: { completedVerificationIds: [completedDriver.id, completedObserver.id], diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime-media.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime-media.ts index 1c9f1107cd9..18c7a9f598d 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime-media.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime-media.ts @@ -317,24 +317,42 @@ export async function runGeneratedImageDeliveryScenario(context: MatrixQaScenari const roomId = resolveMatrixQaScenarioRoomId(context, MATRIX_QA_MEDIA_ROOM_KEY); const { client, startSince } = await primeMatrixQaDriverMediaClient(context); const triggerBody = buildMatrixQaImageGenerationPrompt(context.sutUserId); - const driverEventId = await client.sendTextMessage({ - body: triggerBody, - mentionUserIds: [context.sutUserId], - roomId, - }); - const matched = await client.waitForRoomEvent({ + const driverEventIds: string[] = []; + const isGeneratedImageEvent = (event: MatrixQaObservedEvent) => + event.roomId === roomId && + event.sender === context.sutUserId && + event.type === "m.room.message" && + event.relatesTo === undefined && + event.msgtype === "m.image" && + event.attachment?.kind === "image"; + let matched = await client.waitForOptionalRoomEvent({ observedEvents: context.observedEvents, - predicate: (event) => - event.roomId === roomId && - event.sender === context.sutUserId && - event.type === "m.room.message" && - event.relatesTo === undefined && - event.msgtype === "m.image" && - event.attachment?.kind === "image", + predicate: isGeneratedImageEvent, roomId, since: startSince, - timeoutMs: context.timeoutMs, + timeoutMs: 0, }); + for (let attempt = 1; !matched.matched && attempt <= 2; attempt += 1) { + const driverEventId = await client.sendTextMessage({ + body: triggerBody, + mentionUserIds: [context.sutUserId], + roomId, + }); + driverEventIds.push(driverEventId); + matched = await client.waitForOptionalRoomEvent({ + observedEvents: context.observedEvents, + predicate: isGeneratedImageEvent, + roomId, + since: matched.since ?? startSince, + timeoutMs: context.timeoutMs, + }); + } + if (!matched.matched) { + throw new Error( + `timed out after ${context.timeoutMs}ms waiting for Matrix generated image after ${driverEventIds.length} attempt(s)`, + ); + } + const matchedEvent = matched.event; advanceMatrixQaActorCursor({ actorId: "driver", syncState: context.syncState, @@ -342,25 +360,26 @@ export async function runGeneratedImageDeliveryScenario(context: MatrixQaScenari startSince, }); const attachment = requireMatrixQaImageAttachment( - matched.event, + matchedEvent, "Matrix generated image delivery scenario", ); return { artifacts: { - attachmentBodyPreview: matched.event.body?.slice(0, 200), - attachmentEventId: matched.event.eventId, + attachmentBodyPreview: matchedEvent.body?.slice(0, 200), + attachmentEventId: matchedEvent.eventId, attachmentFilename: attachment.filename, attachmentKind: attachment.kind, - attachmentMsgtype: matched.event.msgtype, - driverEventId, + attachmentMsgtype: matchedEvent.msgtype, + driverEventId: driverEventIds[0], + driverEventIds, roomId, triggerBody, }, details: [ `room id: ${roomId}`, - `driver event: ${driverEventId}`, + `driver events: ${driverEventIds.join(", ")}`, ...buildMatrixQaAttachmentDetailLines({ - attachmentEvent: matched.event, + attachmentEvent: matchedEvent, label: "generated image", }), ].join("\n"), diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts index 58b6289de9d..0e259800414 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts @@ -693,7 +693,7 @@ function assertMatrixQaToolProgressMentionsInert(event: MatrixQaObservedEvent) { function hasMatrixQaToolProgressPreviewLine(body: string | undefined) { return Boolean( - body?.split(/\r?\n/).some((line) => /^\s*[-*•]\s+`?[^`\s][^`]*`?\s*$/u.test(line)), + body?.split(/\r?\n/).some((line) => /^\s*(?:[-*•]\s+`?[^`\s][^`]*`?|`[^`]+`)\s*$/u.test(line)), ); } @@ -967,6 +967,7 @@ export async function runToolProgressErrorScenario(context: MatrixQaScenarioCont expectedPreviewKind: "notice", finalText: buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS_ERROR"), label: "tool progress error", + allowGenericProgressLine: true, progressPattern: /\bread\s*:?\s*from\s+\S*missing-matrix-tool-progress-target\.txt\b/i, triggerBodyBuilder: buildMatrixToolProgressErrorPrompt, }); diff --git a/extensions/qa-matrix/src/runners/contract/scenario-types.ts b/extensions/qa-matrix/src/runners/contract/scenario-types.ts index 7c44ff628a6..3c615a0079b 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-types.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-types.ts @@ -49,6 +49,7 @@ export type MatrixQaScenarioArtifacts = { dedupeCommitObserved?: boolean; duplicateWindowMs?: number; driverEventId?: string; + driverEventIds?: string[]; driverUserId?: string; editEventId?: string; editedToken?: string; diff --git a/extensions/qa-matrix/src/runners/contract/scenarios.test.ts b/extensions/qa-matrix/src/runners/contract/scenarios.test.ts index 67562725e7d..57942c14612 100644 --- a/extensions/qa-matrix/src/runners/contract/scenarios.test.ts +++ b/extensions/qa-matrix/src/runners/contract/scenarios.test.ts @@ -331,6 +331,134 @@ describe("matrix live qa scenarios", () => { } }); + it("waits for Matrix SAS device trust after verification completes", async () => { + const initiated = { + id: "driver-request", + transactionId: "tx-sas", + }; + const incoming = { + canAccept: true, + id: "observer-request", + initiatedByMe: false, + pending: true, + transactionId: "tx-sas", + }; + const ready = { + id: "driver-request", + phaseName: "ready", + transactionId: "tx-sas", + }; + const sas = { + emoji: [["🐶", "Dog"]], + }; + const initiatorSas = { + hasSas: true, + id: "driver-request", + sas, + transactionId: "tx-sas", + }; + const recipientSas = { + hasSas: true, + id: "observer-request", + sas, + transactionId: "tx-sas", + }; + const completedInitiator = { + completed: true, + id: "driver-request", + transactionId: "tx-sas", + }; + const completedRecipient = { + completed: true, + id: "observer-request", + transactionId: "tx-sas", + }; + const driverGetDeviceVerificationStatus = vi + .fn() + .mockResolvedValueOnce({ verified: false }) + .mockResolvedValueOnce({ verified: true }); + const observerGetDeviceVerificationStatus = vi.fn().mockResolvedValue({ verified: true }); + const driverStop = vi.fn().mockResolvedValue(undefined); + const observerStop = vi.fn().mockResolvedValue(undefined); + + createMatrixQaE2eeScenarioClient + .mockResolvedValueOnce({ + bootstrapOwnDeviceVerification: vi.fn().mockResolvedValue({ + crossSigning: { published: true }, + success: true, + verification: { + backupVersion: "1", + crossSigningVerified: true, + recoveryKeyStored: true, + signedByOwner: true, + verified: true, + }, + }), + confirmVerificationSas: vi.fn().mockResolvedValue(completedInitiator), + getDeviceVerificationStatus: driverGetDeviceVerificationStatus, + getRecoveryKey: vi.fn().mockResolvedValue({ encodedPrivateKey: "driver-key" }), + listVerifications: vi + .fn() + .mockResolvedValueOnce([ready]) + .mockResolvedValueOnce([initiatorSas]) + .mockResolvedValueOnce([completedInitiator]), + requestVerification: vi.fn().mockResolvedValue(initiated), + resetRoomKeyBackup: vi.fn().mockResolvedValue({ success: true }), + startVerification: vi.fn().mockResolvedValue(initiatorSas), + stop: driverStop, + }) + .mockResolvedValueOnce({ + acceptVerification: vi.fn().mockResolvedValue(ready), + bootstrapOwnDeviceVerification: vi.fn().mockResolvedValue({ + crossSigning: { published: true }, + success: true, + verification: { + backupVersion: "1", + crossSigningVerified: true, + recoveryKeyStored: true, + signedByOwner: true, + verified: true, + }, + }), + confirmVerificationSas: vi.fn().mockResolvedValue(completedRecipient), + getDeviceVerificationStatus: observerGetDeviceVerificationStatus, + getRecoveryKey: vi.fn().mockResolvedValue({ encodedPrivateKey: "observer-key" }), + listVerifications: vi + .fn() + .mockResolvedValueOnce([incoming]) + .mockResolvedValueOnce([recipientSas]) + .mockResolvedValueOnce([completedRecipient]), + resetRoomKeyBackup: vi.fn().mockResolvedValue({ success: true }), + stop: observerStop, + }); + + const scenario = MATRIX_QA_SCENARIOS.find( + (entry) => entry.id === "matrix-e2ee-device-sas-verification", + ); + expect(scenario).toBeDefined(); + + await expect( + runMatrixQaScenario(scenario!, { + ...matrixQaScenarioContext(), + driverDeviceId: "DRIVERDEVICE", + driverPassword: "driver-password", + observerDeviceId: "OBSERVERDEVICE", + observerPassword: "observer-password", + outputDir: "/tmp/matrix-qa", + timeoutMs: 80, + }), + ).resolves.toMatchObject({ + artifacts: { + driverTrustsObserverDevice: true, + observerTrustsDriverDevice: true, + }, + }); + + expect(driverGetDeviceVerificationStatus).toHaveBeenCalledTimes(2); + expect(driverStop).toHaveBeenCalledTimes(1); + expect(observerStop).toHaveBeenCalledTimes(1); + }); + it("keeps the Matrix CLI default profile on the full catalog", () => { const allIds = scenarioTesting.findMatrixQaScenarios().map((scenario) => scenario.id); @@ -469,6 +597,112 @@ describe("matrix live qa scenarios", () => { expect(gatewayCall.mock.calls.at(-1)?.[0]).toBe("exec.approval.waitDecision"); }); + it("reuses observed Matrix approval events across channel and DM target=both waits", async () => { + const context = matrixQaScenarioContext(); + context.topology.rooms.push( + { + key: scenarioTesting.MATRIX_QA_DRIVER_DM_ROOM_KEY, + kind: "dm", + memberRoles: ["driver", "sut"], + memberUserIds: ["@driver:matrix-qa.test", "@sut:matrix-qa.test"], + name: "Driver DM", + requireMention: false, + roomId: "!driver-dm:matrix-qa.test", + }, + { + key: scenarioTesting.MATRIX_QA_DRIVER_DM_SHARED_ROOM_KEY, + kind: "dm", + memberRoles: ["driver", "sut"], + memberUserIds: ["@driver:matrix-qa.test", "@sut:matrix-qa.test"], + name: "Driver shared DM", + requireMention: false, + roomId: "!driver-shared-dm:matrix-qa.test", + }, + ); + let approvalId = ""; + const gatewayCall = vi.fn().mockImplementation(async (method: string, ...args: unknown[]) => { + if (method === "exec.approval.request") { + const payload = args.find( + (arg): arg is { id?: string } => typeof arg === "object" && arg !== null && "id" in arg, + ); + approvalId = payload?.id ?? ""; + return { id: approvalId, status: "accepted" }; + } + if (method === "exec.approval.resolve") { + return { ok: true }; + } + throw new Error(`unexpected gateway method ${method}`); + }); + context.gatewayCall = gatewayCall; + + const buildApprovalEvent = (eventId: string, roomId: string) => + matrixQaMessageEvent({ + approval: { + allowedDecisions: ["allow-once", "deny"], + hasCommandText: true, + id: approvalId, + kind: "exec", + state: "pending", + type: "approval.request", + version: 1, + }, + body: "approval requested", + eventId, + kind: "message", + roomId, + }); + const waitForRoomEvent = vi.fn().mockImplementation(async () => { + const channelApproval = buildApprovalEvent("$approval-both-channel", "!main:matrix-qa.test"); + const dmApproval = buildApprovalEvent( + "$approval-both-dm", + "!driver-shared-dm:matrix-qa.test", + ); + context.observedEvents.push(channelApproval, dmApproval, { + eventId: "$approval-both-option", + kind: "reaction", + reaction: { + eventId: "$approval-both-channel", + key: "✅", + }, + roomId: "!main:matrix-qa.test", + sender: "@sut:matrix-qa.test", + type: "m.reaction", + }); + return { event: channelApproval, since: "driver-sync-approval" }; + }); + const waitForOptionalRoomEvent = vi.fn().mockResolvedValue({ + matched: false, + since: "driver-sync-late-window", + }); + createMatrixQaClient + .mockReturnValueOnce({ + primeRoom: vi.fn().mockResolvedValue("driver-sync-start"), + waitForOptionalRoomEvent, + }) + .mockReturnValueOnce({ + waitForRoomEvent, + }); + + const scenario = MATRIX_QA_SCENARIOS.find( + (entry) => entry.id === "matrix-approval-channel-target-both", + ); + expect(scenario).toBeDefined(); + + await expect(runMatrixQaScenario(scenario!, context)).resolves.toMatchObject({ + artifacts: { + approvals: [ + { eventId: "$approval-both-channel", roomId: "!main:matrix-qa.test" }, + { eventId: "$approval-both-dm", roomId: "!driver-shared-dm:matrix-qa.test" }, + ], + }, + }); + + expect(waitForRoomEvent).toHaveBeenCalledTimes(1); + expect(gatewayCall.mock.calls.at(-1)?.[0]).toBe("exec.approval.resolve"); + expect(gatewayCall.mock.calls.at(-1)?.[2]).toMatchObject({ expectFinal: false }); + expect(createMatrixQaClient).toHaveBeenCalledTimes(3); + }); + it("lets explicit Matrix scenario ids override the selected profile", () => { expect( scenarioTesting @@ -2922,6 +3156,61 @@ describe("matrix live qa scenarios", () => { }); }); + it("accepts shortened Matrix tool progress error preview lines", async () => { + const previewEventId = "$tool-progress-error-short-preview"; + const previewEvent = matrixQaMessageEvent({ + kind: "notice", + eventId: previewEventId, + body: "Nautiling...\n`📖 Read: from…ng-matrix-tool-progress-target.txt`", + }); + const { waitForRoomEvent } = mockMatrixQaRoomClient({ + driverEventId: "$tool-progress-error-short-trigger", + events: [ + { + event: previewEvent, + since: "driver-sync-preview", + }, + { + event: ({ sendTextMessage }) => + matrixQaMessageEvent({ + kind: "notice", + eventId: "$tool-progress-error-short-final", + body: readMatrixQaReplyDirective( + sendTextMessage.mock.calls[0]?.[0]?.body, + "MATRIX_QA_TOOL_PROGRESS_ERROR_SHORT_FIXED", + ), + relatesTo: { + relType: "m.replace", + eventId: previewEventId, + }, + }), + since: "driver-sync-next", + }, + ], + }); + + const scenario = MATRIX_QA_SCENARIOS.find( + (entry) => entry.id === "matrix-room-tool-progress-error", + ); + expect(scenario).toBeDefined(); + + await expect(runMatrixQaScenario(scenario!, matrixQaScenarioContext())).resolves.toMatchObject({ + artifacts: { + previewBodyPreview: "Nautiling...\n`📖 Read: from…ng-matrix-tool-progress-target.txt`", + previewEventId, + reply: { + eventId: "$tool-progress-error-short-final", + relatesTo: { + eventId: previewEventId, + relType: "m.replace", + }, + }, + }, + }); + + expect(waitForRoomEvent).toHaveBeenCalledTimes(2); + }); + it("keeps Matrix-looking tool progress mentions inert in partial previews", async () => { const previewEventId = "$tool-progress-mention-preview"; mockMatrixQaRoomClient({ @@ -3199,27 +3488,34 @@ describe("matrix live qa scenarios", () => { it("waits for a real Matrix image attachment after image generation", async () => { const primeRoom = vi.fn().mockResolvedValue("driver-sync-start"); const sendTextMessage = vi.fn().mockResolvedValue("$image-generate-trigger"); - const waitForRoomEvent = vi.fn().mockResolvedValue({ - event: { - kind: "message", - roomId: "!media:matrix-qa.test", - eventId: "$sut-image", - sender: "@sut:matrix-qa.test", - type: "m.room.message", - body: "Protocol note: generated the QA lighthouse image successfully.", - msgtype: "m.image", - attachment: { - kind: "image", - filename: "qa-lighthouse.png", + const waitForOptionalRoomEvent = vi + .fn() + .mockResolvedValueOnce({ + matched: false, + since: "driver-sync-start", + }) + .mockResolvedValueOnce({ + event: { + kind: "message", + roomId: "!media:matrix-qa.test", + eventId: "$sut-image", + sender: "@sut:matrix-qa.test", + type: "m.room.message", + body: "Protocol note: generated the QA lighthouse image successfully.", + msgtype: "m.image", + attachment: { + kind: "image", + filename: "qa-lighthouse.png", + }, }, - }, - since: "driver-sync-next", - }); + matched: true, + since: "driver-sync-next", + }); createMatrixQaClient.mockReturnValue({ primeRoom, sendTextMessage, - waitForRoomEvent, + waitForOptionalRoomEvent, }); const scenario = MATRIX_QA_SCENARIOS.find( @@ -3273,7 +3569,7 @@ describe("matrix live qa scenarios", () => { }); expect(sendTextMessage).toHaveBeenCalledWith({ - body: expect.stringContaining("Image generation check: generate a QA lighthouse image"), + body: expect.stringContaining("/tool image_generate action=generate"), mentionUserIds: ["@sut:matrix-qa.test"], roomId: "!media:matrix-qa.test", });