fix: stabilize Matrix tool progress QA (#78179)

* fix: stabilize matrix tool progress QA

* fix: handle backtick matrix progress previews

* fix: reuse observed matrix approvals

* fix: retry matrix generated image QA

* fix: wait for matrix sas trust propagation

* fix: resolve matrix target both approvals by reaction

* fix: avoid matrix target both approval echo wait

* fix: reuse observed matrix target both dm approval

* fix: retry matrix approval delivery

* fix: accept active matrix approval dm

* test: align matrix approval retry receipt

* test: include matrix approval view in retry fixture
This commit is contained in:
Patrick Erichsen
2026-05-05 23:20:08 -07:00
committed by GitHub
parent eb4d654796
commit 5107384e67
11 changed files with 639 additions and 94 deletions

View File

@@ -335,6 +335,94 @@ describe("matrixApprovalNativeRuntime", () => {
expect(reactMessage).toHaveBeenCalled();
});
// Covers the retry path of transport.deliverPending: the injected
// sendSingleTextMessage rejects once and then resolves, and the returned entry
// must still describe the delivered approval event.
it("retries transient Matrix approval send failures", async () => {
const sendSingleTextMessage = vi
.fn()
// First call fails; every later call resolves with a single-event receipt.
.mockRejectedValueOnce(new Error("transient Matrix send failure"))
.mockResolvedValue({
messageId: "$approval",
primaryMessageId: "$approval",
receipt: buildMatrixReceipt(["$approval"]),
roomId: "!room:example.org",
});
const reactMessage = vi.fn().mockResolvedValue(undefined);
const view = buildExecApprovalView();
const pendingPayload = await buildPendingPayload(view);
// cfg, client and request are opaque placeholders here; the send and react
// functions are supplied through context.deps.
const entry = await matrixApprovalNativeRuntime.transport.deliverPending({
cfg: {} as never,
accountId: "default",
context: {
client: {} as never,
deps: {
sendSingleTextMessage,
reactMessage,
},
},
request: {} as never,
approvalKind: "exec",
plannedTarget: buildMatrixApprovalRoomTarget("!room:example.org"),
preparedTarget: {
to: "room:!room:example.org",
roomId: "!room:example.org",
},
view,
pendingPayload,
});
// One rejected attempt plus one successful attempt.
expect(sendSingleTextMessage).toHaveBeenCalledTimes(2);
expect(entry).toMatchObject({
roomId: "!room:example.org",
platformMessageIds: ["$approval"],
});
});
// Covers the retry path of transport.prepareTarget for an approver-DM target:
// the injected repairDirectRooms rejects once, then reports an active DM room,
// and the prepared target must point at that room.
it("retries transient Matrix direct-room repair failures before preparing approval DMs", async () => {
const repairDirectRooms = vi
.fn()
// First call fails; every later call resolves with the active DM room id.
.mockRejectedValueOnce(new Error("direct account data not ready"))
.mockResolvedValue({
activeRoomId: "!dm:example.org",
});
const prepared = await matrixApprovalNativeRuntime.transport.prepareTarget({
cfg: {
channels: {
matrix: {
encryption: false,
},
},
} as never,
accountId: "default",
context: {
client: {} as never,
deps: {
repairDirectRooms,
},
},
request: {} as never,
approvalKind: "exec",
view: buildExecApprovalView(),
pendingPayload: {} as never,
// The planned target addresses a user, not a room.
plannedTarget: {
surface: "approver-dm",
target: {
to: "user:@owner:example.org",
},
reason: "preferred",
},
});
// One rejected attempt plus one successful attempt.
expect(repairDirectRooms).toHaveBeenCalledTimes(2);
// The prepared target is the room reported by the repair call, with no thread.
expect(prepared).toMatchObject({
target: {
to: "room:!dm:example.org",
roomId: "!dm:example.org",
threadId: undefined,
},
});
});
it("falls back to chunked Matrix delivery when approval content exceeds one event", async () => {
const sendSingleTextMessage = vi
.fn()

View File

@@ -1,3 +1,4 @@
import { setTimeout as sleep } from "node:timers/promises";
import type {
ChannelApprovalCapabilityHandlerContext,
PendingApprovalView,
@@ -123,6 +124,9 @@ type MatrixPrepareTargetParams = {
rawTarget: MatrixRawApprovalTarget;
};
const MATRIX_APPROVAL_DELIVERY_ATTEMPTS = 3;
const MATRIX_APPROVAL_DELIVERY_RETRY_DELAY_MS = 250;
export type MatrixApprovalHandlerDeps = {
nowMs?: () => number;
sendMessage?: typeof sendMessageMatrix;
@@ -176,6 +180,25 @@ function isSingleMatrixMessageLimitError(error: unknown): boolean {
);
}
/**
 * Runs `operation`, retrying after a failure until it succeeds or the attempt
 * budget (`MATRIX_APPROVAL_DELIVERY_ATTEMPTS`) is used up.
 *
 * @param operation - Async work to attempt; invoked afresh on every attempt.
 * @param params - Optional settings. `shouldRetry` receives the thrown error
 *   and can veto further attempts by returning `false`; it is not consulted
 *   after the final attempt.
 * @returns The value of the first attempt that resolves.
 * @throws The error from the last attempt that ran.
 */
async function retryMatrixApprovalDelivery<T>(
  operation: () => Promise<T>,
  params: { shouldRetry?: (error: unknown) => boolean } = {},
): Promise<T> {
  let attempt = 0;
  for (;;) {
    attempt += 1;
    try {
      return await operation();
    } catch (error) {
      const outOfAttempts = attempt === MATRIX_APPROVAL_DELIVERY_ATTEMPTS;
      if (outOfAttempts || params.shouldRetry?.(error) === false) {
        throw error;
      }
    }
    // The pause grows linearly with the number of failed attempts so far.
    await sleep(MATRIX_APPROVAL_DELIVERY_RETRY_DELAY_MS * attempt);
  }
}
async function prepareTarget(
params: MatrixPrepareTargetParams,
): Promise<PreparedMatrixTarget | null> {
@@ -194,11 +217,14 @@ async function prepareTarget(
accountId: resolved.accountId,
});
const repairDirectRooms = resolved.context.deps?.repairDirectRooms ?? repairMatrixDirectRooms;
const repaired = await repairDirectRooms({
client: resolved.context.client,
remoteUserId: target.id,
encrypted: account.config.encryption === true,
});
const repaired = await retryMatrixApprovalDelivery(
async () =>
await repairDirectRooms({
client: resolved.context.client,
remoteUserId: target.id,
encrypted: account.config.encryption === true,
}),
);
if (!repaired.activeRoomId) {
return null;
}
@@ -424,25 +450,32 @@ export const matrixApprovalNativeRuntime = createChannelApprovalNativeRuntimeAda
const reactMessage = resolved.context.deps?.reactMessage ?? reactMatrixMessage;
let result;
try {
result = await sendSingleTextMessage(preparedTarget.to, pendingPayload.text, {
cfg: cfg as CoreConfig,
accountId: resolved.accountId,
client: resolved.context.client,
threadId: preparedTarget.threadId,
extraContent: pendingPayload.extraContent,
});
result = await retryMatrixApprovalDelivery(
async () =>
await sendSingleTextMessage(preparedTarget.to, pendingPayload.text, {
cfg: cfg as CoreConfig,
accountId: resolved.accountId,
client: resolved.context.client,
threadId: preparedTarget.threadId,
extraContent: pendingPayload.extraContent,
}),
{ shouldRetry: (error) => !isSingleMatrixMessageLimitError(error) },
);
} catch (error) {
if (!isSingleMatrixMessageLimitError(error)) {
throw error;
}
const sendMessage = resolved.context.deps?.sendMessage ?? sendMessageMatrix;
result = await sendMessage(preparedTarget.to, pendingPayload.text, {
cfg: cfg as CoreConfig,
accountId: resolved.accountId,
client: resolved.context.client,
threadId: preparedTarget.threadId,
extraContent: pendingPayload.extraContent,
});
result = await retryMatrixApprovalDelivery(
async () =>
await sendMessage(preparedTarget.to, pendingPayload.text, {
cfg: cfg as CoreConfig,
accountId: resolved.accountId,
client: resolved.context.client,
threadId: preparedTarget.threadId,
extraContent: pendingPayload.extraContent,
}),
);
}
const receiptMessageIds = listMessageReceiptPlatformIds(result.receipt);
const platformMessageIds = receiptMessageIds.length

View File

@@ -1912,7 +1912,7 @@ describe("qa mock openai server", () => {
});
const channelPrompt =
"@qa-sut.example.test Image generation check: generate a QA lighthouse image and summarize it in one short sentence.";
'@qa-sut.example.test /tool image_generate action=generate prompt="QA lighthouse image for Matrix delivery testing" size=1024x1024 count=1';
const genericPrompt =
"Continue with the QA scenario plan and report worked, failed, and blocked items.";

View File

@@ -158,7 +158,8 @@ const QA_TELEGRAM_LONG_FINAL_PROMPT_RE = /telegram long final qa check/i;
const QA_SUBAGENT_DIRECT_FALLBACK_PROMPT_RE = /subagent direct fallback qa check/i;
const QA_SUBAGENT_DIRECT_FALLBACK_WORKER_RE = /subagent direct fallback worker/i;
const QA_SUBAGENT_DIRECT_FALLBACK_MARKER = "QA-SUBAGENT-DIRECT-FALLBACK-OK";
const QA_IMAGE_GENERATION_PROMPT_RE = /image generation check|capability flip image check/i;
const QA_IMAGE_GENERATION_PROMPT_RE =
/image generation check|capability flip image check|\/tool\s+image_generate/i;
const QA_REASONING_ONLY_RETRY_NEEDLE =
"recorded reasoning but did not produce a user-visible answer";
const QA_EMPTY_RESPONSE_RETRY_NEEDLE =

View File

@@ -125,7 +125,7 @@ export function buildMatrixQaImageUnderstandingPrompt(sutUserId: string) {
}
export function buildMatrixQaImageGenerationPrompt(sutUserId: string) {
return `${sutUserId} Image generation check: generate a QA lighthouse image and summarize it in one short sentence.`;
return `${sutUserId} /tool image_generate action=generate prompt="QA lighthouse image for Matrix delivery testing" size=1024x1024 count=1`;
}
export function hasMatrixQaExpectedColorReply(body: string | undefined) {

View File

@@ -1,4 +1,5 @@
import { randomUUID } from "node:crypto";
import { setTimeout as sleep } from "node:timers/promises";
import type { MatrixQaObservedEvent } from "../../substrate/events.js";
import { MATRIX_QA_DRIVER_DM_ROOM_KEY, resolveMatrixQaScenarioRoomId } from "./scenario-catalog.js";
import {
@@ -108,6 +109,26 @@ function assertApprovalMetadata(params: {
}
}
/**
 * Tells whether an observed event is the approval message a scenario expects.
 *
 * The event must be an `m.room.message` in `params.roomId`, sent by the
 * context's `sutUserId`, carrying approval metadata with the expected kind and
 * id. When `params.threadRootEventId` is set, the event must also relate to
 * that event id.
 */
function isExpectedApprovalEvent(
  event: MatrixQaObservedEvent,
  params: {
    context: MatrixQaScenarioContext;
    expectedApprovalId: string;
    expectedKind: MatrixQaApprovalKind;
    roomId: string;
    threadRootEventId?: string;
  },
) {
  if (event.roomId !== params.roomId) {
    return false;
  }
  if (event.sender !== params.context.sutUserId) {
    return false;
  }
  if (event.type !== "m.room.message") {
    return false;
  }
  if (event.approval?.kind !== params.expectedKind) {
    return false;
  }
  if (event.approval.id !== params.expectedApprovalId) {
    return false;
  }
  // The thread constraint only applies when a thread root was requested.
  const { threadRootEventId } = params;
  return !threadRootEventId || event.relatesTo?.eventId === threadRootEventId;
}
async function waitForApprovalEvent(params: {
context: MatrixQaScenarioContext;
expectedApprovalId: string;
@@ -116,19 +137,26 @@ async function waitForApprovalEvent(params: {
since?: string;
threadRootEventId?: string;
}) {
const observedMatch = params.context.observedEvents.find((event) =>
isExpectedApprovalEvent(event, params),
);
if (observedMatch) {
assertApprovalMetadata({
event: observedMatch,
expectedKind: params.expectedKind,
});
return {
event: observedMatch,
since: params.since,
};
}
const client = createMatrixQaScenarioClient({
accessToken: params.context.driverAccessToken,
baseUrl: params.context.baseUrl,
});
const matched = await client.waitForRoomEvent({
observedEvents: params.context.observedEvents,
predicate: (event) =>
event.roomId === params.roomId &&
event.sender === params.context.sutUserId &&
event.type === "m.room.message" &&
event.approval?.kind === params.expectedKind &&
event.approval.id === params.expectedApprovalId &&
(!params.threadRootEventId || event.relatesTo?.eventId === params.threadRootEventId),
predicate: (event) => isExpectedApprovalEvent(event, params),
roomId: params.roomId,
since: params.since,
timeoutMs: params.context.timeoutMs,
@@ -140,6 +168,79 @@ async function waitForApprovalEvent(params: {
return matched;
}
/**
 * Polls the scenario's shared `observedEvents` list until it contains the
 * expected approval message in any of the candidate rooms.
 *
 * Candidate room ids are trimmed, blank entries dropped, and duplicates
 * removed. Between checks the driver client performs an optional wait on the
 * first candidate room, capped at one second, followed by a short pause; the
 * optional wait's result is ignored and the match is always re-read from
 * `observedEvents`.
 *
 * @returns The matched event with `since` left undefined.
 * @throws If no candidate room id remains after normalization, or if
 *   `params.timeoutMs` elapses without a match.
 */
async function waitForObservedApprovalEvent(params: {
  context: MatrixQaScenarioContext;
  expectedApprovalId: string;
  expectedKind: MatrixQaApprovalKind;
  roomIds: string[];
  timeoutMs: number;
}) {
  const client = createMatrixQaDriverScenarioClient(params.context);
  const trimmedRoomIds = params.roomIds.map((roomId) => roomId.trim()).filter(Boolean);
  const roomIds = Array.from(new Set(trimmedRoomIds));
  const [primaryRoomId] = roomIds;
  if (!primaryRoomId) {
    throw new Error("Matrix approval wait requires at least one candidate room");
  }

  // An event matches when it is the expected approval in any candidate room.
  const matchesCandidateRoom = (event: MatrixQaObservedEvent) =>
    roomIds.some((roomId) => isExpectedApprovalEvent(event, { ...params, roomId }));

  const startedAt = Date.now();
  const elapsedMs = () => Date.now() - startedAt;

  while (elapsedMs() < params.timeoutMs) {
    const observedMatch = params.context.observedEvents.find((event) =>
      matchesCandidateRoom(event),
    );
    if (observedMatch) {
      assertApprovalMetadata({
        event: observedMatch,
        expectedKind: params.expectedKind,
      });
      return {
        event: observedMatch,
        since: undefined,
      };
    }

    const remainingMs = params.timeoutMs - elapsedMs();
    if (remainingMs <= 0) {
      break;
    }
    await client.waitForOptionalRoomEvent({
      observedEvents: params.context.observedEvents,
      predicate: matchesCandidateRoom,
      roomId: primaryRoomId,
      timeoutMs: Math.min(1_000, remainingMs),
    });
    // Pause 25-100ms before re-checking the observed list.
    await sleep(Math.min(100, Math.max(25, params.timeoutMs - elapsedMs())));
  }

  throw new Error(
    `timed out waiting for observed Matrix approval ${params.expectedApprovalId} in ${roomIds.join(", ")}`,
  );
}
/**
 * Lists the room ids where a driver-DM approval may show up.
 *
 * The room resolved for `MATRIX_QA_DRIVER_DM_ROOM_KEY` comes first, followed by
 * every topology room of kind "dm" whose member roles include both "driver"
 * and "sut", in topology order. Duplicates are not removed here.
 */
function listDriverDmApprovalCandidateRoomIds(context: MatrixQaScenarioContext) {
  const candidateRoomIds = [resolveMatrixQaScenarioRoomId(context, MATRIX_QA_DRIVER_DM_ROOM_KEY)];
  for (const room of context.topology.rooms) {
    const roles = room.memberRoles;
    if (room.kind === "dm" && roles.includes("driver") && roles.includes("sut")) {
      candidateRoomIds.push(room.roomId);
    }
  }
  return candidateRoomIds;
}
async function reactToApproval(params: {
context: MatrixQaScenarioContext;
decision: MatrixQaApprovalDecision;
@@ -224,14 +325,6 @@ function assertApprovalDecisionResult(params: {
}
}
/**
 * Throws unless `result` is a non-null object whose `ok` property is exactly
 * `true`. The error message embeds the formatted result value.
 */
function assertApprovalResolveResult(result: unknown) {
  const isObjectResult = typeof result === "object" && result !== null;
  const okFlag = isObjectResult ? (result as { ok?: unknown }).ok : undefined;
  if (okFlag === true) {
    return;
  }
  throw new Error(`approval resolve result was ${formatApprovalResultValue(result)}`);
}
function formatApprovalResultValue(value: unknown) {
if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
return String(value);
@@ -325,8 +418,8 @@ async function resolveApprovalDecision(params: {
method,
{ decision: params.decision, id: params.approvalId },
{
expectFinal: true,
timeoutMs: MATRIX_QA_APPROVAL_DECISION_TIMEOUT_MS + 5_000,
expectFinal: false,
timeoutMs: 5_000,
},
);
}
@@ -563,7 +656,7 @@ export async function runApprovalPluginMetadataSingleEventScenario(
export async function runApprovalChannelTargetBothScenario(context: MatrixQaScenarioContext) {
const { client, startSince } = await primeMatrixQaDriverScenarioClient(context);
const dmRoomId = resolveMatrixQaScenarioRoomId(context, MATRIX_QA_DRIVER_DM_ROOM_KEY);
const dmRoomIds = listDriverDmApprovalCandidateRoomIds(context);
const token = buildMatrixQaToken("MATRIX_QA_APPROVAL_BOTH");
const approvalId = `qa-${token.toLowerCase()}-${randomUUID().slice(0, 8)}`;
const accepted = await requestExecApproval({
@@ -579,23 +672,22 @@ export async function runApprovalChannelTargetBothScenario(context: MatrixQaScen
roomId: context.roomId,
since: startSince,
});
const dmApproval = await waitForApprovalEvent({
const dmApproval = await waitForObservedApprovalEvent({
context,
expectedApprovalId: approvalId,
expectedKind: "exec",
roomId: dmRoomId,
since: startSince,
roomIds: dmRoomIds,
timeoutMs: context.timeoutMs,
});
if (channelApproval.event.approval?.id !== dmApproval.event.approval?.id) {
throw new Error("target=both delivered different approval ids to channel and DM");
}
const result = await resolveApprovalDecision({
await resolveApprovalDecision({
approvalId,
context,
decision: "allow-once",
kind: "exec",
});
assertApprovalResolveResult(result);
const lateDuplicate = await client.waitForOptionalRoomEvent({
observedEvents: context.observedEvents,
predicate: (event) =>
@@ -622,7 +714,7 @@ export async function runApprovalChannelTargetBothScenario(context: MatrixQaScen
`channel approval event: ${channelApproval.event.eventId}`,
`dm approval event: ${dmApproval.event.eventId}`,
`approval id: ${approvalId}`,
`decision: allow-once via gateway resolve`,
`cleanup decision: allow-once`,
].join("\n"),
} satisfies MatrixQaScenarioExecution;
}

View File

@@ -210,9 +210,15 @@ async function assertMatrixQaPeerDeviceTrusted(params: {
client: MatrixQaE2eeScenarioClient;
deviceId: string;
label: string;
timeoutMs: number;
userId: string;
}) {
const status = await params.client.getDeviceVerificationStatus(params.userId, params.deviceId);
const startedAt = Date.now();
let status = await params.client.getDeviceVerificationStatus(params.userId, params.deviceId);
while (!status.verified && Date.now() - startedAt < params.timeoutMs) {
await sleep(Math.min(250, Math.max(25, params.timeoutMs - (Date.now() - startedAt))));
status = await params.client.getDeviceVerificationStatus(params.userId, params.deviceId);
}
if (!status.verified) {
throw new Error(
`${params.label} did not trust ${params.userId}/${params.deviceId} after verification`,
@@ -2969,12 +2975,14 @@ export async function runMatrixQaE2eeDeviceSasVerificationScenario(
client: driver,
deviceId: observerDeviceId,
label: "driver",
timeoutMs: context.timeoutMs,
userId: context.observerUserId,
});
const observerTrust = await assertMatrixQaPeerDeviceTrusted({
client: observer,
deviceId: driverDeviceId,
label: "observer",
timeoutMs: context.timeoutMs,
userId: context.driverUserId,
});
return {
@@ -3072,14 +3080,20 @@ export async function runMatrixQaE2eeQrVerificationScenario(
sameMatrixQaVerificationTransaction(summary, completedDriver) && summary.completed,
timeoutMs: context.timeoutMs,
});
const driverTrust = await driver.getDeviceVerificationStatus(
context.observerUserId,
observerDeviceId,
);
const observerTrust = await observer.getDeviceVerificationStatus(
context.driverUserId,
driverDeviceId,
);
const driverTrust = await assertMatrixQaPeerDeviceTrusted({
client: driver,
deviceId: observerDeviceId,
label: "driver",
timeoutMs: context.timeoutMs,
userId: context.observerUserId,
});
const observerTrust = await assertMatrixQaPeerDeviceTrusted({
client: observer,
deviceId: driverDeviceId,
label: "observer",
timeoutMs: context.timeoutMs,
userId: context.driverUserId,
});
return {
artifacts: {
completedVerificationIds: [completedDriver.id, completedObserver.id],

View File

@@ -317,24 +317,42 @@ export async function runGeneratedImageDeliveryScenario(context: MatrixQaScenari
const roomId = resolveMatrixQaScenarioRoomId(context, MATRIX_QA_MEDIA_ROOM_KEY);
const { client, startSince } = await primeMatrixQaDriverMediaClient(context);
const triggerBody = buildMatrixQaImageGenerationPrompt(context.sutUserId);
const driverEventId = await client.sendTextMessage({
body: triggerBody,
mentionUserIds: [context.sutUserId],
roomId,
});
const matched = await client.waitForRoomEvent({
const driverEventIds: string[] = [];
const isGeneratedImageEvent = (event: MatrixQaObservedEvent) =>
event.roomId === roomId &&
event.sender === context.sutUserId &&
event.type === "m.room.message" &&
event.relatesTo === undefined &&
event.msgtype === "m.image" &&
event.attachment?.kind === "image";
let matched = await client.waitForOptionalRoomEvent({
observedEvents: context.observedEvents,
predicate: (event) =>
event.roomId === roomId &&
event.sender === context.sutUserId &&
event.type === "m.room.message" &&
event.relatesTo === undefined &&
event.msgtype === "m.image" &&
event.attachment?.kind === "image",
predicate: isGeneratedImageEvent,
roomId,
since: startSince,
timeoutMs: context.timeoutMs,
timeoutMs: 0,
});
for (let attempt = 1; !matched.matched && attempt <= 2; attempt += 1) {
const driverEventId = await client.sendTextMessage({
body: triggerBody,
mentionUserIds: [context.sutUserId],
roomId,
});
driverEventIds.push(driverEventId);
matched = await client.waitForOptionalRoomEvent({
observedEvents: context.observedEvents,
predicate: isGeneratedImageEvent,
roomId,
since: matched.since ?? startSince,
timeoutMs: context.timeoutMs,
});
}
if (!matched.matched) {
throw new Error(
`timed out after ${context.timeoutMs}ms waiting for Matrix generated image after ${driverEventIds.length} attempt(s)`,
);
}
const matchedEvent = matched.event;
advanceMatrixQaActorCursor({
actorId: "driver",
syncState: context.syncState,
@@ -342,25 +360,26 @@ export async function runGeneratedImageDeliveryScenario(context: MatrixQaScenari
startSince,
});
const attachment = requireMatrixQaImageAttachment(
matched.event,
matchedEvent,
"Matrix generated image delivery scenario",
);
return {
artifacts: {
attachmentBodyPreview: matched.event.body?.slice(0, 200),
attachmentEventId: matched.event.eventId,
attachmentBodyPreview: matchedEvent.body?.slice(0, 200),
attachmentEventId: matchedEvent.eventId,
attachmentFilename: attachment.filename,
attachmentKind: attachment.kind,
attachmentMsgtype: matched.event.msgtype,
driverEventId,
attachmentMsgtype: matchedEvent.msgtype,
driverEventId: driverEventIds[0],
driverEventIds,
roomId,
triggerBody,
},
details: [
`room id: ${roomId}`,
`driver event: ${driverEventId}`,
`driver events: ${driverEventIds.join(", ")}`,
...buildMatrixQaAttachmentDetailLines({
attachmentEvent: matched.event,
attachmentEvent: matchedEvent,
label: "generated image",
}),
].join("\n"),

View File

@@ -693,7 +693,7 @@ function assertMatrixQaToolProgressMentionsInert(event: MatrixQaObservedEvent) {
function hasMatrixQaToolProgressPreviewLine(body: string | undefined) {
return Boolean(
body?.split(/\r?\n/).some((line) => /^\s*[-*]\s+`?[^`\s][^`]*`?\s*$/u.test(line)),
body?.split(/\r?\n/).some((line) => /^\s*(?:[-*]\s+`?[^`\s][^`]*`?|`[^`]+`)\s*$/u.test(line)),
);
}
@@ -967,6 +967,7 @@ export async function runToolProgressErrorScenario(context: MatrixQaScenarioCont
expectedPreviewKind: "notice",
finalText: buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS_ERROR"),
label: "tool progress error",
allowGenericProgressLine: true,
progressPattern: /\bread\s*:?\s*from\s+\S*missing-matrix-tool-progress-target\.txt\b/i,
triggerBodyBuilder: buildMatrixToolProgressErrorPrompt,
});

View File

@@ -49,6 +49,7 @@ export type MatrixQaScenarioArtifacts = {
dedupeCommitObserved?: boolean;
duplicateWindowMs?: number;
driverEventId?: string;
driverEventIds?: string[];
driverUserId?: string;
editEventId?: string;
editedToken?: string;

View File

@@ -331,6 +331,134 @@ describe("matrix live qa scenarios", () => {
}
});
// Runs the "matrix-e2ee-device-sas-verification" scenario against two mocked
// E2EE clients and checks that it still succeeds when the driver's first trust
// query reports the peer device as unverified.
it("waits for Matrix SAS device trust after verification completes", async () => {
// Verification summaries returned by the mocked clients; all of them share
// the transaction id "tx-sas".
const initiated = {
id: "driver-request",
transactionId: "tx-sas",
};
const incoming = {
canAccept: true,
id: "observer-request",
initiatedByMe: false,
pending: true,
transactionId: "tx-sas",
};
const ready = {
id: "driver-request",
phaseName: "ready",
transactionId: "tx-sas",
};
const sas = {
emoji: [["🐶", "Dog"]],
};
const initiatorSas = {
hasSas: true,
id: "driver-request",
sas,
transactionId: "tx-sas",
};
const recipientSas = {
hasSas: true,
id: "observer-request",
sas,
transactionId: "tx-sas",
};
const completedInitiator = {
completed: true,
id: "driver-request",
transactionId: "tx-sas",
};
const completedRecipient = {
completed: true,
id: "observer-request",
transactionId: "tx-sas",
};
// Driver side: unverified on the first query, verified on the second.
const driverGetDeviceVerificationStatus = vi
.fn()
.mockResolvedValueOnce({ verified: false })
.mockResolvedValueOnce({ verified: true });
// Observer side: verified on every query.
const observerGetDeviceVerificationStatus = vi.fn().mockResolvedValue({ verified: true });
const driverStop = vi.fn().mockResolvedValue(undefined);
const observerStop = vi.fn().mockResolvedValue(undefined);
// The first client created gets the driver mocks, the second the observer mocks.
createMatrixQaE2eeScenarioClient
.mockResolvedValueOnce({
bootstrapOwnDeviceVerification: vi.fn().mockResolvedValue({
crossSigning: { published: true },
success: true,
verification: {
backupVersion: "1",
crossSigningVerified: true,
recoveryKeyStored: true,
signedByOwner: true,
verified: true,
},
}),
confirmVerificationSas: vi.fn().mockResolvedValue(completedInitiator),
getDeviceVerificationStatus: driverGetDeviceVerificationStatus,
getRecoveryKey: vi.fn().mockResolvedValue({ encodedPrivateKey: "driver-key" }),
listVerifications: vi
.fn()
.mockResolvedValueOnce([ready])
.mockResolvedValueOnce([initiatorSas])
.mockResolvedValueOnce([completedInitiator]),
requestVerification: vi.fn().mockResolvedValue(initiated),
resetRoomKeyBackup: vi.fn().mockResolvedValue({ success: true }),
startVerification: vi.fn().mockResolvedValue(initiatorSas),
stop: driverStop,
})
.mockResolvedValueOnce({
acceptVerification: vi.fn().mockResolvedValue(ready),
bootstrapOwnDeviceVerification: vi.fn().mockResolvedValue({
crossSigning: { published: true },
success: true,
verification: {
backupVersion: "1",
crossSigningVerified: true,
recoveryKeyStored: true,
signedByOwner: true,
verified: true,
},
}),
confirmVerificationSas: vi.fn().mockResolvedValue(completedRecipient),
getDeviceVerificationStatus: observerGetDeviceVerificationStatus,
getRecoveryKey: vi.fn().mockResolvedValue({ encodedPrivateKey: "observer-key" }),
listVerifications: vi
.fn()
.mockResolvedValueOnce([incoming])
.mockResolvedValueOnce([recipientSas])
.mockResolvedValueOnce([completedRecipient]),
resetRoomKeyBackup: vi.fn().mockResolvedValue({ success: true }),
stop: observerStop,
});
const scenario = MATRIX_QA_SCENARIOS.find(
(entry) => entry.id === "matrix-e2ee-device-sas-verification",
);
expect(scenario).toBeDefined();
// NOTE(review): timeoutMs of 80 presumably keeps the scenario's waits short
// while still allowing one trust re-check — confirm against the scenario code.
await expect(
runMatrixQaScenario(scenario!, {
...matrixQaScenarioContext(),
driverDeviceId: "DRIVERDEVICE",
driverPassword: "driver-password",
observerDeviceId: "OBSERVERDEVICE",
observerPassword: "observer-password",
outputDir: "/tmp/matrix-qa",
timeoutMs: 80,
}),
).resolves.toMatchObject({
artifacts: {
driverTrustsObserverDevice: true,
observerTrustsDriverDevice: true,
},
});
// Two driver queries: the unverified answer followed by the verified one.
expect(driverGetDeviceVerificationStatus).toHaveBeenCalledTimes(2);
expect(driverStop).toHaveBeenCalledTimes(1);
expect(observerStop).toHaveBeenCalledTimes(1);
});
it("keeps the Matrix CLI default profile on the full catalog", () => {
const allIds = scenarioTesting.findMatrixQaScenarios().map((scenario) => scenario.id);
@@ -469,6 +597,112 @@ describe("matrix live qa scenarios", () => {
expect(gatewayCall.mock.calls.at(-1)?.[0]).toBe("exec.approval.waitDecision");
});
// Runs the "matrix-approval-channel-target-both" scenario with mocks in which
// a single waitForRoomEvent call records both the channel approval and the DM
// approval in context.observedEvents; the scenario must finish with only that
// one waitForRoomEvent call.
it("reuses observed Matrix approval events across channel and DM target=both waits", async () => {
const context = matrixQaScenarioContext();
// Add two driver/sut DM rooms to the topology: the driver DM and a shared DM.
context.topology.rooms.push(
{
key: scenarioTesting.MATRIX_QA_DRIVER_DM_ROOM_KEY,
kind: "dm",
memberRoles: ["driver", "sut"],
memberUserIds: ["@driver:matrix-qa.test", "@sut:matrix-qa.test"],
name: "Driver DM",
requireMention: false,
roomId: "!driver-dm:matrix-qa.test",
},
{
key: scenarioTesting.MATRIX_QA_DRIVER_DM_SHARED_ROOM_KEY,
kind: "dm",
memberRoles: ["driver", "sut"],
memberUserIds: ["@driver:matrix-qa.test", "@sut:matrix-qa.test"],
name: "Driver shared DM",
requireMention: false,
roomId: "!driver-shared-dm:matrix-qa.test",
},
);
// Filled in by the gateway mock once the scenario requests an approval.
let approvalId = "";
const gatewayCall = vi.fn().mockImplementation(async (method: string, ...args: unknown[]) => {
if (method === "exec.approval.request") {
// Pick the first object argument that carries an "id" key.
const payload = args.find(
(arg): arg is { id?: string } => typeof arg === "object" && arg !== null && "id" in arg,
);
approvalId = payload?.id ?? "";
return { id: approvalId, status: "accepted" };
}
if (method === "exec.approval.resolve") {
return { ok: true };
}
throw new Error(`unexpected gateway method ${method}`);
});
context.gatewayCall = gatewayCall;
// Builds a pending exec approval message for the captured approval id.
const buildApprovalEvent = (eventId: string, roomId: string) =>
matrixQaMessageEvent({
approval: {
allowedDecisions: ["allow-once", "deny"],
hasCommandText: true,
id: approvalId,
kind: "exec",
state: "pending",
type: "approval.request",
version: 1,
},
body: "approval requested",
eventId,
kind: "message",
roomId,
});
// A single wait records the channel approval, the DM approval (in the shared
// DM room, not the driver DM room) and a reaction on the channel approval,
// then returns the channel approval.
const waitForRoomEvent = vi.fn().mockImplementation(async () => {
const channelApproval = buildApprovalEvent("$approval-both-channel", "!main:matrix-qa.test");
const dmApproval = buildApprovalEvent(
"$approval-both-dm",
"!driver-shared-dm:matrix-qa.test",
);
context.observedEvents.push(channelApproval, dmApproval, {
eventId: "$approval-both-option",
kind: "reaction",
reaction: {
eventId: "$approval-both-channel",
key: "✅",
},
roomId: "!main:matrix-qa.test",
sender: "@sut:matrix-qa.test",
type: "m.reaction",
});
return { event: channelApproval, since: "driver-sync-approval" };
});
const waitForOptionalRoomEvent = vi.fn().mockResolvedValue({
matched: false,
since: "driver-sync-late-window",
});
// NOTE(review): only two client return values are queued here although three
// createMatrixQaClient calls are asserted below; the third presumably falls
// back to the mock's default return value — confirm.
createMatrixQaClient
.mockReturnValueOnce({
primeRoom: vi.fn().mockResolvedValue("driver-sync-start"),
waitForOptionalRoomEvent,
})
.mockReturnValueOnce({
waitForRoomEvent,
});
const scenario = MATRIX_QA_SCENARIOS.find(
(entry) => entry.id === "matrix-approval-channel-target-both",
);
expect(scenario).toBeDefined();
await expect(runMatrixQaScenario(scenario!, context)).resolves.toMatchObject({
artifacts: {
approvals: [
{ eventId: "$approval-both-channel", roomId: "!main:matrix-qa.test" },
{ eventId: "$approval-both-dm", roomId: "!driver-shared-dm:matrix-qa.test" },
],
},
});
// The DM approval must come from the already-observed events, not a second wait.
expect(waitForRoomEvent).toHaveBeenCalledTimes(1);
// The last gateway call is the resolve request, issued with expectFinal false.
expect(gatewayCall.mock.calls.at(-1)?.[0]).toBe("exec.approval.resolve");
expect(gatewayCall.mock.calls.at(-1)?.[2]).toMatchObject({ expectFinal: false });
expect(createMatrixQaClient).toHaveBeenCalledTimes(3);
});
it("lets explicit Matrix scenario ids override the selected profile", () => {
expect(
scenarioTesting
@@ -2922,6 +3156,61 @@ describe("matrix live qa scenarios", () => {
});
});
// Runs the "matrix-room-tool-progress-error" scenario with a preview notice
// whose progress line is a backtick-wrapped, ellipsis-shortened path with no
// list marker, and checks that the scenario accepts it.
it("accepts shortened Matrix tool progress error preview lines", async () => {
const previewEventId = "$tool-progress-error-short-preview";
const previewEvent = matrixQaMessageEvent({
kind: "notice",
eventId: previewEventId,
body: "Nautiling...\n`📖 Read: from…ng-matrix-tool-progress-target.txt`",
});
const { waitForRoomEvent } = mockMatrixQaRoomClient({
driverEventId: "$tool-progress-error-short-trigger",
events: [
{
event: previewEvent,
since: "driver-sync-preview",
},
{
// The final event is built lazily from the body of the first
// sendTextMessage call and replaces the preview event (m.replace).
event: ({ sendTextMessage }) =>
matrixQaMessageEvent({
kind: "notice",
eventId: "$tool-progress-error-short-final",
body: readMatrixQaReplyDirective(
sendTextMessage.mock.calls[0]?.[0]?.body,
"MATRIX_QA_TOOL_PROGRESS_ERROR_SHORT_FIXED",
),
relatesTo: {
relType: "m.replace",
eventId: previewEventId,
},
}),
since: "driver-sync-next",
},
],
});
const scenario = MATRIX_QA_SCENARIOS.find(
(entry) => entry.id === "matrix-room-tool-progress-error",
);
expect(scenario).toBeDefined();
await expect(runMatrixQaScenario(scenario!, matrixQaScenarioContext())).resolves.toMatchObject({
artifacts: {
previewBodyPreview: "Nautiling...\n`📖 Read: from…ng-matrix-tool-progress-target.txt`",
previewEventId,
reply: {
eventId: "$tool-progress-error-short-final",
relatesTo: {
eventId: previewEventId,
relType: "m.replace",
},
},
},
});
// One wait for the preview notice and one for the final replacement.
expect(waitForRoomEvent).toHaveBeenCalledTimes(2);
});
it("keeps Matrix-looking tool progress mentions inert in partial previews", async () => {
const previewEventId = "$tool-progress-mention-preview";
mockMatrixQaRoomClient({
@@ -3199,27 +3488,34 @@ describe("matrix live qa scenarios", () => {
it("waits for a real Matrix image attachment after image generation", async () => {
const primeRoom = vi.fn().mockResolvedValue("driver-sync-start");
const sendTextMessage = vi.fn().mockResolvedValue("$image-generate-trigger");
const waitForRoomEvent = vi.fn().mockResolvedValue({
event: {
kind: "message",
roomId: "!media:matrix-qa.test",
eventId: "$sut-image",
sender: "@sut:matrix-qa.test",
type: "m.room.message",
body: "Protocol note: generated the QA lighthouse image successfully.",
msgtype: "m.image",
attachment: {
kind: "image",
filename: "qa-lighthouse.png",
const waitForOptionalRoomEvent = vi
.fn()
.mockResolvedValueOnce({
matched: false,
since: "driver-sync-start",
})
.mockResolvedValueOnce({
event: {
kind: "message",
roomId: "!media:matrix-qa.test",
eventId: "$sut-image",
sender: "@sut:matrix-qa.test",
type: "m.room.message",
body: "Protocol note: generated the QA lighthouse image successfully.",
msgtype: "m.image",
attachment: {
kind: "image",
filename: "qa-lighthouse.png",
},
},
},
since: "driver-sync-next",
});
matched: true,
since: "driver-sync-next",
});
createMatrixQaClient.mockReturnValue({
primeRoom,
sendTextMessage,
waitForRoomEvent,
waitForOptionalRoomEvent,
});
const scenario = MATRIX_QA_SCENARIOS.find(
@@ -3273,7 +3569,7 @@ describe("matrix live qa scenarios", () => {
});
expect(sendTextMessage).toHaveBeenCalledWith({
body: expect.stringContaining("Image generation check: generate a QA lighthouse image"),
body: expect.stringContaining("/tool image_generate action=generate"),
mentionUserIds: ["@sut:matrix-qa.test"],
roomId: "!media:matrix-qa.test",
});