mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 13:10:43 +00:00
fix: stabilize Matrix tool progress QA (#78179)
* fix: stabilize matrix tool progress QA
* fix: handle backtick matrix progress previews
* fix: reuse observed matrix approvals
* fix: retry matrix generated image QA
* fix: wait for matrix sas trust propagation
* fix: resolve matrix target both approvals by reaction
* fix: avoid matrix target both approval echo wait
* fix: reuse observed matrix target both dm approval
* fix: retry matrix approval delivery
* fix: accept active matrix approval dm
* test: align matrix approval retry receipt
* test: include matrix approval view in retry fixture
This commit is contained in:
@@ -335,6 +335,94 @@ describe("matrixApprovalNativeRuntime", () => {
|
||||
expect(reactMessage).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("retries transient Matrix approval send failures", async () => {
  // The first send rejects; every later send resolves with a single-event receipt.
  const roomId = "!room:example.org";
  const approvalEventId = "$approval";
  const deliveredResult = {
    messageId: approvalEventId,
    primaryMessageId: approvalEventId,
    receipt: buildMatrixReceipt([approvalEventId]),
    roomId,
  };
  const sendSingleTextMessage = vi.fn();
  sendSingleTextMessage.mockRejectedValueOnce(new Error("transient Matrix send failure"));
  sendSingleTextMessage.mockResolvedValue(deliveredResult);
  const reactMessage = vi.fn().mockResolvedValue(undefined);

  const view = buildExecApprovalView();
  const pendingPayload = await buildPendingPayload(view);
  const deps = { sendSingleTextMessage, reactMessage };

  const entry = await matrixApprovalNativeRuntime.transport.deliverPending({
    cfg: {} as never,
    accountId: "default",
    context: { client: {} as never, deps },
    request: {} as never,
    approvalKind: "exec",
    plannedTarget: buildMatrixApprovalRoomTarget(roomId),
    preparedTarget: { to: `room:${roomId}`, roomId },
    view,
    pendingPayload,
  });

  // One failed attempt plus one successful retry.
  expect(sendSingleTextMessage).toHaveBeenCalledTimes(2);
  expect(entry).toMatchObject({ roomId, platformMessageIds: [approvalEventId] });
});
|
||||
|
||||
it("retries transient Matrix direct-room repair failures before preparing approval DMs", async () => {
  // The first repair attempt rejects; the retry reports the active DM room.
  const dmRoomId = "!dm:example.org";
  const repairDirectRooms = vi.fn();
  repairDirectRooms.mockRejectedValueOnce(new Error("direct account data not ready"));
  repairDirectRooms.mockResolvedValue({ activeRoomId: dmRoomId });

  const cfg = { channels: { matrix: { encryption: false } } } as never;

  const prepared = await matrixApprovalNativeRuntime.transport.prepareTarget({
    cfg,
    accountId: "default",
    context: { client: {} as never, deps: { repairDirectRooms } },
    request: {} as never,
    approvalKind: "exec",
    view: buildExecApprovalView(),
    pendingPayload: {} as never,
    plannedTarget: {
      surface: "approver-dm",
      target: { to: "user:@owner:example.org" },
      reason: "preferred",
    },
  });

  // One failed attempt plus one successful retry.
  expect(repairDirectRooms).toHaveBeenCalledTimes(2);
  expect(prepared).toMatchObject({
    target: { to: `room:${dmRoomId}`, roomId: dmRoomId, threadId: undefined },
  });
});
|
||||
|
||||
it("falls back to chunked Matrix delivery when approval content exceeds one event", async () => {
|
||||
const sendSingleTextMessage = vi
|
||||
.fn()
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { setTimeout as sleep } from "node:timers/promises";
|
||||
import type {
|
||||
ChannelApprovalCapabilityHandlerContext,
|
||||
PendingApprovalView,
|
||||
@@ -123,6 +124,9 @@ type MatrixPrepareTargetParams = {
|
||||
rawTarget: MatrixRawApprovalTarget;
|
||||
};
|
||||
|
||||
// Upper bound on how many times retryMatrixApprovalDelivery invokes an operation before rethrowing.
const MATRIX_APPROVAL_DELIVERY_ATTEMPTS = 3;
|
||||
// Base retry delay in milliseconds; retryMatrixApprovalDelivery multiplies it by the attempt number.
const MATRIX_APPROVAL_DELIVERY_RETRY_DELAY_MS = 250;
|
||||
|
||||
export type MatrixApprovalHandlerDeps = {
|
||||
nowMs?: () => number;
|
||||
sendMessage?: typeof sendMessageMatrix;
|
||||
@@ -176,6 +180,25 @@ function isSingleMatrixMessageLimitError(error: unknown): boolean {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Runs `operation`, retrying with a linearly growing delay when it rejects.
 *
 * @param operation Async action to attempt; it is always invoked at least once.
 * @param params.shouldRetry Optional filter; returning `false` for an error
 *   stops retrying immediately and rethrows that error.
 * @param params.attempts Optional override for the maximum number of
 *   invocations (defaults to `MATRIX_APPROVAL_DELIVERY_ATTEMPTS`). Values that
 *   are non-finite or below 1 are treated as a single attempt.
 * @param params.delayMs Optional override for the base delay in milliseconds
 *   (defaults to `MATRIX_APPROVAL_DELIVERY_RETRY_DELAY_MS`). The wait before
 *   retry number `n` is `delayMs * n`; negative or non-finite values mean no wait.
 * @returns The first successful result of `operation`.
 * @throws The error from the last attempt, or from the first attempt that
 *   `shouldRetry` rejected.
 */
async function retryMatrixApprovalDelivery<T>(
  operation: () => Promise<T>,
  params: {
    shouldRetry?: (error: unknown) => boolean;
    attempts?: number;
    delayMs?: number;
  } = {},
): Promise<T> {
  const requestedAttempts = params.attempts ?? MATRIX_APPROVAL_DELIVERY_ATTEMPTS;
  const maxAttempts = Number.isFinite(requestedAttempts)
    ? Math.max(1, Math.floor(requestedAttempts))
    : 1;
  const requestedDelayMs = params.delayMs ?? MATRIX_APPROVAL_DELIVERY_RETRY_DELAY_MS;
  const baseDelayMs =
    Number.isFinite(requestedDelayMs) && requestedDelayMs > 0 ? requestedDelayMs : 0;

  for (let attempt = 1; ; attempt += 1) {
    try {
      return await operation();
    } catch (error) {
      // Rethrow the caught error itself so the caller never sees `undefined`.
      if (attempt >= maxAttempts || params.shouldRetry?.(error) === false) {
        throw error;
      }
      // Linear backoff: base delay, then twice the base delay, and so on.
      await sleep(baseDelayMs * attempt);
    }
  }
}
|
||||
|
||||
async function prepareTarget(
|
||||
params: MatrixPrepareTargetParams,
|
||||
): Promise<PreparedMatrixTarget | null> {
|
||||
@@ -194,11 +217,14 @@ async function prepareTarget(
|
||||
accountId: resolved.accountId,
|
||||
});
|
||||
const repairDirectRooms = resolved.context.deps?.repairDirectRooms ?? repairMatrixDirectRooms;
|
||||
const repaired = await repairDirectRooms({
|
||||
client: resolved.context.client,
|
||||
remoteUserId: target.id,
|
||||
encrypted: account.config.encryption === true,
|
||||
});
|
||||
const repaired = await retryMatrixApprovalDelivery(
|
||||
async () =>
|
||||
await repairDirectRooms({
|
||||
client: resolved.context.client,
|
||||
remoteUserId: target.id,
|
||||
encrypted: account.config.encryption === true,
|
||||
}),
|
||||
);
|
||||
if (!repaired.activeRoomId) {
|
||||
return null;
|
||||
}
|
||||
@@ -424,25 +450,32 @@ export const matrixApprovalNativeRuntime = createChannelApprovalNativeRuntimeAda
|
||||
const reactMessage = resolved.context.deps?.reactMessage ?? reactMatrixMessage;
|
||||
let result;
|
||||
try {
|
||||
result = await sendSingleTextMessage(preparedTarget.to, pendingPayload.text, {
|
||||
cfg: cfg as CoreConfig,
|
||||
accountId: resolved.accountId,
|
||||
client: resolved.context.client,
|
||||
threadId: preparedTarget.threadId,
|
||||
extraContent: pendingPayload.extraContent,
|
||||
});
|
||||
result = await retryMatrixApprovalDelivery(
|
||||
async () =>
|
||||
await sendSingleTextMessage(preparedTarget.to, pendingPayload.text, {
|
||||
cfg: cfg as CoreConfig,
|
||||
accountId: resolved.accountId,
|
||||
client: resolved.context.client,
|
||||
threadId: preparedTarget.threadId,
|
||||
extraContent: pendingPayload.extraContent,
|
||||
}),
|
||||
{ shouldRetry: (error) => !isSingleMatrixMessageLimitError(error) },
|
||||
);
|
||||
} catch (error) {
|
||||
if (!isSingleMatrixMessageLimitError(error)) {
|
||||
throw error;
|
||||
}
|
||||
const sendMessage = resolved.context.deps?.sendMessage ?? sendMessageMatrix;
|
||||
result = await sendMessage(preparedTarget.to, pendingPayload.text, {
|
||||
cfg: cfg as CoreConfig,
|
||||
accountId: resolved.accountId,
|
||||
client: resolved.context.client,
|
||||
threadId: preparedTarget.threadId,
|
||||
extraContent: pendingPayload.extraContent,
|
||||
});
|
||||
result = await retryMatrixApprovalDelivery(
|
||||
async () =>
|
||||
await sendMessage(preparedTarget.to, pendingPayload.text, {
|
||||
cfg: cfg as CoreConfig,
|
||||
accountId: resolved.accountId,
|
||||
client: resolved.context.client,
|
||||
threadId: preparedTarget.threadId,
|
||||
extraContent: pendingPayload.extraContent,
|
||||
}),
|
||||
);
|
||||
}
|
||||
const receiptMessageIds = listMessageReceiptPlatformIds(result.receipt);
|
||||
const platformMessageIds = receiptMessageIds.length
|
||||
|
||||
@@ -1912,7 +1912,7 @@ describe("qa mock openai server", () => {
|
||||
});
|
||||
|
||||
const channelPrompt =
|
||||
"@qa-sut.example.test Image generation check: generate a QA lighthouse image and summarize it in one short sentence.";
|
||||
'@qa-sut.example.test /tool image_generate action=generate prompt="QA lighthouse image for Matrix delivery testing" size=1024x1024 count=1';
|
||||
const genericPrompt =
|
||||
"Continue with the QA scenario plan and report worked, failed, and blocked items.";
|
||||
|
||||
|
||||
@@ -158,7 +158,8 @@ const QA_TELEGRAM_LONG_FINAL_PROMPT_RE = /telegram long final qa check/i;
|
||||
const QA_SUBAGENT_DIRECT_FALLBACK_PROMPT_RE = /subagent direct fallback qa check/i;
|
||||
const QA_SUBAGENT_DIRECT_FALLBACK_WORKER_RE = /subagent direct fallback worker/i;
|
||||
const QA_SUBAGENT_DIRECT_FALLBACK_MARKER = "QA-SUBAGENT-DIRECT-FALLBACK-OK";
|
||||
const QA_IMAGE_GENERATION_PROMPT_RE = /image generation check|capability flip image check/i;
|
||||
const QA_IMAGE_GENERATION_PROMPT_RE =
|
||||
/image generation check|capability flip image check|\/tool\s+image_generate/i;
|
||||
const QA_REASONING_ONLY_RETRY_NEEDLE =
|
||||
"recorded reasoning but did not produce a user-visible answer";
|
||||
const QA_EMPTY_RESPONSE_RETRY_NEEDLE =
|
||||
|
||||
@@ -125,7 +125,7 @@ export function buildMatrixQaImageUnderstandingPrompt(sutUserId: string) {
|
||||
}
|
||||
|
||||
/**
 * Builds the driver message that asks the SUT to generate the QA lighthouse image.
 *
 * The message addresses the SUT by user id and uses the explicit
 * `/tool image_generate` form with fixed arguments.
 *
 * @param sutUserId Matrix user id of the system under test; placed at the start of the message.
 * @returns The full trigger message body.
 */
export function buildMatrixQaImageGenerationPrompt(sutUserId: string) {
  return `${sutUserId} /tool image_generate action=generate prompt="QA lighthouse image for Matrix delivery testing" size=1024x1024 count=1`;
}
|
||||
|
||||
export function hasMatrixQaExpectedColorReply(body: string | undefined) {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { setTimeout as sleep } from "node:timers/promises";
|
||||
import type { MatrixQaObservedEvent } from "../../substrate/events.js";
|
||||
import { MATRIX_QA_DRIVER_DM_ROOM_KEY, resolveMatrixQaScenarioRoomId } from "./scenario-catalog.js";
|
||||
import {
|
||||
@@ -108,6 +109,26 @@ function assertApprovalMetadata(params: {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Tells whether an observed event is the SUT's approval message for the
 * expected approval id and kind in the given room.
 *
 * When `threadRootEventId` is set, the event must also relate to that event id.
 */
function isExpectedApprovalEvent(
  event: MatrixQaObservedEvent,
  params: {
    context: MatrixQaScenarioContext;
    expectedApprovalId: string;
    expectedKind: MatrixQaApprovalKind;
    roomId: string;
    threadRootEventId?: string;
  },
) {
  // Envelope checks: right room, sent by the SUT, and a room message.
  if (event.roomId !== params.roomId) {
    return false;
  }
  if (event.sender !== params.context.sutUserId) {
    return false;
  }
  if (event.type !== "m.room.message") {
    return false;
  }
  // Approval metadata must match both kind and id.
  if (event.approval?.kind !== params.expectedKind) {
    return false;
  }
  if (event.approval.id !== params.expectedApprovalId) {
    return false;
  }
  // The thread constraint only applies when a thread root was requested.
  if (!params.threadRootEventId) {
    return true;
  }
  return event.relatesTo?.eventId === params.threadRootEventId;
}
|
||||
|
||||
async function waitForApprovalEvent(params: {
|
||||
context: MatrixQaScenarioContext;
|
||||
expectedApprovalId: string;
|
||||
@@ -116,19 +137,26 @@ async function waitForApprovalEvent(params: {
|
||||
since?: string;
|
||||
threadRootEventId?: string;
|
||||
}) {
|
||||
const observedMatch = params.context.observedEvents.find((event) =>
|
||||
isExpectedApprovalEvent(event, params),
|
||||
);
|
||||
if (observedMatch) {
|
||||
assertApprovalMetadata({
|
||||
event: observedMatch,
|
||||
expectedKind: params.expectedKind,
|
||||
});
|
||||
return {
|
||||
event: observedMatch,
|
||||
since: params.since,
|
||||
};
|
||||
}
|
||||
const client = createMatrixQaScenarioClient({
|
||||
accessToken: params.context.driverAccessToken,
|
||||
baseUrl: params.context.baseUrl,
|
||||
});
|
||||
const matched = await client.waitForRoomEvent({
|
||||
observedEvents: params.context.observedEvents,
|
||||
predicate: (event) =>
|
||||
event.roomId === params.roomId &&
|
||||
event.sender === params.context.sutUserId &&
|
||||
event.type === "m.room.message" &&
|
||||
event.approval?.kind === params.expectedKind &&
|
||||
event.approval.id === params.expectedApprovalId &&
|
||||
(!params.threadRootEventId || event.relatesTo?.eventId === params.threadRootEventId),
|
||||
predicate: (event) => isExpectedApprovalEvent(event, params),
|
||||
roomId: params.roomId,
|
||||
since: params.since,
|
||||
timeoutMs: params.context.timeoutMs,
|
||||
@@ -140,6 +168,79 @@ async function waitForApprovalEvent(params: {
|
||||
return matched;
|
||||
}
|
||||
|
||||
/**
 * Waits until the SUT's approval message for `expectedApprovalId` appears in
 * the scenario's shared observed-event list, in any of the candidate rooms.
 *
 * The observed list is scanned first, so an event recorded by an earlier wait
 * is returned without polling. Otherwise the driver client is polled in slices
 * of at most one second (against the first candidate room) and the list is
 * re-scanned after each slice, including once more after the deadline so an
 * event captured during the final slice is not reported as a timeout.
 *
 * @param params.roomIds Candidate rooms; entries are trimmed, blanks dropped,
 *   and duplicates removed while preserving order.
 * @param params.timeoutMs Overall budget in milliseconds.
 * @returns The matching event and `since: undefined`.
 * @throws If no usable candidate room is given, if the matched event fails
 *   `assertApprovalMetadata`, or if nothing matches before the budget runs out.
 */
async function waitForObservedApprovalEvent(params: {
  context: MatrixQaScenarioContext;
  expectedApprovalId: string;
  expectedKind: MatrixQaApprovalKind;
  roomIds: string[];
  timeoutMs: number;
}) {
  const client = createMatrixQaDriverScenarioClient(params.context);
  const roomIds = Array.from(
    new Set(params.roomIds.map((roomId) => roomId.trim()).filter(Boolean)),
  );
  const primaryRoomId = roomIds[0];
  if (!primaryRoomId) {
    throw new Error("Matrix approval wait requires at least one candidate room");
  }

  // Single predicate shared by the local scan and the client poll.
  const matchesCandidateRoom = (event: MatrixQaObservedEvent) =>
    roomIds.some((roomId) => isExpectedApprovalEvent(event, { ...params, roomId }));

  const deadline = Date.now() + params.timeoutMs;
  for (;;) {
    // Scan before checking the deadline so the last poll's events are still seen.
    const observedMatch = params.context.observedEvents.find(matchesCandidateRoom);
    if (observedMatch) {
      assertApprovalMetadata({
        event: observedMatch,
        expectedKind: params.expectedKind,
      });
      return {
        event: observedMatch,
        since: undefined,
      };
    }
    const remainingMs = deadline - Date.now();
    if (remainingMs <= 0) {
      break;
    }
    // The poll result is intentionally ignored; matches are picked up by the scan above.
    await client.waitForOptionalRoomEvent({
      observedEvents: params.context.observedEvents,
      predicate: matchesCandidateRoom,
      roomId: primaryRoomId,
      timeoutMs: Math.min(1_000, remainingMs),
    });
    // Short pause between polls, never sleeping past the deadline.
    const pauseMs = Math.min(100, deadline - Date.now());
    if (pauseMs > 0) {
      await sleep(pauseMs);
    }
  }
  throw new Error(
    `timed out waiting for observed Matrix approval ${params.expectedApprovalId} in ${roomIds.join(", ")}`,
  );
}
|
||||
|
||||
/**
 * Lists the room ids where a driver-DM approval may be delivered: the room
 * resolved for the driver DM key first, followed by every topology room of
 * kind "dm" whose member roles include both "driver" and "sut".
 *
 * The list may contain duplicates; no de-duplication is done here.
 */
function listDriverDmApprovalCandidateRoomIds(context: MatrixQaScenarioContext) {
  const candidateRoomIds = [resolveMatrixQaScenarioRoomId(context, MATRIX_QA_DRIVER_DM_ROOM_KEY)];
  for (const room of context.topology.rooms) {
    const isDriverSutDm =
      room.kind === "dm" &&
      room.memberRoles.includes("driver") &&
      room.memberRoles.includes("sut");
    if (isDriverSutDm) {
      candidateRoomIds.push(room.roomId);
    }
  }
  return candidateRoomIds;
}
|
||||
|
||||
async function reactToApproval(params: {
|
||||
context: MatrixQaScenarioContext;
|
||||
decision: MatrixQaApprovalDecision;
|
||||
@@ -224,14 +325,6 @@ function assertApprovalDecisionResult(params: {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Throws unless `result` is a non-null object whose `ok` property is exactly `true`.
 * The error message embeds the formatted result value.
 */
function assertApprovalResolveResult(result: unknown) {
  const isObject = typeof result === "object" && result !== null;
  if (isObject && (result as { ok?: unknown }).ok === true) {
    return;
  }
  throw new Error(`approval resolve result was ${formatApprovalResultValue(result)}`);
}
|
||||
|
||||
function formatApprovalResultValue(value: unknown) {
|
||||
if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
|
||||
return String(value);
|
||||
@@ -325,8 +418,8 @@ async function resolveApprovalDecision(params: {
|
||||
method,
|
||||
{ decision: params.decision, id: params.approvalId },
|
||||
{
|
||||
expectFinal: true,
|
||||
timeoutMs: MATRIX_QA_APPROVAL_DECISION_TIMEOUT_MS + 5_000,
|
||||
expectFinal: false,
|
||||
timeoutMs: 5_000,
|
||||
},
|
||||
);
|
||||
}
|
||||
@@ -563,7 +656,7 @@ export async function runApprovalPluginMetadataSingleEventScenario(
|
||||
|
||||
export async function runApprovalChannelTargetBothScenario(context: MatrixQaScenarioContext) {
|
||||
const { client, startSince } = await primeMatrixQaDriverScenarioClient(context);
|
||||
const dmRoomId = resolveMatrixQaScenarioRoomId(context, MATRIX_QA_DRIVER_DM_ROOM_KEY);
|
||||
const dmRoomIds = listDriverDmApprovalCandidateRoomIds(context);
|
||||
const token = buildMatrixQaToken("MATRIX_QA_APPROVAL_BOTH");
|
||||
const approvalId = `qa-${token.toLowerCase()}-${randomUUID().slice(0, 8)}`;
|
||||
const accepted = await requestExecApproval({
|
||||
@@ -579,23 +672,22 @@ export async function runApprovalChannelTargetBothScenario(context: MatrixQaScen
|
||||
roomId: context.roomId,
|
||||
since: startSince,
|
||||
});
|
||||
const dmApproval = await waitForApprovalEvent({
|
||||
const dmApproval = await waitForObservedApprovalEvent({
|
||||
context,
|
||||
expectedApprovalId: approvalId,
|
||||
expectedKind: "exec",
|
||||
roomId: dmRoomId,
|
||||
since: startSince,
|
||||
roomIds: dmRoomIds,
|
||||
timeoutMs: context.timeoutMs,
|
||||
});
|
||||
if (channelApproval.event.approval?.id !== dmApproval.event.approval?.id) {
|
||||
throw new Error("target=both delivered different approval ids to channel and DM");
|
||||
}
|
||||
const result = await resolveApprovalDecision({
|
||||
await resolveApprovalDecision({
|
||||
approvalId,
|
||||
context,
|
||||
decision: "allow-once",
|
||||
kind: "exec",
|
||||
});
|
||||
assertApprovalResolveResult(result);
|
||||
const lateDuplicate = await client.waitForOptionalRoomEvent({
|
||||
observedEvents: context.observedEvents,
|
||||
predicate: (event) =>
|
||||
@@ -622,7 +714,7 @@ export async function runApprovalChannelTargetBothScenario(context: MatrixQaScen
|
||||
`channel approval event: ${channelApproval.event.eventId}`,
|
||||
`dm approval event: ${dmApproval.event.eventId}`,
|
||||
`approval id: ${approvalId}`,
|
||||
`decision: allow-once via gateway resolve`,
|
||||
`cleanup decision: allow-once`,
|
||||
].join("\n"),
|
||||
} satisfies MatrixQaScenarioExecution;
|
||||
}
|
||||
|
||||
@@ -210,9 +210,15 @@ async function assertMatrixQaPeerDeviceTrusted(params: {
|
||||
client: MatrixQaE2eeScenarioClient;
|
||||
deviceId: string;
|
||||
label: string;
|
||||
timeoutMs: number;
|
||||
userId: string;
|
||||
}) {
|
||||
const status = await params.client.getDeviceVerificationStatus(params.userId, params.deviceId);
|
||||
const startedAt = Date.now();
|
||||
let status = await params.client.getDeviceVerificationStatus(params.userId, params.deviceId);
|
||||
while (!status.verified && Date.now() - startedAt < params.timeoutMs) {
|
||||
await sleep(Math.min(250, Math.max(25, params.timeoutMs - (Date.now() - startedAt))));
|
||||
status = await params.client.getDeviceVerificationStatus(params.userId, params.deviceId);
|
||||
}
|
||||
if (!status.verified) {
|
||||
throw new Error(
|
||||
`${params.label} did not trust ${params.userId}/${params.deviceId} after verification`,
|
||||
@@ -2969,12 +2975,14 @@ export async function runMatrixQaE2eeDeviceSasVerificationScenario(
|
||||
client: driver,
|
||||
deviceId: observerDeviceId,
|
||||
label: "driver",
|
||||
timeoutMs: context.timeoutMs,
|
||||
userId: context.observerUserId,
|
||||
});
|
||||
const observerTrust = await assertMatrixQaPeerDeviceTrusted({
|
||||
client: observer,
|
||||
deviceId: driverDeviceId,
|
||||
label: "observer",
|
||||
timeoutMs: context.timeoutMs,
|
||||
userId: context.driverUserId,
|
||||
});
|
||||
return {
|
||||
@@ -3072,14 +3080,20 @@ export async function runMatrixQaE2eeQrVerificationScenario(
|
||||
sameMatrixQaVerificationTransaction(summary, completedDriver) && summary.completed,
|
||||
timeoutMs: context.timeoutMs,
|
||||
});
|
||||
const driverTrust = await driver.getDeviceVerificationStatus(
|
||||
context.observerUserId,
|
||||
observerDeviceId,
|
||||
);
|
||||
const observerTrust = await observer.getDeviceVerificationStatus(
|
||||
context.driverUserId,
|
||||
driverDeviceId,
|
||||
);
|
||||
const driverTrust = await assertMatrixQaPeerDeviceTrusted({
|
||||
client: driver,
|
||||
deviceId: observerDeviceId,
|
||||
label: "driver",
|
||||
timeoutMs: context.timeoutMs,
|
||||
userId: context.observerUserId,
|
||||
});
|
||||
const observerTrust = await assertMatrixQaPeerDeviceTrusted({
|
||||
client: observer,
|
||||
deviceId: driverDeviceId,
|
||||
label: "observer",
|
||||
timeoutMs: context.timeoutMs,
|
||||
userId: context.driverUserId,
|
||||
});
|
||||
return {
|
||||
artifacts: {
|
||||
completedVerificationIds: [completedDriver.id, completedObserver.id],
|
||||
|
||||
@@ -317,24 +317,42 @@ export async function runGeneratedImageDeliveryScenario(context: MatrixQaScenari
|
||||
const roomId = resolveMatrixQaScenarioRoomId(context, MATRIX_QA_MEDIA_ROOM_KEY);
|
||||
const { client, startSince } = await primeMatrixQaDriverMediaClient(context);
|
||||
const triggerBody = buildMatrixQaImageGenerationPrompt(context.sutUserId);
|
||||
const driverEventId = await client.sendTextMessage({
|
||||
body: triggerBody,
|
||||
mentionUserIds: [context.sutUserId],
|
||||
roomId,
|
||||
});
|
||||
const matched = await client.waitForRoomEvent({
|
||||
const driverEventIds: string[] = [];
|
||||
const isGeneratedImageEvent = (event: MatrixQaObservedEvent) =>
|
||||
event.roomId === roomId &&
|
||||
event.sender === context.sutUserId &&
|
||||
event.type === "m.room.message" &&
|
||||
event.relatesTo === undefined &&
|
||||
event.msgtype === "m.image" &&
|
||||
event.attachment?.kind === "image";
|
||||
let matched = await client.waitForOptionalRoomEvent({
|
||||
observedEvents: context.observedEvents,
|
||||
predicate: (event) =>
|
||||
event.roomId === roomId &&
|
||||
event.sender === context.sutUserId &&
|
||||
event.type === "m.room.message" &&
|
||||
event.relatesTo === undefined &&
|
||||
event.msgtype === "m.image" &&
|
||||
event.attachment?.kind === "image",
|
||||
predicate: isGeneratedImageEvent,
|
||||
roomId,
|
||||
since: startSince,
|
||||
timeoutMs: context.timeoutMs,
|
||||
timeoutMs: 0,
|
||||
});
|
||||
for (let attempt = 1; !matched.matched && attempt <= 2; attempt += 1) {
|
||||
const driverEventId = await client.sendTextMessage({
|
||||
body: triggerBody,
|
||||
mentionUserIds: [context.sutUserId],
|
||||
roomId,
|
||||
});
|
||||
driverEventIds.push(driverEventId);
|
||||
matched = await client.waitForOptionalRoomEvent({
|
||||
observedEvents: context.observedEvents,
|
||||
predicate: isGeneratedImageEvent,
|
||||
roomId,
|
||||
since: matched.since ?? startSince,
|
||||
timeoutMs: context.timeoutMs,
|
||||
});
|
||||
}
|
||||
if (!matched.matched) {
|
||||
throw new Error(
|
||||
`timed out after ${context.timeoutMs}ms waiting for Matrix generated image after ${driverEventIds.length} attempt(s)`,
|
||||
);
|
||||
}
|
||||
const matchedEvent = matched.event;
|
||||
advanceMatrixQaActorCursor({
|
||||
actorId: "driver",
|
||||
syncState: context.syncState,
|
||||
@@ -342,25 +360,26 @@ export async function runGeneratedImageDeliveryScenario(context: MatrixQaScenari
|
||||
startSince,
|
||||
});
|
||||
const attachment = requireMatrixQaImageAttachment(
|
||||
matched.event,
|
||||
matchedEvent,
|
||||
"Matrix generated image delivery scenario",
|
||||
);
|
||||
return {
|
||||
artifacts: {
|
||||
attachmentBodyPreview: matched.event.body?.slice(0, 200),
|
||||
attachmentEventId: matched.event.eventId,
|
||||
attachmentBodyPreview: matchedEvent.body?.slice(0, 200),
|
||||
attachmentEventId: matchedEvent.eventId,
|
||||
attachmentFilename: attachment.filename,
|
||||
attachmentKind: attachment.kind,
|
||||
attachmentMsgtype: matched.event.msgtype,
|
||||
driverEventId,
|
||||
attachmentMsgtype: matchedEvent.msgtype,
|
||||
driverEventId: driverEventIds[0],
|
||||
driverEventIds,
|
||||
roomId,
|
||||
triggerBody,
|
||||
},
|
||||
details: [
|
||||
`room id: ${roomId}`,
|
||||
`driver event: ${driverEventId}`,
|
||||
`driver events: ${driverEventIds.join(", ")}`,
|
||||
...buildMatrixQaAttachmentDetailLines({
|
||||
attachmentEvent: matched.event,
|
||||
attachmentEvent: matchedEvent,
|
||||
label: "generated image",
|
||||
}),
|
||||
].join("\n"),
|
||||
|
||||
@@ -693,7 +693,7 @@ function assertMatrixQaToolProgressMentionsInert(event: MatrixQaObservedEvent) {
|
||||
|
||||
/**
 * Reports whether any line of `body` looks like a tool-progress preview line.
 *
 * A line matches when, ignoring surrounding whitespace, it is either
 *  - a bullet (`-`, `*`, or `•`) followed by text that may be wrapped in backticks, or
 *  - a bare span wrapped in backticks with no bullet.
 *
 * @param body Message body to inspect; `undefined` yields `false`.
 */
function hasMatrixQaToolProgressPreviewLine(body: string | undefined) {
  // Built once per call rather than once per line.
  const previewLinePattern = /^\s*(?:[-*•]\s+`?[^`\s][^`]*`?|`[^`]+`)\s*$/u;
  return Boolean(body?.split(/\r?\n/).some((line) => previewLinePattern.test(line)));
}
|
||||
|
||||
@@ -967,6 +967,7 @@ export async function runToolProgressErrorScenario(context: MatrixQaScenarioCont
|
||||
expectedPreviewKind: "notice",
|
||||
finalText: buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS_ERROR"),
|
||||
label: "tool progress error",
|
||||
allowGenericProgressLine: true,
|
||||
progressPattern: /\bread\s*:?\s*from\s+\S*missing-matrix-tool-progress-target\.txt\b/i,
|
||||
triggerBodyBuilder: buildMatrixToolProgressErrorPrompt,
|
||||
});
|
||||
|
||||
@@ -49,6 +49,7 @@ export type MatrixQaScenarioArtifacts = {
|
||||
dedupeCommitObserved?: boolean;
|
||||
duplicateWindowMs?: number;
|
||||
driverEventId?: string;
|
||||
driverEventIds?: string[];
|
||||
driverUserId?: string;
|
||||
editEventId?: string;
|
||||
editedToken?: string;
|
||||
|
||||
@@ -331,6 +331,134 @@ describe("matrix live qa scenarios", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("waits for Matrix SAS device trust after verification completes", async () => {
  // Identifiers shared by every summary in the simulated SAS flow.
  const transactionId = "tx-sas";
  const driverRequestId = "driver-request";
  const observerRequestId = "observer-request";
  const sas = {
    emoji: [["🐶", "Dog"]],
  };

  const buildSasSummary = (id: string) => ({ hasSas: true, id, sas, transactionId });
  const buildCompletedSummary = (id: string) => ({ completed: true, id, transactionId });

  const initiated = { id: driverRequestId, transactionId };
  const incoming = {
    canAccept: true,
    id: observerRequestId,
    initiatedByMe: false,
    pending: true,
    transactionId,
  };
  const ready = { id: driverRequestId, phaseName: "ready", transactionId };
  const initiatorSas = buildSasSummary(driverRequestId);
  const recipientSas = buildSasSummary(observerRequestId);
  const completedInitiator = buildCompletedSummary(driverRequestId);
  const completedRecipient = buildCompletedSummary(observerRequestId);

  // Both sides bootstrap their own device with the same successful result.
  const buildBootstrapMock = () =>
    vi.fn().mockResolvedValue({
      crossSigning: { published: true },
      success: true,
      verification: {
        backupVersion: "1",
        crossSigningVerified: true,
        recoveryKeyStored: true,
        signedByOwner: true,
        verified: true,
      },
    });

  // The driver's first trust probe reports "not verified yet"; the second reports trusted.
  const driverGetDeviceVerificationStatus = vi.fn();
  driverGetDeviceVerificationStatus.mockResolvedValueOnce({ verified: false });
  driverGetDeviceVerificationStatus.mockResolvedValueOnce({ verified: true });
  const observerGetDeviceVerificationStatus = vi.fn().mockResolvedValue({ verified: true });
  const driverStop = vi.fn().mockResolvedValue(undefined);
  const observerStop = vi.fn().mockResolvedValue(undefined);

  const driverClient = {
    bootstrapOwnDeviceVerification: buildBootstrapMock(),
    confirmVerificationSas: vi.fn().mockResolvedValue(completedInitiator),
    getDeviceVerificationStatus: driverGetDeviceVerificationStatus,
    getRecoveryKey: vi.fn().mockResolvedValue({ encodedPrivateKey: "driver-key" }),
    listVerifications: vi
      .fn()
      .mockResolvedValueOnce([ready])
      .mockResolvedValueOnce([initiatorSas])
      .mockResolvedValueOnce([completedInitiator]),
    requestVerification: vi.fn().mockResolvedValue(initiated),
    resetRoomKeyBackup: vi.fn().mockResolvedValue({ success: true }),
    startVerification: vi.fn().mockResolvedValue(initiatorSas),
    stop: driverStop,
  };
  const observerClient = {
    acceptVerification: vi.fn().mockResolvedValue(ready),
    bootstrapOwnDeviceVerification: buildBootstrapMock(),
    confirmVerificationSas: vi.fn().mockResolvedValue(completedRecipient),
    getDeviceVerificationStatus: observerGetDeviceVerificationStatus,
    getRecoveryKey: vi.fn().mockResolvedValue({ encodedPrivateKey: "observer-key" }),
    listVerifications: vi
      .fn()
      .mockResolvedValueOnce([incoming])
      .mockResolvedValueOnce([recipientSas])
      .mockResolvedValueOnce([completedRecipient]),
    resetRoomKeyBackup: vi.fn().mockResolvedValue({ success: true }),
    stop: observerStop,
  };

  // The first client created is the driver, the second is the observer.
  createMatrixQaE2eeScenarioClient
    .mockResolvedValueOnce(driverClient)
    .mockResolvedValueOnce(observerClient);

  const scenario = MATRIX_QA_SCENARIOS.find(
    (entry) => entry.id === "matrix-e2ee-device-sas-verification",
  );
  expect(scenario).toBeDefined();

  const execution = runMatrixQaScenario(scenario!, {
    ...matrixQaScenarioContext(),
    driverDeviceId: "DRIVERDEVICE",
    driverPassword: "driver-password",
    observerDeviceId: "OBSERVERDEVICE",
    observerPassword: "observer-password",
    outputDir: "/tmp/matrix-qa",
    timeoutMs: 80,
  });
  await expect(execution).resolves.toMatchObject({
    artifacts: {
      driverTrustsObserverDevice: true,
      observerTrustsDriverDevice: true,
    },
  });

  // The driver polled once more after the initial "not verified" answer.
  expect(driverGetDeviceVerificationStatus).toHaveBeenCalledTimes(2);
  expect(driverStop).toHaveBeenCalledTimes(1);
  expect(observerStop).toHaveBeenCalledTimes(1);
});
|
||||
|
||||
it("keeps the Matrix CLI default profile on the full catalog", () => {
|
||||
const allIds = scenarioTesting.findMatrixQaScenarios().map((scenario) => scenario.id);
|
||||
|
||||
@@ -469,6 +597,112 @@ describe("matrix live qa scenarios", () => {
|
||||
expect(gatewayCall.mock.calls.at(-1)?.[0]).toBe("exec.approval.waitDecision");
|
||||
});
|
||||
|
||||
it("reuses observed Matrix approval events across channel and DM target=both waits", async () => {
  const context = matrixQaScenarioContext();

  // Register two driver<->SUT DM rooms; the approval below lands in the shared one.
  for (const [key, name, roomId] of [
    [scenarioTesting.MATRIX_QA_DRIVER_DM_ROOM_KEY, "Driver DM", "!driver-dm:matrix-qa.test"],
    [
      scenarioTesting.MATRIX_QA_DRIVER_DM_SHARED_ROOM_KEY,
      "Driver shared DM",
      "!driver-shared-dm:matrix-qa.test",
    ],
  ] as const) {
    context.topology.rooms.push({
      key,
      kind: "dm",
      memberRoles: ["driver", "sut"],
      memberUserIds: ["@driver:matrix-qa.test", "@sut:matrix-qa.test"],
      name,
      requireMention: false,
      roomId,
    });
  }

  // The gateway mock captures the approval id from the request so events can echo it.
  let approvalId = "";
  const gatewayCall = vi.fn().mockImplementation(async (method: string, ...args: unknown[]) => {
    switch (method) {
      case "exec.approval.request": {
        const payload = args.find(
          (arg): arg is { id?: string } => typeof arg === "object" && arg !== null && "id" in arg,
        );
        approvalId = payload?.id ?? "";
        return { id: approvalId, status: "accepted" };
      }
      case "exec.approval.resolve":
        return { ok: true };
      default:
        throw new Error(`unexpected gateway method ${method}`);
    }
  });
  context.gatewayCall = gatewayCall;

  const buildApprovalEvent = (eventId: string, roomId: string) =>
    matrixQaMessageEvent({
      approval: {
        allowedDecisions: ["allow-once", "deny"],
        hasCommandText: true,
        id: approvalId,
        kind: "exec",
        state: "pending",
        type: "approval.request",
        version: 1,
      },
      body: "approval requested",
      eventId,
      kind: "message",
      roomId,
    });

  // The single channel wait records the channel approval, the DM approval, and a
  // reaction in the observed list, then resolves with the channel approval.
  const waitForRoomEvent = vi.fn().mockImplementation(async () => {
    const channelApproval = buildApprovalEvent("$approval-both-channel", "!main:matrix-qa.test");
    const dmApproval = buildApprovalEvent(
      "$approval-both-dm",
      "!driver-shared-dm:matrix-qa.test",
    );
    context.observedEvents.push(channelApproval, dmApproval);
    context.observedEvents.push({
      eventId: "$approval-both-option",
      kind: "reaction",
      reaction: {
        eventId: "$approval-both-channel",
        key: "✅",
      },
      roomId: "!main:matrix-qa.test",
      sender: "@sut:matrix-qa.test",
      type: "m.reaction",
    });
    return { event: channelApproval, since: "driver-sync-approval" };
  });
  const waitForOptionalRoomEvent = vi.fn().mockResolvedValue({
    matched: false,
    since: "driver-sync-late-window",
  });

  const primedDriverClient = {
    primeRoom: vi.fn().mockResolvedValue("driver-sync-start"),
    waitForOptionalRoomEvent,
  };
  const channelWaitClient = { waitForRoomEvent };
  createMatrixQaClient
    .mockReturnValueOnce(primedDriverClient)
    .mockReturnValueOnce(channelWaitClient);

  const scenario = MATRIX_QA_SCENARIOS.find(
    (entry) => entry.id === "matrix-approval-channel-target-both",
  );
  expect(scenario).toBeDefined();

  await expect(runMatrixQaScenario(scenario!, context)).resolves.toMatchObject({
    artifacts: {
      approvals: [
        { eventId: "$approval-both-channel", roomId: "!main:matrix-qa.test" },
        { eventId: "$approval-both-dm", roomId: "!driver-shared-dm:matrix-qa.test" },
      ],
    },
  });

  // The DM approval came from the observed list, so only one blocking wait ran.
  expect(waitForRoomEvent).toHaveBeenCalledTimes(1);
  const lastGatewayCall = gatewayCall.mock.calls.at(-1);
  expect(lastGatewayCall?.[0]).toBe("exec.approval.resolve");
  expect(lastGatewayCall?.[2]).toMatchObject({ expectFinal: false });
  expect(createMatrixQaClient).toHaveBeenCalledTimes(3);
});
|
||||
|
||||
it("lets explicit Matrix scenario ids override the selected profile", () => {
|
||||
expect(
|
||||
scenarioTesting
|
||||
@@ -2922,6 +3156,61 @@ describe("matrix live qa scenarios", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("accepts shortened Matrix tool progress error preview lines", async () => {
  const previewEventId = "$tool-progress-error-short-preview";
  const finalEventId = "$tool-progress-error-short-final";
  // Preview body whose tool progress line is shortened with an ellipsis and wrapped in backticks.
  const previewBody = "Nautiling...\n`📖 Read: from…ng-matrix-tool-progress-target.txt`";

  const previewEvent = matrixQaMessageEvent({
    kind: "notice",
    eventId: previewEventId,
    body: previewBody,
  });

  // The room client yields the preview first, then a final notice that replaces the preview.
  const { waitForRoomEvent } = mockMatrixQaRoomClient({
    driverEventId: "$tool-progress-error-short-trigger",
    events: [
      {
        event: previewEvent,
        since: "driver-sync-preview",
      },
      {
        // Built lazily so the final body can be derived from the driver message that was sent.
        event: ({ sendTextMessage }) => {
          const driverBody = sendTextMessage.mock.calls[0]?.[0]?.body;
          return matrixQaMessageEvent({
            kind: "notice",
            eventId: finalEventId,
            body: readMatrixQaReplyDirective(
              driverBody,
              "MATRIX_QA_TOOL_PROGRESS_ERROR_SHORT_FIXED",
            ),
            relatesTo: {
              relType: "m.replace",
              eventId: previewEventId,
            },
          });
        },
        since: "driver-sync-next",
      },
    ],
  });

  const scenario = MATRIX_QA_SCENARIOS.find(
    ({ id }) => id === "matrix-room-tool-progress-error",
  );
  expect(scenario).toBeDefined();

  // The scenario must report the shortened preview verbatim and the replacing final reply.
  await expect(runMatrixQaScenario(scenario!, matrixQaScenarioContext())).resolves.toMatchObject({
    artifacts: {
      previewBodyPreview: previewBody,
      previewEventId,
      reply: {
        eventId: finalEventId,
        relatesTo: {
          eventId: previewEventId,
          relType: "m.replace",
        },
      },
    },
  });

  // One wait for the preview, one for the final edit.
  expect(waitForRoomEvent).toHaveBeenCalledTimes(2);
});
|
||||
|
||||
it("keeps Matrix-looking tool progress mentions inert in partial previews", async () => {
|
||||
const previewEventId = "$tool-progress-mention-preview";
|
||||
mockMatrixQaRoomClient({
|
||||
@@ -3199,27 +3488,34 @@ describe("matrix live qa scenarios", () => {
|
||||
it("waits for a real Matrix image attachment after image generation", async () => {
|
||||
const primeRoom = vi.fn().mockResolvedValue("driver-sync-start");
|
||||
const sendTextMessage = vi.fn().mockResolvedValue("$image-generate-trigger");
|
||||
const waitForRoomEvent = vi.fn().mockResolvedValue({
|
||||
event: {
|
||||
kind: "message",
|
||||
roomId: "!media:matrix-qa.test",
|
||||
eventId: "$sut-image",
|
||||
sender: "@sut:matrix-qa.test",
|
||||
type: "m.room.message",
|
||||
body: "Protocol note: generated the QA lighthouse image successfully.",
|
||||
msgtype: "m.image",
|
||||
attachment: {
|
||||
kind: "image",
|
||||
filename: "qa-lighthouse.png",
|
||||
const waitForOptionalRoomEvent = vi
|
||||
.fn()
|
||||
.mockResolvedValueOnce({
|
||||
matched: false,
|
||||
since: "driver-sync-start",
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
event: {
|
||||
kind: "message",
|
||||
roomId: "!media:matrix-qa.test",
|
||||
eventId: "$sut-image",
|
||||
sender: "@sut:matrix-qa.test",
|
||||
type: "m.room.message",
|
||||
body: "Protocol note: generated the QA lighthouse image successfully.",
|
||||
msgtype: "m.image",
|
||||
attachment: {
|
||||
kind: "image",
|
||||
filename: "qa-lighthouse.png",
|
||||
},
|
||||
},
|
||||
},
|
||||
since: "driver-sync-next",
|
||||
});
|
||||
matched: true,
|
||||
since: "driver-sync-next",
|
||||
});
|
||||
|
||||
createMatrixQaClient.mockReturnValue({
|
||||
primeRoom,
|
||||
sendTextMessage,
|
||||
waitForRoomEvent,
|
||||
waitForOptionalRoomEvent,
|
||||
});
|
||||
|
||||
const scenario = MATRIX_QA_SCENARIOS.find(
|
||||
@@ -3273,7 +3569,7 @@ describe("matrix live qa scenarios", () => {
|
||||
});
|
||||
|
||||
expect(sendTextMessage).toHaveBeenCalledWith({
|
||||
body: expect.stringContaining("Image generation check: generate a QA lighthouse image"),
|
||||
body: expect.stringContaining("/tool image_generate action=generate"),
|
||||
mentionUserIds: ["@sut:matrix-qa.test"],
|
||||
roomId: "!media:matrix-qa.test",
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user