mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:30:42 +00:00
qa-matrix: add streaming tool progress scenarios
This commit is contained in:
@@ -82,6 +82,7 @@ The full scenario id list is the `MatrixQaScenarioId` union in `extensions/qa-ma
|
||||
|
||||
- threading — `matrix-thread-*`, `matrix-subagent-thread-spawn`
|
||||
- top-level / DM / room — `matrix-top-level-reply-shape`, `matrix-room-*`, `matrix-dm-*`
|
||||
- streaming and tool progress — `matrix-room-partial-streaming-preview`, `matrix-room-quiet-streaming-preview`, `matrix-room-tool-progress-*`, `matrix-room-block-streaming`
|
||||
- media — `matrix-media-type-coverage`, `matrix-room-image-understanding-attachment`, `matrix-attachment-only-ignored`, `matrix-unsupported-media-safe`
|
||||
- routing — `matrix-room-autojoin-invite`, `matrix-secondary-room-*`
|
||||
- reactions — `matrix-reaction-*`
|
||||
|
||||
@@ -20,7 +20,12 @@ export type MatrixQaScenarioId =
|
||||
| "matrix-subagent-thread-spawn"
|
||||
| "matrix-top-level-reply-shape"
|
||||
| "matrix-room-thread-reply-override"
|
||||
| "matrix-room-partial-streaming-preview"
|
||||
| "matrix-room-quiet-streaming-preview"
|
||||
| "matrix-room-tool-progress-preview"
|
||||
| "matrix-room-tool-progress-preview-opt-out"
|
||||
| "matrix-room-tool-progress-error"
|
||||
| "matrix-room-tool-progress-mention-safety"
|
||||
| "matrix-room-block-streaming"
|
||||
| "matrix-room-image-understanding-attachment"
|
||||
| "matrix-room-generated-image-delivery"
|
||||
@@ -317,6 +322,14 @@ export const MATRIX_QA_SCENARIOS: MatrixQaScenarioDefinition[] = [
|
||||
threadReplies: "always",
|
||||
},
|
||||
},
|
||||
{
|
||||
id: "matrix-room-partial-streaming-preview",
|
||||
timeoutMs: 45_000,
|
||||
title: "Matrix partial streaming emits text previews before finalizing",
|
||||
configOverrides: {
|
||||
streaming: "partial",
|
||||
},
|
||||
},
|
||||
{
|
||||
id: "matrix-room-quiet-streaming-preview",
|
||||
timeoutMs: 45_000,
|
||||
@@ -325,6 +338,47 @@ export const MATRIX_QA_SCENARIOS: MatrixQaScenarioDefinition[] = [
|
||||
streaming: "quiet",
|
||||
},
|
||||
},
|
||||
{
|
||||
id: "matrix-room-tool-progress-preview",
|
||||
timeoutMs: 60_000,
|
||||
title: "Matrix streaming folds tool progress into the preview message",
|
||||
configOverrides: {
|
||||
streaming: "quiet",
|
||||
toolProfile: "coding",
|
||||
},
|
||||
},
|
||||
{
|
||||
id: "matrix-room-tool-progress-preview-opt-out",
|
||||
timeoutMs: 60_000,
|
||||
title: "Matrix streaming can opt out of preview tool progress",
|
||||
configOverrides: {
|
||||
streaming: {
|
||||
mode: "quiet",
|
||||
preview: {
|
||||
toolProgress: false,
|
||||
},
|
||||
},
|
||||
toolProfile: "coding",
|
||||
},
|
||||
},
|
||||
{
|
||||
id: "matrix-room-tool-progress-error",
|
||||
timeoutMs: 60_000,
|
||||
title: "Matrix streaming finalizes previews after tool errors",
|
||||
configOverrides: {
|
||||
streaming: "quiet",
|
||||
toolProfile: "coding",
|
||||
},
|
||||
},
|
||||
{
|
||||
id: "matrix-room-tool-progress-mention-safety",
|
||||
timeoutMs: 60_000,
|
||||
title: "Matrix streaming keeps tool-progress mentions inert",
|
||||
configOverrides: {
|
||||
streaming: "partial",
|
||||
toolProfile: "coding",
|
||||
},
|
||||
},
|
||||
{
|
||||
id: "matrix-room-block-streaming",
|
||||
timeoutMs: 75_000,
|
||||
|
||||
@@ -15,8 +15,12 @@ import {
|
||||
assertTopLevelReplyArtifact,
|
||||
advanceMatrixQaActorCursor,
|
||||
buildMatrixBlockStreamingPrompt,
|
||||
buildMatrixPartialStreamingPrompt,
|
||||
buildMatrixQuietStreamingPrompt,
|
||||
buildMatrixQaToken,
|
||||
buildMatrixToolProgressErrorPrompt,
|
||||
buildMatrixToolProgressMentionSafetyPrompt,
|
||||
buildMatrixToolProgressPrompt,
|
||||
buildMatrixReplyArtifact,
|
||||
buildMatrixReplyDetails,
|
||||
buildMentionPrompt,
|
||||
@@ -554,9 +558,34 @@ export async function runAllowlistHotReloadScenario(context: MatrixQaScenarioCon
|
||||
}
|
||||
|
||||
/**
 * Quiet-streaming preview scenario.
 *
 * Delegates to the shared streaming-preview runner, expecting the preview to
 * arrive as a `notice` event and the final reply to carry a freshly generated
 * `MATRIX_QA_QUIET_STREAM_<8 uppercase chars> preview complete` text.
 */
export async function runQuietStreamingPreviewScenario(context: MatrixQaScenarioContext) {
  // First eight characters of a random UUID keep the token unique per run.
  const tokenSuffix = randomUUID().slice(0, 8).toUpperCase();
  const finalText = `MATRIX_QA_QUIET_STREAM_${tokenSuffix} preview complete`;
  return runMatrixStreamingPreviewScenario(context, {
    expectedPreviewKind: "notice",
    finalText,
    label: "quiet streaming",
    triggerBodyBuilder: buildMatrixQuietStreamingPrompt,
  });
}
|
||||
|
||||
/**
 * Partial-streaming preview scenario.
 *
 * Delegates to the shared streaming-preview runner, expecting the preview to
 * arrive as a regular `message` event and the final reply to carry a freshly
 * generated `MATRIX_QA_PARTIAL_STREAM_<8 uppercase chars> preview complete` text.
 */
export async function runPartialStreamingPreviewScenario(context: MatrixQaScenarioContext) {
  // First eight characters of a random UUID keep the token unique per run.
  const tokenSuffix = randomUUID().slice(0, 8).toUpperCase();
  const finalText = `MATRIX_QA_PARTIAL_STREAM_${tokenSuffix} preview complete`;
  return runMatrixStreamingPreviewScenario(context, {
    expectedPreviewKind: "message",
    finalText,
    label: "partial streaming",
    triggerBodyBuilder: buildMatrixPartialStreamingPrompt,
  });
}
|
||||
|
||||
async function runMatrixStreamingPreviewScenario(
|
||||
context: MatrixQaScenarioContext,
|
||||
params: {
|
||||
expectedPreviewKind: MatrixQaObservedEvent["kind"];
|
||||
finalText: string;
|
||||
label: string;
|
||||
triggerBodyBuilder: (sutUserId: string, finalText: string) => string;
|
||||
},
|
||||
) {
|
||||
const { client, startSince } = await primeMatrixQaDriverScenarioClient(context);
|
||||
const finalText = `MATRIX_QA_QUIET_STREAM_${randomUUID().slice(0, 8).toUpperCase()} preview complete`;
|
||||
const triggerBody = buildMatrixQuietStreamingPrompt(context.sutUserId, finalText);
|
||||
const triggerBody = params.triggerBodyBuilder(context.sutUserId, params.finalText);
|
||||
const driverEventId = await client.sendTextMessage({
|
||||
body: triggerBody,
|
||||
mentionUserIds: [context.sutUserId],
|
||||
@@ -567,7 +596,8 @@ export async function runQuietStreamingPreviewScenario(context: MatrixQaScenario
|
||||
predicate: (event) =>
|
||||
event.roomId === context.roomId &&
|
||||
event.sender === context.sutUserId &&
|
||||
event.kind === "notice",
|
||||
event.kind === params.expectedPreviewKind &&
|
||||
event.relatesTo === undefined,
|
||||
roomId: context.roomId,
|
||||
since: startSince,
|
||||
timeoutMs: context.timeoutMs,
|
||||
@@ -580,7 +610,7 @@ export async function runQuietStreamingPreviewScenario(context: MatrixQaScenario
|
||||
isMatrixQaMessageLikeKind(event.kind) &&
|
||||
event.relatesTo?.relType === "m.replace" &&
|
||||
event.relatesTo.eventId === preview.event.eventId &&
|
||||
event.body === finalText,
|
||||
event.body === params.finalText,
|
||||
roomId: context.roomId,
|
||||
since: preview.since,
|
||||
timeoutMs: context.timeoutMs,
|
||||
@@ -591,23 +621,268 @@ export async function runQuietStreamingPreviewScenario(context: MatrixQaScenario
|
||||
nextSince: finalized.since,
|
||||
startSince,
|
||||
});
|
||||
const finalReply = buildMatrixReplyArtifact(finalized.event, params.finalText);
|
||||
return {
|
||||
artifacts: {
|
||||
driverEventId,
|
||||
previewFormattedBodyPreview: preview.event.formattedBody?.slice(0, 200),
|
||||
previewBodyPreview: preview.event.body?.slice(0, 200),
|
||||
previewEventId: preview.event.eventId,
|
||||
previewMentions: preview.event.mentions,
|
||||
reply: finalReply,
|
||||
token: params.finalText,
|
||||
triggerBody,
|
||||
},
|
||||
details: [
|
||||
`driver event: ${driverEventId}`,
|
||||
`scenario: ${params.label}`,
|
||||
`preview event: ${preview.event.eventId}`,
|
||||
`preview kind: ${preview.event.kind}`,
|
||||
`preview body: ${preview.event.body ?? "<none>"}`,
|
||||
`final reply relation: ${finalized.event.relatesTo?.relType ?? "<none>"}`,
|
||||
`final reply target: ${finalized.event.relatesTo?.eventId ?? "<none>"}`,
|
||||
...buildMatrixReplyDetails("final reply", finalReply),
|
||||
].join("\n"),
|
||||
} satisfies MatrixQaScenarioExecution;
|
||||
}
|
||||
|
||||
/**
 * Finds "Working" progress messages from the SUT that leaked outside the
 * preview event.
 *
 * Only events at or after `startIndex` are inspected. An event is reported when
 * it was sent by `sutUserId`, has type `m.room.message`, contains the word
 * "Working" (case-insensitive), is neither the preview nor the final event, and
 * does not relate to the preview event.
 *
 * When `previewEventId` is omitted there is no preview to attribute progress
 * to, so every matching "Working" event other than the final one is reported.
 *
 * @param params.events - Observed events to scan.
 * @param params.finalEventId - Event id of the finalized reply, if any.
 * @param params.previewEventId - Event id of the preview message, if any.
 * @param params.startIndex - Index of the first event to consider.
 * @param params.sutUserId - Sender id whose events are checked.
 * @returns The offending events, in observation order.
 */
function findMatrixQaUnexpectedWorkingEvents(params: {
  events: MatrixQaObservedEvent[];
  finalEventId?: string;
  previewEventId?: string;
  startIndex: number;
  sutUserId: string;
}) {
  const { finalEventId, previewEventId, sutUserId } = params;
  return params.events.slice(params.startIndex).filter((event) => {
    if (event.sender !== sutUserId || event.type !== "m.room.message") {
      return false;
    }
    if (!/\bWorking\b/i.test(event.body ?? "")) {
      return false;
    }
    if (event.eventId === previewEventId || event.eventId === finalEventId) {
      return false;
    }
    if (previewEventId === undefined) {
      // Without a preview id, `relatesTo?.eventId !== undefined` would wrongly
      // exempt every unrelated message; nothing can legitimately hold progress.
      return true;
    }
    // Edits and other relations that target the preview are expected.
    return event.relatesTo?.eventId !== previewEventId;
  });
}
|
||||
|
||||
/**
 * Asserts that a tool-progress preview event cannot notify anyone.
 *
 * Checks, in order:
 * 1. the event's `mentions` has neither a truthy `room` flag nor any `userIds`;
 * 2. the formatted body contains no `matrix.to` text (case-insensitive);
 * 3. the formatted body still shows one of the mention-looking strings
 *    (`@room`, `@alice:matrix-qa.test`, `!room:matrix-qa.test`) after a
 *    `<code>` tag with no intervening `<`.
 *
 * @throws Error describing the first check that fails.
 */
function assertMatrixQaToolProgressMentionsInert(event: MatrixQaObservedEvent) {
  const { formattedBody, mentions } = event;
  const html = formattedBody ?? "";

  const mentionedUserCount = mentions?.userIds?.length ?? 0;
  const hasActiveMentions = Boolean(mentions?.room) || mentionedUserCount > 0;
  if (hasActiveMentions) {
    throw new Error(
      `Matrix tool-progress preview emitted active mentions: ${JSON.stringify(mentions)}`,
    );
  }

  const linksToMatrixTo = /matrix\.to/i.test(html);
  if (linksToMatrixTo) {
    throw new Error(
      `Matrix tool-progress preview linked Matrix mentions: ${formattedBody ?? "<none>"}`,
    );
  }

  const keepsMentionTextInCode =
    /<code>[^<]*(?:@room|@alice:matrix-qa\.test|!room:matrix-qa\.test)/i.test(html);
  if (keepsMentionTextInCode) {
    return;
  }
  throw new Error(
    `Matrix tool-progress preview did not preserve mention-looking text inside code: ${formattedBody ?? "<none>"}`,
  );
}
|
||||
|
||||
/**
 * Shared driver for the tool-progress streaming scenarios.
 *
 * Flow:
 * 1. Send the trigger message (mentioning the SUT) built by `triggerBodyBuilder`.
 * 2. Wait for an un-related SUT event of `expectedPreviewKind` whose body
 *    contains "Working" — the preview.
 * 3. If the preview body does not already match `progressPattern`, wait for an
 *    `m.replace` edit of the preview that contains "Working" and matches it.
 * 4. Optionally assert that the progress event's mentions are inert.
 * 5. Wait for an `m.replace` edit of the preview whose body equals `finalText`.
 * 6. Fail if any other SUT "Working" message was observed since the start.
 *
 * @param context - Scenario context (room, SUT id, observed events, timeouts).
 * @param params.expectedPreviewKind - Event kind the preview and its progress edits must have.
 * @param params.finalText - Exact body expected on the finalizing edit.
 * @param params.label - Scenario label written into the details output.
 * @param params.mentionSafety - When truthy, run the inert-mentions assertion.
 * @param params.progressPattern - Pattern the progress body must match.
 * @param params.triggerBodyBuilder - Builds the driver prompt from SUT id and final text.
 * @returns Artifacts and a newline-joined details string for the run.
 * @throws Error when tool progress leaked outside the preview event.
 */
async function runMatrixToolProgressScenario(
  context: MatrixQaScenarioContext,
  params: {
    expectedPreviewKind: MatrixQaObservedEvent["kind"];
    finalText: string;
    label: string;
    mentionSafety?: boolean;
    progressPattern: RegExp;
    triggerBodyBuilder: (sutUserId: string, finalText: string) => string;
  },
) {
  const { client, startSince } = await primeMatrixQaDriverScenarioClient(context);
  // Remember where this scenario's observations begin for the leak check below.
  const firstObservedIndex = context.observedEvents.length;
  const triggerBody = params.triggerBodyBuilder(context.sutUserId, params.finalText);
  const driverEventId = await client.sendTextMessage({
    body: triggerBody,
    mentionUserIds: [context.sutUserId],
    roomId: context.roomId,
  });

  const isSutEventInRoom = (event: MatrixQaObservedEvent) =>
    event.roomId === context.roomId && event.sender === context.sutUserId;
  const mentionsWorking = (event: MatrixQaObservedEvent) => /\bWorking\b/i.test(event.body ?? "");

  const preview = await client.waitForRoomEvent({
    observedEvents: context.observedEvents,
    predicate: (event) =>
      isSutEventInRoom(event) &&
      event.kind === params.expectedPreviewKind &&
      event.relatesTo === undefined &&
      mentionsWorking(event),
    roomId: context.roomId,
    since: startSince,
    timeoutMs: context.timeoutMs,
  });

  const replacesPreview = (event: MatrixQaObservedEvent) =>
    event.relatesTo?.relType === "m.replace" &&
    event.relatesTo.eventId === preview.event.eventId;

  // The preview itself may already carry the expected progress line; otherwise
  // wait for an edit of the preview that does.
  let progress = preview;
  if (!params.progressPattern.test(preview.event.body ?? "")) {
    progress = await client.waitForRoomEvent({
      observedEvents: context.observedEvents,
      predicate: (event) =>
        isSutEventInRoom(event) &&
        event.kind === params.expectedPreviewKind &&
        replacesPreview(event) &&
        mentionsWorking(event) &&
        params.progressPattern.test(event.body ?? ""),
      roomId: context.roomId,
      since: preview.since,
      timeoutMs: context.timeoutMs,
    });
  }

  if (params.mentionSafety) {
    assertMatrixQaToolProgressMentionsInert(progress.event);
  }

  const finalized = await client.waitForRoomEvent({
    observedEvents: context.observedEvents,
    predicate: (event) =>
      isSutEventInRoom(event) &&
      isMatrixQaMessageLikeKind(event.kind) &&
      replacesPreview(event) &&
      event.body === params.finalText,
    roomId: context.roomId,
    since: progress.since,
    timeoutMs: context.timeoutMs,
  });

  const leakedWorkingEvents = findMatrixQaUnexpectedWorkingEvents({
    events: context.observedEvents,
    finalEventId: finalized.event.eventId,
    previewEventId: preview.event.eventId,
    startIndex: firstObservedIndex,
    sutUserId: context.sutUserId,
  });
  if (leakedWorkingEvents.length > 0) {
    const leakSummary = leakedWorkingEvents
      .map((event) => `${event.eventId}:${event.body ?? ""}`)
      .join("; ");
    throw new Error(`Matrix tool progress leaked outside preview event: ${leakSummary}`);
  }

  advanceMatrixQaActorCursor({
    actorId: "driver",
    syncState: context.syncState,
    nextSince: finalized.since,
    startSince,
  });

  const finalReply = buildMatrixReplyArtifact(finalized.event, params.finalText);
  const detailLines = [
    `driver event: ${driverEventId}`,
    `scenario: ${params.label}`,
    `preview event: ${preview.event.eventId}`,
    `preview kind: ${progress.event.kind}`,
    `preview body: ${progress.event.body ?? "<none>"}`,
    `preview mentions: ${JSON.stringify(progress.event.mentions ?? {})}`,
    `final reply relation: ${finalized.event.relatesTo?.relType ?? "<none>"}`,
    `final reply target: ${finalized.event.relatesTo?.eventId ?? "<none>"}`,
    ...buildMatrixReplyDetails("final reply", finalReply),
  ];

  return {
    artifacts: {
      driverEventId,
      previewBodyPreview: progress.event.body?.slice(0, 200),
      previewEventId: preview.event.eventId,
      previewFormattedBodyPreview: progress.event.formattedBody?.slice(0, 200),
      previewMentions: progress.event.mentions,
      reply: finalReply,
      token: params.finalText,
      triggerBody,
    },
    details: detailLines.join("\n"),
  } satisfies MatrixQaScenarioExecution;
}
|
||||
|
||||
/**
 * Tool-progress preview scenario: expects a `notice` preview whose body shows
 * a `tool: read` progress line before the finalized reply.
 *
 * The final text comes from `buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS")`.
 */
export async function runToolProgressPreviewScenario(context: MatrixQaScenarioContext) {
  const finalText = buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS");
  return runMatrixToolProgressScenario(context, {
    expectedPreviewKind: "notice",
    finalText,
    label: "tool progress preview",
    progressPattern: /\btool:\s*read\b/i,
    triggerBodyBuilder: buildMatrixToolProgressPrompt,
  });
}
|
||||
|
||||
/**
 * Tool-progress error scenario: expects a `notice` preview whose body mentions
 * reading `missing-matrix-tool-progress-target.txt`, followed by the finalized
 * reply.
 *
 * The final text comes from `buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS_ERROR")`.
 */
export async function runToolProgressErrorScenario(context: MatrixQaScenarioContext) {
  const finalText = buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS_ERROR");
  return runMatrixToolProgressScenario(context, {
    expectedPreviewKind: "notice",
    finalText,
    label: "tool progress error",
    progressPattern: /read from missing-matrix-tool-progress-target\.txt/i,
    triggerBodyBuilder: buildMatrixToolProgressErrorPrompt,
  });
}
|
||||
|
||||
/**
 * Tool-progress mention-safety scenario: expects a `message` preview whose
 * progress body contains mention-looking text (`@room`,
 * `@alice:matrix-qa.test` or `!room:matrix-qa.test`) and enables the
 * inert-mentions assertion on that progress event.
 *
 * The final text comes from
 * `buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS_MENTION_SAFE")`.
 */
export async function runToolProgressMentionSafetyScenario(context: MatrixQaScenarioContext) {
  const finalText = buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS_MENTION_SAFE");
  return runMatrixToolProgressScenario(context, {
    expectedPreviewKind: "message",
    finalText,
    label: "tool progress mention safety",
    mentionSafety: true,
    progressPattern: /@room|@alice:matrix-qa\.test|!room:matrix-qa\.test/i,
    triggerBodyBuilder: buildMatrixToolProgressMentionSafetyPrompt,
  });
}
|
||||
|
||||
export async function runToolProgressPreviewOptOutScenario(context: MatrixQaScenarioContext) {
|
||||
const { client, startSince } = await primeMatrixQaDriverScenarioClient(context);
|
||||
const startObservedIndex = context.observedEvents.length;
|
||||
const finalText = buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS_OPTOUT");
|
||||
const triggerBody = buildMatrixToolProgressPrompt(context.sutUserId, finalText);
|
||||
const driverEventId = await client.sendTextMessage({
|
||||
body: triggerBody,
|
||||
mentionUserIds: [context.sutUserId],
|
||||
roomId: context.roomId,
|
||||
});
|
||||
const finalized = await client.waitForRoomEvent({
|
||||
observedEvents: context.observedEvents,
|
||||
predicate: (event) =>
|
||||
event.roomId === context.roomId &&
|
||||
event.sender === context.sutUserId &&
|
||||
isMatrixQaMessageLikeKind(event.kind) &&
|
||||
event.body === finalText,
|
||||
roomId: context.roomId,
|
||||
since: startSince,
|
||||
timeoutMs: context.timeoutMs,
|
||||
});
|
||||
const unexpectedPreviewProgressEvents = context.observedEvents
|
||||
.slice(startObservedIndex)
|
||||
.filter(
|
||||
(event) =>
|
||||
event.sender === context.sutUserId &&
|
||||
event.type === "m.room.message" &&
|
||||
event.eventId !== finalized.event.eventId &&
|
||||
/^Working\.\.\.\n-/i.test(event.body ?? ""),
|
||||
);
|
||||
if (unexpectedPreviewProgressEvents.length > 0) {
|
||||
throw new Error(
|
||||
`Matrix tool-progress opt-out still emitted preview progress: ${unexpectedPreviewProgressEvents.map((event) => `${event.eventId}:${event.body ?? ""}`).join("; ")}`,
|
||||
);
|
||||
}
|
||||
advanceMatrixQaActorCursor({
|
||||
actorId: "driver",
|
||||
syncState: context.syncState,
|
||||
nextSince: finalized.since,
|
||||
startSince,
|
||||
});
|
||||
const finalReply = buildMatrixReplyArtifact(finalized.event, finalText);
|
||||
return {
|
||||
artifacts: {
|
||||
driverEventId,
|
||||
previewBodyPreview: preview.event.body?.slice(0, 200),
|
||||
previewEventId: preview.event.eventId,
|
||||
reply: finalReply,
|
||||
token: finalText,
|
||||
triggerBody,
|
||||
},
|
||||
details: [
|
||||
`driver event: ${driverEventId}`,
|
||||
`preview event: ${preview.event.eventId}`,
|
||||
`preview kind: ${preview.event.kind}`,
|
||||
`preview body: ${preview.event.body ?? "<none>"}`,
|
||||
`final reply relation: ${finalized.event.relatesTo?.relType ?? "<none>"}`,
|
||||
`final reply target: ${finalized.event.relatesTo?.eventId ?? "<none>"}`,
|
||||
"scenario: tool progress preview opt-out",
|
||||
"preview progress events: 0",
|
||||
...buildMatrixReplyDetails("final reply", finalReply),
|
||||
].join("\n"),
|
||||
} satisfies MatrixQaScenarioExecution;
|
||||
|
||||
@@ -81,6 +81,31 @@ export function buildMatrixQuietStreamingPrompt(sutUserId: string, text: string)
|
||||
return `${sutUserId} Quiet streaming QA check: reply exactly \`${text}\`.`;
|
||||
}
|
||||
|
||||
/**
 * Builds the driver prompt for the partial-streaming check.
 *
 * @param sutUserId - Placed at the start of the prompt.
 * @param text - Exact reply text, embedded between backticks.
 * @returns The single-line prompt string.
 */
export function buildMatrixPartialStreamingPrompt(sutUserId: string, text: string) {
  const instruction = "Partial streaming QA check: reply exactly `" + text + "`.";
  return sutUserId + " " + instruction;
}
|
||||
|
||||
/**
 * Builds the driver prompt for the tool-progress check: read
 * `QA_KICKOFF_TASK.md`, then reply with the exact text.
 *
 * @param sutUserId - Placed at the start of the prompt.
 * @param text - Exact reply text, embedded between backticks.
 * @returns Both sentences joined by a single space.
 */
export function buildMatrixToolProgressPrompt(sutUserId: string, text: string) {
  const readStep = `${sutUserId} Tool progress QA check: read \`QA_KICKOFF_TASK.md\` before answering.`;
  const replyStep = `After the read completes, reply exactly \`${text}\`.`;
  return `${readStep} ${replyStep}`;
}
|
||||
|
||||
/**
 * Builds the driver prompt for the tool-progress error check: read
 * `missing-matrix-tool-progress-target.txt`, then reply with the exact text
 * once the read fails.
 *
 * @param sutUserId - Placed at the start of the prompt.
 * @param text - Exact reply text, embedded between backticks.
 * @returns Both sentences joined by a single space.
 */
export function buildMatrixToolProgressErrorPrompt(sutUserId: string, text: string) {
  const readStep = `${sutUserId} Tool progress error QA check: read \`missing-matrix-tool-progress-target.txt\` before answering.`;
  const replyStep = `After the read fails, reply exactly \`${text}\`.`;
  return `${readStep} ${replyStep}`;
}
|
||||
|
||||
/**
 * Builds the driver prompt for the mention-safety check. The file name to read
 * deliberately contains mention-looking text (`@room`, `@alice:matrix-qa.test`,
 * `!room:matrix-qa.test`).
 *
 * @param sutUserId - Placed at the start of the prompt.
 * @param text - Exact reply text, embedded between backticks.
 * @returns Both sentences joined by a single space.
 */
export function buildMatrixToolProgressMentionSafetyPrompt(sutUserId: string, text: string) {
  const readStep = `${sutUserId} Tool progress QA check: read \`matrix-progress-@room-@alice:matrix-qa.test-!room:matrix-qa.test.txt\` before answering.`;
  const replyStep = `After the read completes, reply exactly \`${text}\`.`;
  return `${readStep} ${replyStep}`;
}
|
||||
|
||||
export function buildMatrixBlockStreamingPrompt(
|
||||
sutUserId: string,
|
||||
firstText: string,
|
||||
|
||||
@@ -77,6 +77,7 @@ import {
|
||||
runMatrixQaCanary,
|
||||
runMembershipLossScenario,
|
||||
runObserverAllowlistOverrideScenario,
|
||||
runPartialStreamingPreviewScenario,
|
||||
runQuietStreamingPreviewScenario,
|
||||
runReactionThreadedScenario,
|
||||
runRoomAutoJoinInviteScenario,
|
||||
@@ -86,6 +87,10 @@ import {
|
||||
runThreadIsolationScenario,
|
||||
runThreadNestedReplyShapeScenario,
|
||||
runThreadRootPreservationScenario,
|
||||
runToolProgressErrorScenario,
|
||||
runToolProgressMentionSafetyScenario,
|
||||
runToolProgressPreviewOptOutScenario,
|
||||
runToolProgressPreviewScenario,
|
||||
runTopLevelReplyShapeScenario,
|
||||
} from "./scenario-runtime-room.js";
|
||||
import {
|
||||
@@ -203,8 +208,18 @@ export async function runMatrixQaScenario(
|
||||
return await runTopLevelReplyShapeScenario(context);
|
||||
case "matrix-room-thread-reply-override":
|
||||
return await runRoomThreadReplyOverrideScenario(context);
|
||||
case "matrix-room-partial-streaming-preview":
|
||||
return await runPartialStreamingPreviewScenario(context);
|
||||
case "matrix-room-quiet-streaming-preview":
|
||||
return await runQuietStreamingPreviewScenario(context);
|
||||
case "matrix-room-tool-progress-preview":
|
||||
return await runToolProgressPreviewScenario(context);
|
||||
case "matrix-room-tool-progress-preview-opt-out":
|
||||
return await runToolProgressPreviewOptOutScenario(context);
|
||||
case "matrix-room-tool-progress-error":
|
||||
return await runToolProgressErrorScenario(context);
|
||||
case "matrix-room-tool-progress-mention-safety":
|
||||
return await runToolProgressMentionSafetyScenario(context);
|
||||
case "matrix-room-block-streaming":
|
||||
return await runBlockStreamingScenario(context);
|
||||
case "matrix-room-image-understanding-attachment":
|
||||
|
||||
@@ -93,6 +93,8 @@ export type MatrixQaScenarioArtifacts = {
|
||||
noticeEventId?: string;
|
||||
previewBodyPreview?: string;
|
||||
previewEventId?: string;
|
||||
previewFormattedBodyPreview?: string;
|
||||
previewMentions?: MatrixQaObservedEvent["mentions"];
|
||||
blockEventIds?: string[];
|
||||
bootstrapActor?: "driver" | "observer" | "sut";
|
||||
bootstrapErrorPreview?: string;
|
||||
|
||||
@@ -84,6 +84,48 @@ function matrixQaScenarioContext(): MatrixQaScenarioContext {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Test fixture: builds an observed `m.room.message` event.
 *
 * Defaults to room `!main:matrix-qa.test` and sender `@sut:matrix-qa.test`.
 * Any field in `overrides` replaces the default; `body`, `eventId` and `kind`
 * are required.
 */
function matrixQaMessageEvent(
  overrides: Partial<MatrixQaObservedEvent> &
    Pick<MatrixQaObservedEvent, "body" | "eventId" | "kind">,
): MatrixQaObservedEvent {
  const observedEvent: MatrixQaObservedEvent = {
    roomId: "!main:matrix-qa.test",
    sender: "@sut:matrix-qa.test",
    type: "m.room.message",
    // Spread last so callers can replace any of the defaults above.
    ...overrides,
  };
  return observedEvent;
}
|
||||
|
||||
/**
 * Extracts the text between backticks after "reply exactly" from a prompt body.
 *
 * @param body - Any value; it is stringified before matching.
 * @param fallback - Returned when no directive is found.
 * @returns The first backtick-quoted directive, or `fallback`.
 */
function readMatrixQaReplyDirective(body: unknown, fallback: string) {
  const directiveMatch = String(body).match(/reply exactly `([^`]+)`/);
  const directive = directiveMatch?.[1];
  return directive ?? fallback;
}
|
||||
|
||||
/**
 * Test helper: installs a mocked Matrix QA client on `createMatrixQaClient`.
 *
 * - `primeRoom` resolves to `"driver-sync-start"`.
 * - `sendTextMessage` resolves to `params.driverEventId`.
 * - `waitForRoomEvent` resolves, one call at a time and in order, to each entry
 *   of `params.events`. An entry whose `event` is a function is invoked lazily
 *   with the `sendTextMessage` mock, so it can read what the scenario sent.
 *
 * @returns The three mocks, for assertions in the test body.
 */
function mockMatrixQaRoomClient(params: {
  driverEventId: string;
  events: Array<{
    event:
      | MatrixQaObservedEvent
      | ((client: { sendTextMessage: ReturnType<typeof vi.fn> }) => MatrixQaObservedEvent);
    since: string;
  }>;
}) {
  const primeRoom = vi.fn().mockResolvedValue("driver-sync-start");
  const sendTextMessage = vi.fn().mockResolvedValue(params.driverEventId);
  const waitForRoomEvent = vi.fn();

  params.events.forEach((entry) => {
    waitForRoomEvent.mockImplementationOnce(async () => {
      // Resolve factory entries only when the scenario actually waits.
      const source = entry.event;
      const event = typeof source === "function" ? source({ sendTextMessage }) : source;
      return { event, since: entry.since };
    });
  });

  createMatrixQaClient.mockReturnValue({
    primeRoom,
    sendTextMessage,
    waitForRoomEvent,
  });

  return { primeRoom, sendTextMessage, waitForRoomEvent };
}
|
||||
|
||||
function mockMatrixQaCliAccount(params: {
|
||||
accessToken: string;
|
||||
deviceId: string;
|
||||
@@ -165,7 +207,12 @@ describe("matrix live qa scenarios", () => {
|
||||
"matrix-subagent-thread-spawn",
|
||||
"matrix-top-level-reply-shape",
|
||||
"matrix-room-thread-reply-override",
|
||||
"matrix-room-partial-streaming-preview",
|
||||
"matrix-room-quiet-streaming-preview",
|
||||
"matrix-room-tool-progress-preview",
|
||||
"matrix-room-tool-progress-preview-opt-out",
|
||||
"matrix-room-tool-progress-error",
|
||||
"matrix-room-tool-progress-mention-safety",
|
||||
"matrix-room-block-streaming",
|
||||
"matrix-room-image-understanding-attachment",
|
||||
"matrix-room-generated-image-delivery",
|
||||
@@ -2243,6 +2290,269 @@ describe("matrix live qa scenarios", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("captures partial preview text messages before the finalized Matrix reply", async () => {
|
||||
const previewEventId = "$partial-preview";
|
||||
const fallbackFinalText = "MATRIX_QA_PARTIAL_STREAM_PREVIEW_COMPLETE";
|
||||
const { sendTextMessage } = mockMatrixQaRoomClient({
|
||||
driverEventId: "$partial-stream-trigger",
|
||||
events: [
|
||||
{
|
||||
event: matrixQaMessageEvent({
|
||||
kind: "message",
|
||||
eventId: previewEventId,
|
||||
body: "partial preview",
|
||||
}),
|
||||
since: "driver-sync-preview",
|
||||
},
|
||||
{
|
||||
event: ({ sendTextMessage }) =>
|
||||
matrixQaMessageEvent({
|
||||
kind: "message",
|
||||
eventId: "$partial-final",
|
||||
body: readMatrixQaReplyDirective(
|
||||
sendTextMessage.mock.calls[0]?.[0]?.body,
|
||||
fallbackFinalText,
|
||||
),
|
||||
relatesTo: {
|
||||
relType: "m.replace",
|
||||
eventId: previewEventId,
|
||||
},
|
||||
}),
|
||||
since: "driver-sync-next",
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const scenario = MATRIX_QA_SCENARIOS.find(
|
||||
(entry) => entry.id === "matrix-room-partial-streaming-preview",
|
||||
);
|
||||
expect(scenario).toBeDefined();
|
||||
|
||||
await expect(runMatrixQaScenario(scenario!, matrixQaScenarioContext())).resolves.toMatchObject({
|
||||
artifacts: {
|
||||
driverEventId: "$partial-stream-trigger",
|
||||
previewEventId: "$partial-preview",
|
||||
reply: {
|
||||
eventId: "$partial-final",
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
expect(sendTextMessage).toHaveBeenCalledWith({
|
||||
body: expect.stringContaining("Partial streaming QA check"),
|
||||
mentionUserIds: ["@sut:matrix-qa.test"],
|
||||
roomId: "!main:matrix-qa.test",
|
||||
});
|
||||
});
|
||||
|
||||
it("captures Matrix tool progress inside the quiet preview before finalizing", async () => {
|
||||
const previewEventId = "$tool-progress-preview";
|
||||
mockMatrixQaRoomClient({
|
||||
driverEventId: "$tool-progress-trigger",
|
||||
events: [
|
||||
{
|
||||
event: matrixQaMessageEvent({
|
||||
kind: "notice",
|
||||
eventId: previewEventId,
|
||||
body: "Working...\n- `tool: read`",
|
||||
}),
|
||||
since: "driver-sync-preview",
|
||||
},
|
||||
{
|
||||
event: ({ sendTextMessage }) =>
|
||||
matrixQaMessageEvent({
|
||||
kind: "notice",
|
||||
eventId: "$tool-progress-final",
|
||||
body: readMatrixQaReplyDirective(
|
||||
sendTextMessage.mock.calls[0]?.[0]?.body,
|
||||
"MATRIX_QA_TOOL_PROGRESS_FIXED",
|
||||
),
|
||||
relatesTo: {
|
||||
relType: "m.replace",
|
||||
eventId: previewEventId,
|
||||
},
|
||||
}),
|
||||
since: "driver-sync-next",
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const scenario = MATRIX_QA_SCENARIOS.find(
|
||||
(entry) => entry.id === "matrix-room-tool-progress-preview",
|
||||
);
|
||||
expect(scenario).toBeDefined();
|
||||
|
||||
await expect(runMatrixQaScenario(scenario!, matrixQaScenarioContext())).resolves.toMatchObject({
|
||||
artifacts: {
|
||||
driverEventId: "$tool-progress-trigger",
|
||||
previewBodyPreview: "Working...\n- `tool: read`",
|
||||
previewEventId: "$tool-progress-preview",
|
||||
reply: {
|
||||
eventId: "$tool-progress-final",
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it("keeps Matrix tool progress opt-out from creating Working previews", async () => {
|
||||
const { waitForRoomEvent } = mockMatrixQaRoomClient({
|
||||
driverEventId: "$tool-progress-optout-trigger",
|
||||
events: [
|
||||
{
|
||||
event: ({ sendTextMessage }) =>
|
||||
matrixQaMessageEvent({
|
||||
kind: "message",
|
||||
eventId: "$tool-progress-optout-final",
|
||||
body: readMatrixQaReplyDirective(
|
||||
sendTextMessage.mock.calls[0]?.[0]?.body,
|
||||
"MATRIX_QA_TOOL_PROGRESS_OPTOUT_FIXED",
|
||||
),
|
||||
}),
|
||||
since: "driver-sync-next",
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const scenario = MATRIX_QA_SCENARIOS.find(
|
||||
(entry) => entry.id === "matrix-room-tool-progress-preview-opt-out",
|
||||
);
|
||||
expect(scenario).toBeDefined();
|
||||
|
||||
await expect(runMatrixQaScenario(scenario!, matrixQaScenarioContext())).resolves.toMatchObject({
|
||||
artifacts: {
|
||||
driverEventId: "$tool-progress-optout-trigger",
|
||||
reply: {
|
||||
eventId: "$tool-progress-optout-final",
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
expect(waitForRoomEvent).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("finalizes Matrix tool progress previews after tool errors", async () => {
|
||||
const previewEventId = "$tool-progress-error-preview";
|
||||
const { sendTextMessage } = mockMatrixQaRoomClient({
|
||||
driverEventId: "$tool-progress-error-trigger",
|
||||
events: [
|
||||
{
|
||||
event: matrixQaMessageEvent({
|
||||
kind: "notice",
|
||||
eventId: previewEventId,
|
||||
body: "Working...\n- `read from missing-matrix-tool-progress-target.txt`",
|
||||
}),
|
||||
since: "driver-sync-preview",
|
||||
},
|
||||
{
|
||||
event: ({ sendTextMessage }) =>
|
||||
matrixQaMessageEvent({
|
||||
kind: "notice",
|
||||
eventId: "$tool-progress-error-final",
|
||||
body: readMatrixQaReplyDirective(
|
||||
sendTextMessage.mock.calls[0]?.[0]?.body,
|
||||
"MATRIX_QA_TOOL_PROGRESS_ERROR_FIXED",
|
||||
),
|
||||
relatesTo: {
|
||||
relType: "m.replace",
|
||||
eventId: previewEventId,
|
||||
},
|
||||
}),
|
||||
since: "driver-sync-next",
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const scenario = MATRIX_QA_SCENARIOS.find(
|
||||
(entry) => entry.id === "matrix-room-tool-progress-error",
|
||||
);
|
||||
expect(scenario).toBeDefined();
|
||||
|
||||
await expect(runMatrixQaScenario(scenario!, matrixQaScenarioContext())).resolves.toMatchObject({
|
||||
artifacts: {
|
||||
driverEventId: "$tool-progress-error-trigger",
|
||||
previewBodyPreview: "Working...\n- `read from missing-matrix-tool-progress-target.txt`",
|
||||
previewEventId: "$tool-progress-error-preview",
|
||||
reply: {
|
||||
eventId: "$tool-progress-error-final",
|
||||
relatesTo: {
|
||||
eventId: "$tool-progress-error-preview",
|
||||
relType: "m.replace",
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
expect(sendTextMessage).toHaveBeenCalledWith({
|
||||
body: expect.stringContaining("Tool progress error QA check"),
|
||||
mentionUserIds: ["@sut:matrix-qa.test"],
|
||||
roomId: "!main:matrix-qa.test",
|
||||
});
|
||||
});
|
||||
|
||||
// Drives the "matrix-room-tool-progress-mention-safety" scenario against a mocked
// room client that replays three events: an initial preview message, an m.replace
// edit whose progress line contains Matrix-looking identifiers, and a final
// m.replace edit. The scenario is expected to resolve with the trigger, preview
// and final event IDs plus a `previewMentions` artifact matching `{}`.
it("keeps Matrix-looking tool progress mentions inert in partial previews", async () => {
  const driverEventId = "$tool-progress-mention-trigger";
  const previewEventId = "$tool-progress-mention-preview";
  const finalEventId = "$tool-progress-mention-final";

  // Fixture text for the progress edit. The file name embeds "@room", a user ID
  // and a room ID so the edit looks like it mentions them.
  const progressBody =
    "Working...\n- `tool: read`\n- `read from matrix-progress-@room-@alice:matrix-qa.test-!room:matrix-qa.test.txt`";
  const progressFormattedBody =
    "Working...<br><ul><li><code>read from matrix-progress-@room-@alice:matrix-qa.test-!room:matrix-qa.test.txt</code></li></ul>";

  mockMatrixQaRoomClient({
    driverEventId,
    events: [
      {
        // First preview message; later events edit this one.
        event: matrixQaMessageEvent({
          kind: "message",
          eventId: previewEventId,
          body: "Working...\n- `tool: read`",
        }),
        since: "driver-sync-preview",
      },
      {
        // Progress edit carrying the Matrix-looking text with empty mentions.
        event: matrixQaMessageEvent({
          kind: "message",
          eventId: "$tool-progress-mention-edit",
          body: progressBody,
          formattedBody: progressFormattedBody,
          mentions: {},
          relatesTo: {
            relType: "m.replace",
            eventId: previewEventId,
          },
        }),
        since: "driver-sync-progress",
      },
      {
        // Final edit; its body is derived from the first message the driver sent.
        // NOTE(review): the second argument looks like a fallback directive - confirm
        // against readMatrixQaReplyDirective.
        event: (mocks) =>
          matrixQaMessageEvent({
            kind: "message",
            eventId: finalEventId,
            body: readMatrixQaReplyDirective(
              mocks.sendTextMessage.mock.calls[0]?.[0]?.body,
              "MATRIX_QA_TOOL_PROGRESS_MENTION_SAFE_FIXED",
            ),
            relatesTo: {
              relType: "m.replace",
              eventId: previewEventId,
            },
          }),
        since: "driver-sync-next",
      },
    ],
  });

  const scenarioId = "matrix-room-tool-progress-mention-safety";
  const scenario = MATRIX_QA_SCENARIOS.find(({ id }) => id === scenarioId);
  expect(scenario).toBeDefined();

  const outcome = runMatrixQaScenario(scenario!, matrixQaScenarioContext());
  await expect(outcome).resolves.toMatchObject({
    artifacts: {
      driverEventId,
      previewEventId,
      previewMentions: {},
      reply: {
        eventId: finalEventId,
      },
    },
  });
});
|
||||
|
||||
it("preserves separate finalized block events when Matrix block streaming is enabled", async () => {
|
||||
const primeRoom = vi.fn().mockResolvedValue("driver-sync-start");
|
||||
const sendTextMessage = vi.fn().mockResolvedValue("$block-stream-trigger");
|
||||
|
||||
@@ -254,11 +254,39 @@ describe("matrix qa config", () => {
|
||||
},
|
||||
replyToMode: "off",
|
||||
streaming: "partial",
|
||||
streamingPreviewToolProgress: true,
|
||||
threadBindings: {},
|
||||
threadReplies: "inbound",
|
||||
});
|
||||
expect(summarizeMatrixQaConfigSnapshot(snapshot)).toContain("autoJoin=allowlist");
|
||||
expect(summarizeMatrixQaConfigSnapshot(snapshot)).toContain("streaming=partial");
|
||||
expect(summarizeMatrixQaConfigSnapshot(snapshot)).toContain(
|
||||
"streaming.preview.toolProgress=true",
|
||||
);
|
||||
});
|
||||
|
||||
// Checks that an object-form `streaming` override (mode plus preview.toolProgress)
// is flattened into the snapshot's `streaming` and `streamingPreviewToolProgress`
// fields, and that both values appear in the snapshot summary.
it("builds Matrix QA config snapshots from structured streaming overrides", () => {
  const snapshot = buildMatrixQaConfigSnapshot({
    topology,
    driverUserId: "@driver:matrix-qa.test",
    observerUserId: "@observer:matrix-qa.test",
    sutUserId: "@sut:matrix-qa.test",
    overrides: {
      streaming: {
        mode: "quiet",
        preview: { toolProgress: false },
      },
    },
  });

  const { streaming, streamingPreviewToolProgress } = snapshot;
  expect(streaming).toBe("quiet");
  expect(streamingPreviewToolProgress).toBe(false);

  // Each expected fragment is checked against a freshly built summary.
  const expectedFragments = ["streaming=quiet", "streaming.preview.toolProgress=false"];
  for (const fragment of expectedFragments) {
    expect(summarizeMatrixQaConfigSnapshot(snapshot)).toContain(fragment);
  }
});
|
||||
|
||||
it("resolves role-based Matrix sender allowlist overrides", () => {
|
||||
|
||||
@@ -9,6 +9,13 @@ export type MatrixQaAutoJoinMode = "allowlist" | "always" | "off";
|
||||
export type MatrixQaStreamingMode = "off" | "partial" | "quiet";
|
||||
export type MatrixQaActorRole = "driver" | "observer" | "sut";
|
||||
|
||||
/**
 * Structured form of the `streaming` config override.
 *
 * Every field is optional. When resolving a snapshot, a `mode` other than
 * "partial" or "quiet" resolves to "off", and an omitted
 * `preview.toolProgress` resolves to `true`.
 */
export type MatrixQaStreamingConfig = Partial<{
  /** Streaming mode requested by the override. */
  mode: MatrixQaStreamingMode;
  /** Options under `streaming.preview`. */
  preview: Partial<{
    /** Value reported as `streaming.preview.toolProgress` in the snapshot summary. */
    toolProgress: boolean;
  }>;
}>;
|
||||
|
||||
export type MatrixQaAgentDefaultsOverrides = {
|
||||
blockStreamingChunk?: {
|
||||
breakPreference?: "newline" | "paragraph" | "sentence";
|
||||
@@ -62,7 +69,7 @@ export type MatrixQaConfigOverrides = {
|
||||
groupsByKey?: Record<string, MatrixQaGroupConfigOverrides>;
|
||||
replyToMode?: MatrixQaReplyToMode;
|
||||
startupVerification?: "if-unverified" | "off";
|
||||
streaming?: "off" | "partial" | "quiet" | boolean;
|
||||
streaming?: MatrixQaStreamingMode | MatrixQaStreamingConfig | boolean;
|
||||
threadBindings?: MatrixQaThreadBindingsConfigOverrides;
|
||||
threadReplies?: MatrixQaThreadRepliesMode;
|
||||
toolProfile?: "coding" | "messaging" | "minimal";
|
||||
@@ -86,6 +93,7 @@ export type MatrixQaConfigSnapshot = {
|
||||
replyToMode: MatrixQaReplyToMode;
|
||||
startupVerification?: "if-unverified" | "off";
|
||||
streaming: MatrixQaStreamingMode;
|
||||
streamingPreviewToolProgress: boolean;
|
||||
threadBindings: MatrixQaThreadBindingsConfigOverrides;
|
||||
threadReplies: MatrixQaThreadRepliesMode;
|
||||
};
|
||||
@@ -205,9 +213,29 @@ function resolveMatrixQaStreamingMode(
|
||||
if (value === "quiet") {
|
||||
return "quiet";
|
||||
}
|
||||
if (isMatrixQaStreamingConfig(value)) {
|
||||
if (value.mode === "partial" || value.mode === "quiet") {
|
||||
return value.mode;
|
||||
}
|
||||
}
|
||||
return "off";
|
||||
}
|
||||
|
||||
/**
 * Type guard for the structured (object) form of the `streaming` override.
 *
 * @param value - The raw `streaming` override: a mode string, a boolean, a
 *   structured config object, or `undefined`.
 * @returns `true` only for a non-null, non-array object; strings, booleans,
 *   `undefined` and `null` all yield `false`.
 */
function isMatrixQaStreamingConfig(
  value: MatrixQaConfigOverrides["streaming"],
): value is MatrixQaStreamingConfig {
  // `typeof null` is "object", so null needs its own exclusion.
  if (typeof value !== "object" || value === null) {
    return false;
  }
  return !Array.isArray(value);
}
|
||||
|
||||
/**
 * Resolves the `streaming.preview.toolProgress` setting from a `streaming`
 * override.
 *
 * @param value - The raw `streaming` override (mode string, boolean,
 *   structured config object, or `undefined`).
 * @returns The explicit `preview.toolProgress` value when `value` is a
 *   structured config that sets it; otherwise `true`.
 */
function resolveMatrixQaStreamingPreviewToolProgress(
  value: MatrixQaConfigOverrides["streaming"],
): boolean {
  // Only the structured form can carry an explicit preview setting.
  const explicitSetting = isMatrixQaStreamingConfig(value)
    ? value.preview?.toolProgress
    : undefined;
  return explicitSetting ?? true;
}
|
||||
|
||||
function resolveMatrixQaAutoJoinAllowlist(params: { overrides?: MatrixQaConfigOverrides }) {
|
||||
if (params.overrides?.autoJoin !== "allowlist") {
|
||||
return [];
|
||||
@@ -352,6 +380,9 @@ export function buildMatrixQaConfigSnapshot(params: {
|
||||
replyToMode: params.overrides?.replyToMode ?? "off",
|
||||
startupVerification: params.overrides?.startupVerification,
|
||||
streaming: resolveMatrixQaStreamingMode(params.overrides?.streaming),
|
||||
streamingPreviewToolProgress: resolveMatrixQaStreamingPreviewToolProgress(
|
||||
params.overrides?.streaming,
|
||||
),
|
||||
threadBindings: { ...params.overrides?.threadBindings },
|
||||
threadReplies: params.overrides?.threadReplies ?? "inbound",
|
||||
};
|
||||
@@ -366,6 +397,7 @@ export function summarizeMatrixQaConfigSnapshot(snapshot: MatrixQaConfigSnapshot
|
||||
`dm.sessionScope=${snapshot.dm.sessionScope}`,
|
||||
`dm.threadReplies=${snapshot.dm.threadReplies}`,
|
||||
`streaming=${snapshot.streaming}`,
|
||||
`streaming.preview.toolProgress=${formatMatrixQaBoolean(snapshot.streamingPreviewToolProgress)}`,
|
||||
`blockStreaming=${formatMatrixQaBoolean(snapshot.blockStreaming)}`,
|
||||
`autoJoin=${snapshot.autoJoin}`,
|
||||
`encryption=${formatMatrixQaBoolean(snapshot.encryption)}`,
|
||||
|
||||
Reference in New Issue
Block a user