fix(qa): make matrix block streaming deterministic

This commit is contained in:
Vincent Koc
2026-05-27 20:01:08 +02:00
parent 9755241b56
commit fdbf3cf4e7
6 changed files with 148 additions and 59 deletions

View File

@@ -7,9 +7,6 @@
"url": "https://github.com/openclaw/openclaw"
},
"type": "module",
"scripts": {
"build:viewer": "node scripts/build-viewer.mjs"
},
"devDependencies": {
"@openclaw/plugin-sdk": "workspace:*"
},
@@ -28,7 +25,7 @@
"pluginApi": ">=2026.5.27"
},
"assetScripts": {
"build": "pnpm build:viewer"
"build": "node ../../scripts/build-diffs-viewer-runtime.mjs full"
},
"build": {
"openclawVersion": "2026.5.27",

View File

@@ -1,5 +0,0 @@
#!/usr/bin/env node
import { buildDiffsViewerRuntime } from "../../../scripts/build-diffs-viewer-runtime.mjs";
await buildDiffsViewerRuntime("full");

View File

@@ -7,9 +7,6 @@
"url": "https://github.com/openclaw/openclaw"
},
"type": "module",
"scripts": {
"build:viewer": "node scripts/build-viewer.mjs"
},
"dependencies": {
"@pierre/diffs": "1.2.2",
"@pierre/theme": "1.0.3",
@@ -35,7 +32,7 @@
"pluginApi": ">=2026.5.27"
},
"assetScripts": {
"build": "pnpm build:viewer"
"build": "node ../../scripts/build-diffs-viewer-runtime.mjs curated"
},
"build": {
"openclawVersion": "2026.5.27",

View File

@@ -1,5 +0,0 @@
#!/usr/bin/env node
import { buildDiffsViewerRuntime } from "../../../scripts/build-diffs-viewer-runtime.mjs";
await buildDiffsViewerRuntime("curated");

View File

@@ -314,6 +314,14 @@ describe("qa mock openai server", () => {
expect(telegramThreeChunkLongBody).toContain("TELEGRAM-LONG-FINAL-3CHUNK-END");
expect(telegramThreeChunkLongBody.length).toBeGreaterThan(8_000);
const blockPrompt = [
"Block streaming QA check: complete this whole sequence in one turn.",
"Step 1: send an assistant text block containing only this exact marker: `BLOCK_ONE_OK`.",
"That first marker block must be emitted before any tool call.",
"Step 2: after the first marker block, use the read tool exactly once on `QA_KICKOFF_TASK.md`.",
"Step 3: after that read completes, send a final assistant text block containing only this exact marker: `BLOCK_TWO_OK`.",
"Never put both markers in the same assistant text block.",
].join("\n");
const blockResponse = await fetch(`${server.baseUrl}/v1/responses`, {
method: "POST",
headers: {
@@ -321,26 +329,39 @@ describe("qa mock openai server", () => {
},
body: JSON.stringify({
stream: true,
input: [
makeUserInput(
[
"Block streaming QA check: complete this whole sequence in one turn.",
"Step 1: send an assistant text block containing only this exact marker: `BLOCK_ONE_OK`.",
"That first marker block must be emitted before any tool call.",
"Step 2: after the first marker block, use the read tool exactly once on `QA_KICKOFF_TASK.md`.",
"Step 3: after that read completes, send a final assistant text block containing only this exact marker: `BLOCK_TWO_OK`.",
"Never put both markers in the same assistant text block.",
].join("\n"),
),
],
input: [makeUserInput(blockPrompt)],
}),
});
expect(blockResponse.status).toBe(200);
const blockBody = await blockResponse.text();
expect(blockBody).toContain('"item_id":"msg_mock_block_1"');
expect(blockBody).toContain('"item_id":"msg_mock_block_2"');
expect(blockBody).toContain('"name":"read"');
expect(blockBody).toContain("QA_KICKOFF_TASK.md");
expect(blockBody).toContain("BLOCK_ONE_OK");
expect(blockBody).toContain("BLOCK_TWO_OK");
expect(blockBody).not.toContain('"item_id":"msg_mock_block_2"');
const blockContinuation = await fetch(`${server.baseUrl}/v1/responses`, {
method: "POST",
headers: {
"content-type": "application/json",
},
body: JSON.stringify({
stream: true,
input: [
makeUserInput(blockPrompt),
{
type: "function_call_output",
call_id: "call_mock_read_fixture",
output: "QA kickoff task read",
},
],
}),
});
expect(blockContinuation.status).toBe(200);
const blockContinuationBody = await blockContinuation.text();
expect(blockContinuationBody).toContain('"item_id":"msg_mock_block_2"');
expect(blockContinuationBody).toContain("BLOCK_TWO_OK");
expect(blockContinuationBody).not.toContain('"item_id":"msg_mock_block_1"');
});
it("plans deterministic tool-progress reads from prompt paths", async () => {

View File

@@ -643,7 +643,7 @@ function execCommandFromToolProgressPrompt(prompt: string) {
);
}
function buildToolCallEventsWithArgs(name: string, args: Record<string, unknown>): StreamEvent[] {
function buildMockFunctionCall(name: string, args: Record<string, unknown>) {
const serialized = JSON.stringify(args);
const callSuffix = createHash("sha1")
.update(name)
@@ -653,42 +653,46 @@ function buildToolCallEventsWithArgs(name: string, args: Record<string, unknown>
.slice(0, 10);
const callId = `call_mock_${name}_${callSuffix}`;
const itemId = `fc_mock_${name}_${callSuffix}`;
const item = {
type: "function_call",
id: itemId,
call_id: callId,
name,
arguments: serialized,
};
return {
callId,
item,
itemId,
responseId: `resp_mock_${name}_${callSuffix}`,
serialized,
};
}
function buildToolCallEventsWithArgs(name: string, args: Record<string, unknown>): StreamEvent[] {
const call = buildMockFunctionCall(name, args);
return [
{
type: "response.output_item.added",
item: {
type: "function_call",
id: itemId,
call_id: callId,
id: call.itemId,
call_id: call.callId,
name,
arguments: "",
},
},
{ type: "response.function_call_arguments.delta", delta: serialized },
{ type: "response.function_call_arguments.delta", delta: call.serialized },
{
type: "response.output_item.done",
item: {
type: "function_call",
id: itemId,
call_id: callId,
name,
arguments: serialized,
},
item: call.item,
},
{
type: "response.completed",
response: {
id: `resp_mock_${name}_${callSuffix}`,
id: call.responseId,
status: "completed",
output: [
{
type: "function_call",
id: itemId,
call_id: callId,
name,
arguments: serialized,
},
],
output: [call.item],
usage: { input_tokens: 64, output_tokens: 16, total_tokens: 80 },
},
},
@@ -1449,6 +1453,78 @@ function buildAssistantOutputItem(spec: MockAssistantMessageSpec) {
} as const;
}
/**
 * Appends the stream events for a single assistant message onto `events`
 * (the array is mutated in place; nothing is returned).
 *
 * Emitted sequence:
 *  1. `response.output_item.added` with an empty, in-progress message item
 *     (the `phase` key is present only when `spec.phase` is truthy).
 *  2. One `response.output_text.delta` per entry of `spec.streamDeltas`.
 *  3. `response.output_text.done` carrying `spec.text` — only when at least
 *     one delta was emitted.
 *  4. `response.output_item.done` with the item from `buildAssistantOutputItem`.
 *
 * @param events - Event list to extend.
 * @param spec - Description of the assistant message to stream.
 */
function appendAssistantMessageEvents(events: StreamEvent[], spec: MockAssistantMessageSpec) {
  const messageId = spec.id;
  // A missing delta list is treated exactly like an empty one.
  const chunks = spec.streamDeltas ?? [];
  const phaseField = spec.phase ? { phase: spec.phase } : {};

  events.push({
    type: "response.output_item.added",
    item: {
      type: "message",
      id: messageId,
      role: "assistant",
      ...phaseField,
      content: [],
      status: "in_progress",
    },
  });

  for (const chunk of chunks) {
    events.push({
      type: "response.output_text.delta",
      item_id: messageId,
      output_index: 0,
      content_index: 0,
      delta: chunk,
    });
  }

  // The text "done" marker is only meaningful after streamed deltas.
  if (chunks.length !== 0) {
    events.push({
      type: "response.output_text.done",
      item_id: messageId,
      output_index: 0,
      content_index: 0,
      text: spec.text,
    });
  }

  events.push({
    type: "response.output_item.done",
    item: buildAssistantOutputItem(spec),
  });
}
/**
 * Builds one response stream in which an assistant message is followed by a
 * single function call within the same response.
 *
 * The returned list contains the assistant message events (via
 * `appendAssistantMessageEvents`), then the function call's
 * `response.output_item.added` (with empty arguments), one
 * `response.function_call_arguments.delta` holding the full serialized
 * arguments, `response.output_item.done`, and finally `response.completed`
 * whose output lists the message item followed by the function-call item.
 *
 * @param spec - Assistant message to emit before the tool call.
 * @param name - Name of the function being called.
 * @param args - Arguments for the call, as produced by `buildMockFunctionCall`.
 * @returns The ordered stream events for the whole response.
 */
function buildAssistantThenToolCallEvents(
  spec: MockAssistantMessageSpec,
  name: string,
  args: Record<string, unknown>,
): StreamEvent[] {
  const {
    callId,
    item: functionCallItem,
    itemId,
    responseId,
    serialized,
  } = buildMockFunctionCall(name, args);
  const messageItem = buildAssistantOutputItem(spec);

  const stream: StreamEvent[] = [];
  appendAssistantMessageEvents(stream, spec);

  stream.push(
    {
      type: "response.output_item.added",
      item: {
        type: "function_call",
        id: itemId,
        call_id: callId,
        name,
        // Arguments arrive through the delta event that follows.
        arguments: "",
      },
    },
    { type: "response.function_call_arguments.delta", delta: serialized },
    {
      type: "response.output_item.done",
      item: functionCallItem,
    },
    {
      type: "response.completed",
      response: {
        id: responseId,
        status: "completed",
        output: [messageItem, functionCallItem],
        usage: { input_tokens: 64, output_tokens: 32, total_tokens: 96 },
      },
    },
  );

  return stream;
}
function buildAssistantEvents(specsOrText: MockAssistantMessageSpec[] | string): StreamEvent[] {
const specs =
typeof specsOrText === "string"
@@ -1861,13 +1937,21 @@ async function buildResponsesPayload(
return buildAssistantEvents(toolProgressReplyDirective);
}
if (QA_BLOCK_STREAMING_PROMPT_RE.test(allInputText) && blockStreamingMarkers) {
if (!toolOutput) {
return buildAssistantThenToolCallEvents(
{
id: "msg_mock_block_1",
phase: "final_answer",
streamDeltas: splitMockStreamingText(blockStreamingMarkers.first),
text: blockStreamingMarkers.first,
},
"read",
{
path: readTargetFromPrompt(blockStreamingPrompt),
},
);
}
return buildAssistantEvents([
{
id: "msg_mock_block_1",
phase: "final_answer",
streamDeltas: splitMockStreamingText(blockStreamingMarkers.first),
text: blockStreamingMarkers.first,
},
{
id: "msg_mock_block_2",
phase: "final_answer",