mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 06:30:42 +00:00
QA: genericize mock streaming fixtures
This commit is contained in:
@@ -99,6 +99,47 @@ describe("qa mock openai server", () => {
|
||||
expect(body).toContain('"name":"read"');
|
||||
});
|
||||
|
||||
it("emits deterministic text deltas for generic streaming QA prompts", async () => {
  const server = await startMockServer();

  // Sends one streaming request carrying a single user prompt, checks the
  // HTTP status, and hands back the raw response body text for inspection.
  const requestStreamBody = async (promptText: string) => {
    const response = await fetch(`${server.baseUrl}/v1/responses`, {
      method: "POST",
      headers: {
        "content-type": "application/json",
      },
      body: JSON.stringify({
        stream: true,
        input: [makeUserInput(promptText)],
      }),
    });
    expect(response.status).toBe(200);
    return await response.text();
  };

  // Quiet streaming: one final-answer message streamed as text deltas.
  const quietBody = await requestStreamBody(
    "Quiet streaming QA check: reply exactly `MATRIX_QA_STREAMING_OK`.",
  );
  expect(quietBody).toContain('"type":"response.output_text.delta"');
  expect(quietBody).toContain('"phase":"final_answer"');
  expect(quietBody).toContain("MATRIX_QA_STREAMING_OK");

  // Block streaming: two separate assistant messages, one per marker.
  const blockBody = await requestStreamBody(
    "Block streaming QA check: emit exactly two assistant message blocks in order. First exact marker: `BLOCK_ONE_OK`. Second exact marker: `BLOCK_TWO_OK`.",
  );
  expect(blockBody).toContain('"item_id":"msg_mock_block_1"');
  expect(blockBody).toContain('"item_id":"msg_mock_block_2"');
  expect(blockBody).toContain("BLOCK_ONE_OK");
  expect(blockBody).toContain("BLOCK_TWO_OK");
});
|
||||
|
||||
it("prefers path-like refs over generic quoted keys in prompts", async () => {
|
||||
const server = await startQaMockOpenAiServer({
|
||||
host: "127.0.0.1",
|
||||
|
||||
@@ -6,6 +6,20 @@ type ResponsesInputItem = Record<string, unknown>;
|
||||
|
||||
type StreamEvent =
|
||||
| { type: "response.output_item.added"; item: Record<string, unknown> }
|
||||
| {
|
||||
type: "response.output_text.delta";
|
||||
item_id: string;
|
||||
output_index: number;
|
||||
content_index: number;
|
||||
delta: string;
|
||||
}
|
||||
| {
|
||||
type: "response.output_text.done";
|
||||
item_id: string;
|
||||
output_index: number;
|
||||
content_index: number;
|
||||
text: string;
|
||||
}
|
||||
| { type: "response.function_call_arguments.delta"; delta: string }
|
||||
| { type: "response.output_item.done"; item: Record<string, unknown> }
|
||||
| {
|
||||
@@ -128,6 +142,8 @@ const QA_REASONING_ONLY_RECOVERY_PROMPT_RE = /reasoning-only continuation qa che
|
||||
const QA_REASONING_ONLY_SIDE_EFFECT_PROMPT_RE = /reasoning-only after write safety check/i;
|
||||
const QA_EMPTY_RESPONSE_RECOVERY_PROMPT_RE = /empty response continuation qa check/i;
|
||||
const QA_EMPTY_RESPONSE_EXHAUSTION_PROMPT_RE = /empty response exhaustion qa check/i;
|
||||
// Detects the "quiet streaming QA check" prompt, case-insensitively. The
// optional "matrix " prefix group documents the older Matrix-specific wording;
// because the pattern is unanchored, `.test()` accepts the phrase with or
// without that prefix either way.
const QA_QUIET_STREAMING_PROMPT_RE = /(?:matrix\s+)?quiet streaming qa check/i;
|
||||
// Detects the "block streaming QA check" prompt, case-insensitively. The
// optional "matrix " prefix group documents the older Matrix-specific wording;
// because the pattern is unanchored, `.test()` accepts the phrase with or
// without that prefix either way.
const QA_BLOCK_STREAMING_PROMPT_RE = /(?:matrix\s+)?block streaming qa check/i;
|
||||
const QA_REASONING_ONLY_RETRY_NEEDLE =
|
||||
"recorded reasoning but did not produce a user-visible answer";
|
||||
const QA_EMPTY_RESPONSE_RETRY_NEEDLE =
|
||||
@@ -507,6 +523,21 @@ function extractExactMarkerDirective(text: string) {
|
||||
return extractLastCapture(text, /exact marker:\s*([^\s`.,;:!?]+(?:-[^\s`.,;:!?]+)*)/i);
|
||||
}
|
||||
|
||||
/**
 * Pulls the marker value that follows `<label>:` in a prompt.
 *
 * Two forms are recognised, both matched case-insensitively:
 *  1. a backtick-quoted value (anything up to the closing backtick), which
 *     wins whenever it is present;
 *  2. otherwise a bare token that stops at whitespace, a backtick, or one of
 *     the punctuation characters `.,;:!?`.
 *
 * @param text  Prompt text to search.
 * @param label Literal label preceding the colon; regex metacharacters in it
 *              are escaped so it is matched verbatim.
 * @returns Whatever `extractLastCapture` yields for the first form that
 *          matches (presumably the capture of the last occurrence; that helper
 *          is defined elsewhere in the file).
 */
function extractLabeledMarkerDirective(text: string, label: string) {
  // Neutralise regex metacharacters so the label is treated as plain text.
  const escapedLabel = label.replaceAll(/[.*+?^${}()|[\]\\]/g, "\\$&");

  const quotedValue = extractLastCapture(
    text,
    new RegExp(`${escapedLabel}:\\s*\`([^\\\`]+)\``, "i"),
  );

  // Only fall back to the bare-token form when no quoted value was found.
  return (
    quotedValue ||
    extractLastCapture(
      text,
      new RegExp(`${escapedLabel}:\\s*([^\\s\\\`.,;:!?]+(?:-[^\\s\\\`.,;:!?]+)*)`, "i"),
    )
  );
}
|
||||
|
||||
function isHeartbeatPrompt(text: string) {
|
||||
const trimmed = text.trim();
|
||||
if (!trimmed || /remember this fact/i.test(trimmed)) {
|
||||
@@ -691,39 +722,95 @@ function extractPlannedToolName(events: StreamEvent[]) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function buildAssistantEvents(text: string): StreamEvent[] {
|
||||
const outputItem = {
|
||||
/** Describes one mock assistant message to be turned into stream events. */
type MockAssistantMessageSpec = {
  /** Identifier used for the output item and as `item_id` on its text events. */
  id: string;
  /** Complete message text placed in the finished output item. */
  text: string;
  /** Optional phase tag; copied onto the output item only when provided. */
  phase?: "commentary" | "final_answer";
  /** Optional text fragments, each emitted as its own text delta event. */
  streamDeltas?: string[];
};
|
||||
|
||||
/**
 * Cuts `text` into consecutive slices so a mock stream can emit it piecewise.
 *
 * The slice width is `ceil(text.length / parts)` (never below 1), so the
 * result has at most `parts` slices and concatenating them reproduces `text`.
 * Text of length 0 or 1 is returned as a single-element array. Longer text is
 * always returned as at least two slices: if the computed width would cover
 * the whole string, it is split after its first character instead.
 *
 * @param text  Text to split.
 * @param parts Target number of slices (default 3).
 * @returns The slices in order.
 */
function splitMockStreamingText(text: string, parts = 3) {
  const total = text.length;
  if (total <= 1) {
    return [text];
  }

  const width = Math.max(1, Math.ceil(total / parts));
  const pieces: string[] = [];
  let cursor = 0;
  while (cursor < total) {
    const end = cursor + width;
    pieces.push(text.slice(cursor, end));
    cursor = end;
  }

  if (pieces.length > 1) {
    return pieces;
  }
  // A single slice would not look like streaming; force a two-way split.
  return [text.slice(0, 1), text.slice(1)];
}
|
||||
|
||||
function buildAssistantOutputItem(spec: MockAssistantMessageSpec) {
|
||||
return {
|
||||
type: "message",
|
||||
id: "msg_mock_1",
|
||||
id: spec.id,
|
||||
role: "assistant",
|
||||
status: "completed",
|
||||
content: [{ type: "output_text", text, annotations: [] }],
|
||||
...(spec.phase ? { phase: spec.phase } : {}),
|
||||
content: [{ type: "output_text", text: spec.text, annotations: [] }],
|
||||
} as const;
|
||||
return [
|
||||
{
|
||||
}
|
||||
|
||||
function buildAssistantEvents(specsOrText: MockAssistantMessageSpec[] | string): StreamEvent[] {
|
||||
const specs =
|
||||
typeof specsOrText === "string"
|
||||
? [
|
||||
{
|
||||
id: "msg_mock_1",
|
||||
text: specsOrText,
|
||||
},
|
||||
]
|
||||
: specsOrText;
|
||||
const output = specs.map((spec) => buildAssistantOutputItem(spec));
|
||||
const events: StreamEvent[] = [];
|
||||
|
||||
for (const [outputIndex, spec] of specs.entries()) {
|
||||
events.push({
|
||||
type: "response.output_item.added",
|
||||
item: {
|
||||
type: "message",
|
||||
id: "msg_mock_1",
|
||||
id: spec.id,
|
||||
role: "assistant",
|
||||
...(spec.phase ? { phase: spec.phase } : {}),
|
||||
content: [],
|
||||
status: "in_progress",
|
||||
},
|
||||
},
|
||||
{
|
||||
});
|
||||
for (const delta of spec.streamDeltas ?? []) {
|
||||
events.push({
|
||||
type: "response.output_text.delta",
|
||||
item_id: spec.id,
|
||||
output_index: outputIndex,
|
||||
content_index: 0,
|
||||
delta,
|
||||
});
|
||||
}
|
||||
if ((spec.streamDeltas ?? []).length > 0) {
|
||||
events.push({
|
||||
type: "response.output_text.done",
|
||||
item_id: spec.id,
|
||||
output_index: outputIndex,
|
||||
content_index: 0,
|
||||
text: spec.text,
|
||||
});
|
||||
}
|
||||
events.push({
|
||||
type: "response.output_item.done",
|
||||
item: outputItem,
|
||||
item: output[outputIndex],
|
||||
});
|
||||
}
|
||||
|
||||
events.push({
|
||||
type: "response.completed",
|
||||
response: {
|
||||
id: "resp_mock_msg_1",
|
||||
status: "completed",
|
||||
output,
|
||||
usage: { input_tokens: 64, output_tokens: 24, total_tokens: 88 },
|
||||
},
|
||||
{
|
||||
type: "response.completed",
|
||||
response: {
|
||||
id: "resp_mock_msg_1",
|
||||
status: "completed",
|
||||
output: [outputItem],
|
||||
usage: { input_tokens: 64, output_tokens: 24, total_tokens: 88 },
|
||||
},
|
||||
},
|
||||
];
|
||||
});
|
||||
return events;
|
||||
}
|
||||
|
||||
function buildReasoningOnlyEvents(summaryText: string, id: string): StreamEvent[] {
|
||||
@@ -766,6 +853,16 @@ async function buildResponsesPayload(
|
||||
const toolOutput = extractToolOutput(input);
|
||||
const toolJson = parseToolOutputJson(toolOutput);
|
||||
const allInputText = extractAllRequestTexts(input, body);
|
||||
const exactReplyDirective =
|
||||
extractExactReplyDirective(prompt) ?? extractExactReplyDirective(allInputText);
|
||||
const firstExactMarkerDirective = extractLabeledMarkerDirective(
|
||||
allInputText,
|
||||
"first exact marker",
|
||||
);
|
||||
const secondExactMarkerDirective = extractLabeledMarkerDirective(
|
||||
allInputText,
|
||||
"second exact marker",
|
||||
);
|
||||
const isGroupChat = allInputText.includes('"is_group_chat": true');
|
||||
const isBaselineUnmentionedChannelChatter = /\bno bot ping here\b/i.test(prompt);
|
||||
const hasReasoningOnlyRetryInstruction = allInputText.includes(QA_REASONING_ONLY_RETRY_NEEDLE);
|
||||
@@ -818,6 +915,36 @@ async function buildResponsesPayload(
|
||||
}
|
||||
return buildAssistantEvents("");
|
||||
}
|
||||
if (QA_QUIET_STREAMING_PROMPT_RE.test(allInputText) && exactReplyDirective) {
|
||||
return buildAssistantEvents([
|
||||
{
|
||||
id: "msg_mock_quiet_stream",
|
||||
phase: "final_answer",
|
||||
streamDeltas: splitMockStreamingText(exactReplyDirective),
|
||||
text: exactReplyDirective,
|
||||
},
|
||||
]);
|
||||
}
|
||||
if (
|
||||
QA_BLOCK_STREAMING_PROMPT_RE.test(allInputText) &&
|
||||
firstExactMarkerDirective &&
|
||||
secondExactMarkerDirective
|
||||
) {
|
||||
return buildAssistantEvents([
|
||||
{
|
||||
id: "msg_mock_block_1",
|
||||
phase: "final_answer",
|
||||
streamDeltas: splitMockStreamingText(firstExactMarkerDirective),
|
||||
text: firstExactMarkerDirective,
|
||||
},
|
||||
{
|
||||
id: "msg_mock_block_2",
|
||||
phase: "final_answer",
|
||||
streamDeltas: splitMockStreamingText(secondExactMarkerDirective),
|
||||
text: secondExactMarkerDirective,
|
||||
},
|
||||
]);
|
||||
}
|
||||
if (/lobster invaders/i.test(prompt)) {
|
||||
if (!toolOutput) {
|
||||
return buildToolCallEventsWithArgs("read", { path: "QA_KICKOFF_TASK.md" });
|
||||
|
||||
Reference in New Issue
Block a user