mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:20:43 +00:00
fix(qa): align mock tool progress markers
This commit is contained in:
@@ -267,6 +267,43 @@ describe("qa mock openai server", () => {
|
||||
expect(body).toContain("qa-progress-target.txt");
|
||||
});
|
||||
|
||||
// Two-step check of the mock server's tool-progress flow for a prompt that says
// "reply with only this exact marker and no other text: ..." rather than "exact marker: ...".
//   1. A streaming request holding only the user prompt must produce a read tool call
//      that mentions QA_KICKOFF_TASK.md.
//   2. A non-streaming follow-up that appends a function_call_output item must answer
//      with exactly TOOL_PROGRESS_MARKER_OK.
// NOTE(review): startMockServer, makeUserInput and expectResponsesJson are defined outside
// this view; expectResponsesJson presumably posts to the same endpoint and parses the JSON
// body - confirm against the helpers.
it("plans deterministic tool-progress reads for exact-marker prompts", async () => {
|
||||
const server = await startMockServer();
|
||||
const prompt =
|
||||
"Tool progress QA check: use the read tool exactly once on `QA_KICKOFF_TASK.md` before answering. After that read completes, reply with only this exact marker and no other text: `TOOL_PROGRESS_MARKER_OK`.";
|
||||
|
||||
// Step 1: no tool output is present yet, so the response is expected to plan the read.
const toolPlan = await fetch(`${server.baseUrl}/v1/responses`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"content-type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
stream: true,
|
||||
input: [makeUserInput(prompt)],
|
||||
}),
|
||||
});
|
||||
|
||||
expect(toolPlan.status).toBe(200);
|
||||
const toolPlanBody = await toolPlan.text();
|
||||
expect(toolPlanBody).toContain('"name":"read"');
|
||||
expect(toolPlanBody).toContain("QA_KICKOFF_TASK.md");
|
||||
|
||||
// Step 2: replay the prompt together with a tool result and expect the bare marker back.
const final = await expectResponsesJson<{
|
||||
output: Array<{ content?: Array<{ text?: string }> }>;
|
||||
}>(server, {
|
||||
stream: false,
|
||||
input: [
|
||||
makeUserInput(prompt),
|
||||
{
|
||||
type: "function_call_output",
|
||||
call_id: "call_mock_read_1",
|
||||
output: JSON.stringify({ text: "kickoff task" }),
|
||||
},
|
||||
],
|
||||
});
|
||||
expect(final.output[0]?.content?.[0]?.text).toBe("TOOL_PROGRESS_MARKER_OK");
|
||||
});
|
||||
|
||||
it("requires deterministic tool-progress error prompts to observe a failed tool", async () => {
|
||||
const server = await startMockServer();
|
||||
const prompt =
|
||||
|
||||
@@ -562,11 +562,14 @@ function extractFinishExactlyDirective(text: string) {
|
||||
}
|
||||
|
||||
// Pulls the marker that follows an "exact marker ...:" phrase out of the given text.
// A backtick-quoted marker is tried first; if that lookup yields nothing truthy, a bare
// token is captured instead (a run without whitespace, backticks, or any of . , ; : ! ?).
// The result of whichever extractLastCapture call is used is returned unchanged.
//
// NOTE(review): this span is a rendered diff hunk (@@ -562,11 +562,14 @@) without +/- markers,
// so each changed statement appears twice. Going by the hunk's line counts, the first of each
// pair is the removed form (colon must directly follow "exact marker") and the second is the
// added form (word boundary, then up to 120 characters that are neither a colon nor a newline,
// then the colon) - confirm against the repository.
// NOTE(review): extractLastCapture is defined outside this view; presumably it returns the
// first capture group of the last match, or a falsy value when nothing matches - verify.
function extractExactMarkerDirective(text: string) {
|
||||
const backtickedMatch = extractLastCapture(text, /exact marker:\s*`([^`]+)`/i);
|
||||
const backtickedMatch = extractLastCapture(text, /exact marker\b[^:\n]{0,120}:\s*`([^`]+)`/i);
|
||||
if (backtickedMatch) {
|
||||
return backtickedMatch;
|
||||
}
|
||||
return extractLastCapture(text, /exact marker:\s*([^\s`.,;:!?]+(?:-[^\s`.,;:!?]+)*)/i);
|
||||
return extractLastCapture(
|
||||
text,
|
||||
/exact marker\b[^:\n]{0,120}:\s*([^\s`.,;:!?]+(?:-[^\s`.,;:!?]+)*)/i,
|
||||
);
|
||||
}
|
||||
|
||||
function extractLabeledMarkerDirective(text: string, label: string) {
|
||||
@@ -1294,19 +1297,22 @@ async function buildResponsesPayload(
|
||||
},
|
||||
]);
|
||||
}
|
||||
if (QA_TOOL_PROGRESS_ERROR_PROMPT_RE.test(allInputText) && exactReplyDirective) {
|
||||
const toolProgressReplyDirective = exactReplyDirective ?? exactMarkerDirective;
|
||||
if (QA_TOOL_PROGRESS_ERROR_PROMPT_RE.test(allInputText) && toolProgressReplyDirective) {
|
||||
if (!toolOutput) {
|
||||
return buildToolProgressReadEvents(QA_TOOL_PROGRESS_ERROR_PROMPT_RE);
|
||||
}
|
||||
return buildAssistantEvents(
|
||||
hasToolErrorOutput(toolJson, toolOutput) ? exactReplyDirective : "BUG-TOOL-DID-NOT-FAIL",
|
||||
hasToolErrorOutput(toolJson, toolOutput)
|
||||
? toolProgressReplyDirective
|
||||
: "BUG-TOOL-DID-NOT-FAIL",
|
||||
);
|
||||
}
|
||||
if (QA_TOOL_PROGRESS_PROMPT_RE.test(allInputText) && exactReplyDirective) {
|
||||
if (QA_TOOL_PROGRESS_PROMPT_RE.test(allInputText) && toolProgressReplyDirective) {
|
||||
if (!toolOutput) {
|
||||
return buildToolProgressReadEvents(QA_TOOL_PROGRESS_PROMPT_RE);
|
||||
}
|
||||
return buildAssistantEvents(exactReplyDirective);
|
||||
return buildAssistantEvents(toolProgressReplyDirective);
|
||||
}
|
||||
if (
|
||||
QA_BLOCK_STREAMING_PROMPT_RE.test(allInputText) &&
|
||||
|
||||
Reference in New Issue
Block a user