diff --git a/CHANGELOG.md b/CHANGELOG.md index b34cf87224e..02bc7dd9dd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Gateway/systemd: preserve operator-added secrets in the Gateway env file across re-stage while clearing OpenClaw-managed keys (such as `OPENCLAW_GATEWAY_TOKEN`) so a fresh staging value is never shadowed by a stale env-file copy; operator secrets are also retained when the state-dir `.env` is empty. Fixes #76860. Thanks @hclsys. +- QA/Matrix: keep the mock OpenAI tool-progress provider aligned with exact-marker Matrix prompts so the hardened live preview scenario still forces a deterministic read before final delivery. Thanks @vincentkoc. - OpenAI/Google Meet: wait for realtime voice `session.updated` before treating the bridge as connected, so Meet joins do not return with audio queued behind an unconfigured realtime session. Thanks @vincentkoc. - Plugins/catalog: merge official external catalog descriptors into partial package channel config metadata, so lagging WeCom/Yuanbao manifests keep their own schema while still exposing host-supplied labels and setup text. Thanks @vincentkoc. - Plugins/catalog: supplement lagging official external WeCom and Yuanbao npm manifests with channel config descriptors and declared tool contracts from the OpenClaw catalog, so trusted package sweeps no longer fail because external package metadata trails the host contract. Thanks @vincentkoc. diff --git a/extensions/qa-lab/src/providers/mock-openai/server.test.ts b/extensions/qa-lab/src/providers/mock-openai/server.test.ts index df3a36c4c4d..804ef592c5e 100644 --- a/extensions/qa-lab/src/providers/mock-openai/server.test.ts +++ b/extensions/qa-lab/src/providers/mock-openai/server.test.ts @@ -267,6 +267,43 @@ describe("qa mock openai server", () => { expect(body).toContain("qa-progress-target.txt"); }); + it("plans deterministic tool-progress reads for exact-marker prompts", async () => { + const server = await startMockServer(); + const prompt = + "Tool progress QA check: use the read tool exactly once on `QA_KICKOFF_TASK.md` before answering. After that read completes, reply with only this exact marker and no other text: `TOOL_PROGRESS_MARKER_OK`."; + + const toolPlan = await fetch(`${server.baseUrl}/v1/responses`, { + method: "POST", + headers: { + "content-type": "application/json", + }, + body: JSON.stringify({ + stream: true, + input: [makeUserInput(prompt)], + }), + }); + + expect(toolPlan.status).toBe(200); + const toolPlanBody = await toolPlan.text(); + expect(toolPlanBody).toContain('"name":"read"'); + expect(toolPlanBody).toContain("QA_KICKOFF_TASK.md"); + + const final = await expectResponsesJson<{ + output: Array<{ content?: Array<{ text?: string }> }>; + }>(server, { + stream: false, + input: [ + makeUserInput(prompt), + { + type: "function_call_output", + call_id: "call_mock_read_1", + output: JSON.stringify({ text: "kickoff task" }), + }, + ], + }); + expect(final.output[0]?.content?.[0]?.text).toBe("TOOL_PROGRESS_MARKER_OK"); + }); + it("requires deterministic tool-progress error prompts to observe a failed tool", async () => { const server = await startMockServer(); const prompt = diff --git a/extensions/qa-lab/src/providers/mock-openai/server.ts b/extensions/qa-lab/src/providers/mock-openai/server.ts index f082207c583..5d1b86e1c23 100644 --- a/extensions/qa-lab/src/providers/mock-openai/server.ts +++ b/extensions/qa-lab/src/providers/mock-openai/server.ts @@ -562,11 +562,14 @@ function extractFinishExactlyDirective(text: string) { } function extractExactMarkerDirective(text: string) { - const backtickedMatch = extractLastCapture(text, /exact marker:\s*`([^`]+)`/i); + const backtickedMatch = extractLastCapture(text, /exact marker\b[^:\n]{0,120}:\s*`([^`]+)`/i); if (backtickedMatch) { return backtickedMatch; } - return extractLastCapture(text, /exact marker:\s*([^\s`.,;:!?]+(?:-[^\s`.,;:!?]+)*)/i); + return extractLastCapture( + text, + /exact marker\b[^:\n]{0,120}:\s*([^\s`.,;:!?]+(?:-[^\s`.,;:!?]+)*)/i, + ); } function extractLabeledMarkerDirective(text: string, label: string) { @@ -1294,19 +1297,22 @@ async function buildResponsesPayload( }, ]); } - if (QA_TOOL_PROGRESS_ERROR_PROMPT_RE.test(allInputText) && exactReplyDirective) { + const toolProgressReplyDirective = exactReplyDirective ?? exactMarkerDirective; + if (QA_TOOL_PROGRESS_ERROR_PROMPT_RE.test(allInputText) && toolProgressReplyDirective) { if (!toolOutput) { return buildToolProgressReadEvents(QA_TOOL_PROGRESS_ERROR_PROMPT_RE); } return buildAssistantEvents( - hasToolErrorOutput(toolJson, toolOutput) ? exactReplyDirective : "BUG-TOOL-DID-NOT-FAIL", + hasToolErrorOutput(toolJson, toolOutput) + ? toolProgressReplyDirective + : "BUG-TOOL-DID-NOT-FAIL", ); } - if (QA_TOOL_PROGRESS_PROMPT_RE.test(allInputText) && exactReplyDirective) { + if (QA_TOOL_PROGRESS_PROMPT_RE.test(allInputText) && toolProgressReplyDirective) { if (!toolOutput) { return buildToolProgressReadEvents(QA_TOOL_PROGRESS_PROMPT_RE); } - return buildAssistantEvents(exactReplyDirective); + return buildAssistantEvents(toolProgressReplyDirective); } if ( QA_BLOCK_STREAMING_PROMPT_RE.test(allInputText) &&