From 4f7038ae33ae62e24952463a2394c8a6ce9de133 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 27 Apr 2026 11:48:51 +0100 Subject: [PATCH] fix(anthropic): drop prefill with thinking --- CHANGELOG.md | 1 + extensions/anthropic/stream-wrappers.test.ts | 50 ++++++++++++++++ extensions/anthropic/stream-wrappers.ts | 63 +++++++++++++++++++- 3 files changed, 113 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fa742119a64..7dc815cf6d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai - Agents/OpenAI-compatible: retry replay-safe empty `stop` turns once for `openai-completions` endpoints, so transient empty local backend responses no longer surface as “Agent couldn't generate a response” when a continuation succeeds, and restore `openclaw agent --model` for one-shot CLI runs. Fixes #72751. Thanks @moooV252. - Git hooks: skip ignored staged paths when formatting and restaging pre-commit files, so merge commits no longer abort when `.gitignore` newly ignores staged merged content. Fixes #72744. Thanks @100yenadmin. - Memory-core/dreaming: add a supported `dreaming.model` knob for Dream Diary narrative subagents, wired through phase config and the existing plugin subagent model-override trust gate. Refs #65963. Thanks @esqandil and @mjamiv. +- Agents/Anthropic: remove trailing assistant prefill payloads when extended thinking is enabled, so Opus 4.7/Sonnet 4.6 requests do not fail Anthropic's user-final-turn validation. Fixes #72739. Thanks @superandylin. - Agents/vLLM: honor `compat.thinkingFormat: "qwen-chat-template"` by sending Qwen chat-template thinking kwargs, including preserved thinking for agent loops, and support DashScope-style `qwen` top-level thinking flags. Fixes #72329. Thanks @stavrostzagadouris. 
- Memory-core/dreaming: treat request-scoped narrative fallback as expected, skip session cleanup when no subagent run was created, and remove duplicate phase-level cleanup so fallback no longer emits warning noise. Fixes #67152. Thanks @jsompis. - Agents/exec: apply configured `tools.exec.timeoutSec` to background, `yieldMs`, and node `system.run` commands when no per-call timeout is set, preventing auto-backgrounded and remote node commands from running indefinitely. Fixes #67600; supersedes #67603. Thanks @dlmpx and @kagura-agent. diff --git a/extensions/anthropic/stream-wrappers.test.ts b/extensions/anthropic/stream-wrappers.test.ts index 04481f2964e..84d91f49fbd 100644 --- a/extensions/anthropic/stream-wrappers.test.ts +++ b/extensions/anthropic/stream-wrappers.test.ts @@ -5,6 +5,7 @@ import { createAnthropicBetaHeadersWrapper, createAnthropicFastModeWrapper, createAnthropicServiceTierWrapper, + createAnthropicThinkingPrefillWrapper, wrapAnthropicProviderStream, } from "./stream-wrappers.js"; @@ -115,6 +116,55 @@ describe("anthropic stream wrappers", () => { }); }); +describe("createAnthropicThinkingPrefillWrapper", () => { + function runThinkingPrefillWrapper(payload: Record<string, unknown>): Record<string, unknown> { + const wrapper = createAnthropicThinkingPrefillWrapper(((_model, _context, options) => { + options?.onPayload?.(payload as never, {} as never); + return {} as never; + }) as StreamFn); + void wrapper({ provider: "anthropic", api: "anthropic-messages" } as never, {} as never, {}); + return payload; + } + + it("removes trailing assistant prefill when extended thinking is enabled", () => { + const warn = vi.spyOn(__testing.log, "warn").mockImplementation(() => undefined); + const payload = runThinkingPrefillWrapper({ + thinking: { type: "enabled", budget_tokens: 1024 }, + messages: [ + { role: "user", content: "Return JSON." }, + { role: "assistant", content: "{" }, + ], + }); + + expect(payload.messages).toEqual([{ role: "user", content: "Return JSON." 
}]); + expect(warn).toHaveBeenCalledOnce(); + }); + + it("keeps assistant prefill when thinking is disabled", () => { + const payload = runThinkingPrefillWrapper({ + thinking: { type: "disabled" }, + messages: [ + { role: "user", content: "Return JSON." }, + { role: "assistant", content: "{" }, + ], + }); + + expect(payload.messages).toHaveLength(2); + }); + + it("keeps trailing assistant tool use turns", () => { + const payload = runThinkingPrefillWrapper({ + thinking: { type: "adaptive" }, + messages: [ + { role: "user", content: "Read a file." }, + { role: "assistant", content: [{ type: "tool_use", id: "toolu_1", name: "Read" }] }, + ], + }); + + expect(payload.messages).toHaveLength(2); + }); +}); + describe("createAnthropicFastModeWrapper", () => { function runFastModeWrapper(params: { apiKey?: string; diff --git a/extensions/anthropic/stream-wrappers.ts b/extensions/anthropic/stream-wrappers.ts index 64c8e522a83..12f3bea335f 100644 --- a/extensions/anthropic/stream-wrappers.ts +++ b/extensions/anthropic/stream-wrappers.ts @@ -30,6 +30,51 @@ const PI_AI_OAUTH_ANTHROPIC_BETAS = [ type AnthropicServiceTier = "auto" | "standard_only"; +function isAnthropicThinkingEnabled(payloadObj: Record<string, unknown>): boolean { + const thinking = payloadObj.thinking; + if (!thinking || typeof thinking !== "object") { + return false; + } + return (thinking as { type?: unknown }).type !== "disabled"; +} + +function assistantMessageHasToolUse(message: Record<string, unknown>): boolean { + if (Array.isArray(message.tool_calls) && message.tool_calls.length > 0) { + return true; + } + const content = message.content; + if (!Array.isArray(content)) { + return false; + } + return content.some( + (block) => + block && + typeof block === "object" && + ((block as { type?: unknown }).type === "tool_use" || + (block as { type?: unknown }).type === "toolCall"), + ); +} + +function stripTrailingAssistantPrefillWhenThinking(payloadObj: Record<string, unknown>): number { + if (!isAnthropicThinkingEnabled(payloadObj) || 
!Array.isArray(payloadObj.messages)) { + return 0; + } + let stripped = 0; + while (payloadObj.messages.length > 0) { + const last = payloadObj.messages[payloadObj.messages.length - 1]; + if (!last || typeof last !== "object") { + break; + } + const message = last as Record<string, unknown>; + if (message.role !== "assistant" || assistantMessageHasToolUse(message)) { + break; + } + payloadObj.messages.pop(); + stripped += 1; + } + return stripped; +} + function isAnthropic1MModel(modelId: string): boolean { const normalized = normalizeLowercaseStringOrEmpty(modelId); return ANTHROPIC_1M_MODEL_PREFIXES.some((prefix) => normalized.startsWith(prefix)); } @@ -168,6 +213,21 @@ export function createAnthropicServiceTierWrapper( }; } +export function createAnthropicThinkingPrefillWrapper( + baseStreamFn: StreamFn | undefined, +): StreamFn { + const underlying = baseStreamFn ?? streamSimple; + return (model, context, options) => + streamWithPayloadPatch(underlying, model, context, options, (payloadObj) => { + const stripped = stripTrailingAssistantPrefillWhenThinking(payloadObj); + if (stripped > 0) { + log.warn( + `removed ${stripped} trailing assistant prefill message${stripped === 1 ? "" : "s"} because Anthropic extended thinking requires conversations to end with a user turn`, + ); + } + }); +} + export function resolveAnthropicFastMode( extraParams: Record<string, unknown> | undefined, ): boolean | undefined { @@ -205,7 +265,8 @@ export function wrapAnthropicProviderStream( fastMode !== undefined ? (streamFn) => createAnthropicFastModeWrapper(streamFn, fastMode) : undefined, + (streamFn) => createAnthropicThinkingPrefillWrapper(streamFn), ); } -export const __testing = { log }; +export const __testing = { log, stripTrailingAssistantPrefillWhenThinking };