mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:10:44 +00:00
fix(anthropic): drop prefill with thinking
This commit is contained in:
@@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Agents/OpenAI-compatible: retry replay-safe empty `stop` turns once for `openai-completions` endpoints, so transient empty local backend responses no longer surface as “Agent couldn't generate a response” when a continuation succeeds, and restore `openclaw agent --model` for one-shot CLI runs. Fixes #72751. Thanks @moooV252.
|
||||
- Git hooks: skip ignored staged paths when formatting and restaging pre-commit files, so merge commits no longer abort when `.gitignore` newly ignores staged merged content. Fixes #72744. Thanks @100yenadmin.
|
||||
- Memory-core/dreaming: add a supported `dreaming.model` knob for Dream Diary narrative subagents, wired through phase config and the existing plugin subagent model-override trust gate. Refs #65963. Thanks @esqandil and @mjamiv.
|
||||
- Agents/Anthropic: remove trailing assistant prefill payloads when extended thinking is enabled, so Opus 4.7/Sonnet 4.6 requests do not fail Anthropic's user-final-turn validation. Fixes #72739. Thanks @superandylin.
|
||||
- Agents/vLLM: honor `compat.thinkingFormat: "qwen-chat-template"` by sending Qwen chat-template thinking kwargs, including preserved thinking for agent loops, and support DashScope-style `qwen` top-level thinking flags. Fixes #72329. Thanks @stavrostzagadouris.
|
||||
- Memory-core/dreaming: treat request-scoped narrative fallback as expected, skip session cleanup when no subagent run was created, and remove duplicate phase-level cleanup so fallback no longer emits warning noise. Fixes #67152. Thanks @jsompis.
|
||||
- Agents/exec: apply configured `tools.exec.timeoutSec` to background, `yieldMs`, and node `system.run` commands when no per-call timeout is set, preventing auto-backgrounded and remote node commands from running indefinitely. Fixes #67600; supersedes #67603. Thanks @dlmpx and @kagura-agent.
|
||||
|
||||
@@ -5,6 +5,7 @@ import {
|
||||
createAnthropicBetaHeadersWrapper,
|
||||
createAnthropicFastModeWrapper,
|
||||
createAnthropicServiceTierWrapper,
|
||||
createAnthropicThinkingPrefillWrapper,
|
||||
wrapAnthropicProviderStream,
|
||||
} from "./stream-wrappers.js";
|
||||
|
||||
@@ -115,6 +116,55 @@ describe("anthropic stream wrappers", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("createAnthropicThinkingPrefillWrapper", () => {
|
||||
function runThinkingPrefillWrapper(payload: Record<string, unknown>): Record<string, unknown> {
|
||||
const wrapper = createAnthropicThinkingPrefillWrapper(((_model, _context, options) => {
|
||||
options?.onPayload?.(payload as never, {} as never);
|
||||
return {} as never;
|
||||
}) as StreamFn);
|
||||
void wrapper({ provider: "anthropic", api: "anthropic-messages" } as never, {} as never, {});
|
||||
return payload;
|
||||
}
|
||||
|
||||
it("removes trailing assistant prefill when extended thinking is enabled", () => {
|
||||
const warn = vi.spyOn(__testing.log, "warn").mockImplementation(() => undefined);
|
||||
const payload = runThinkingPrefillWrapper({
|
||||
thinking: { type: "enabled", budget_tokens: 1024 },
|
||||
messages: [
|
||||
{ role: "user", content: "Return JSON." },
|
||||
{ role: "assistant", content: "{" },
|
||||
],
|
||||
});
|
||||
|
||||
expect(payload.messages).toEqual([{ role: "user", content: "Return JSON." }]);
|
||||
expect(warn).toHaveBeenCalledOnce();
|
||||
});
|
||||
|
||||
it("keeps assistant prefill when thinking is disabled", () => {
|
||||
const payload = runThinkingPrefillWrapper({
|
||||
thinking: { type: "disabled" },
|
||||
messages: [
|
||||
{ role: "user", content: "Return JSON." },
|
||||
{ role: "assistant", content: "{" },
|
||||
],
|
||||
});
|
||||
|
||||
expect(payload.messages).toHaveLength(2);
|
||||
});
|
||||
|
||||
it("keeps trailing assistant tool use turns", () => {
|
||||
const payload = runThinkingPrefillWrapper({
|
||||
thinking: { type: "adaptive" },
|
||||
messages: [
|
||||
{ role: "user", content: "Read a file." },
|
||||
{ role: "assistant", content: [{ type: "tool_use", id: "toolu_1", name: "Read" }] },
|
||||
],
|
||||
});
|
||||
|
||||
expect(payload.messages).toHaveLength(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe("createAnthropicFastModeWrapper", () => {
|
||||
function runFastModeWrapper(params: {
|
||||
apiKey?: string;
|
||||
|
||||
@@ -30,6 +30,51 @@ const PI_AI_OAUTH_ANTHROPIC_BETAS = [
|
||||
|
||||
// Service-tier values accepted by the Anthropic stream wrappers; presumably maps
// onto Anthropic's `service_tier` request field — confirm against the API docs.
type AnthropicServiceTier = "auto" | "standard_only";
|
||||
|
||||
function isAnthropicThinkingEnabled(payloadObj: Record<string, unknown>): boolean {
|
||||
const thinking = payloadObj.thinking;
|
||||
if (!thinking || typeof thinking !== "object") {
|
||||
return false;
|
||||
}
|
||||
return (thinking as { type?: unknown }).type !== "disabled";
|
||||
}
|
||||
|
||||
function assistantMessageHasToolUse(message: Record<string, unknown>): boolean {
|
||||
if (Array.isArray(message.tool_calls) && message.tool_calls.length > 0) {
|
||||
return true;
|
||||
}
|
||||
const content = message.content;
|
||||
if (!Array.isArray(content)) {
|
||||
return false;
|
||||
}
|
||||
return content.some(
|
||||
(block) =>
|
||||
block &&
|
||||
typeof block === "object" &&
|
||||
((block as { type?: unknown }).type === "tool_use" ||
|
||||
(block as { type?: unknown }).type === "toolCall"),
|
||||
);
|
||||
}
|
||||
|
||||
function stripTrailingAssistantPrefillWhenThinking(payloadObj: Record<string, unknown>): number {
|
||||
if (!isAnthropicThinkingEnabled(payloadObj) || !Array.isArray(payloadObj.messages)) {
|
||||
return 0;
|
||||
}
|
||||
let stripped = 0;
|
||||
while (payloadObj.messages.length > 0) {
|
||||
const last = payloadObj.messages[payloadObj.messages.length - 1];
|
||||
if (!last || typeof last !== "object") {
|
||||
break;
|
||||
}
|
||||
const message = last as Record<string, unknown>;
|
||||
if (message.role !== "assistant" || assistantMessageHasToolUse(message)) {
|
||||
break;
|
||||
}
|
||||
payloadObj.messages.pop();
|
||||
stripped += 1;
|
||||
}
|
||||
return stripped;
|
||||
}
|
||||
|
||||
function isAnthropic1MModel(modelId: string): boolean {
|
||||
const normalized = normalizeLowercaseStringOrEmpty(modelId);
|
||||
return ANTHROPIC_1M_MODEL_PREFIXES.some((prefix) => normalized.startsWith(prefix));
|
||||
@@ -168,6 +213,21 @@ export function createAnthropicServiceTierWrapper(
|
||||
};
|
||||
}
|
||||
|
||||
export function createAnthropicThinkingPrefillWrapper(
|
||||
baseStreamFn: StreamFn | undefined,
|
||||
): StreamFn {
|
||||
const underlying = baseStreamFn ?? streamSimple;
|
||||
return (model, context, options) =>
|
||||
streamWithPayloadPatch(underlying, model, context, options, (payloadObj) => {
|
||||
const stripped = stripTrailingAssistantPrefillWhenThinking(payloadObj);
|
||||
if (stripped > 0) {
|
||||
log.warn(
|
||||
`removed ${stripped} trailing assistant prefill message${stripped === 1 ? "" : "s"} because Anthropic extended thinking requires conversations to end with a user turn`,
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
export function resolveAnthropicFastMode(
|
||||
extraParams: Record<string, unknown> | undefined,
|
||||
): boolean | undefined {
|
||||
@@ -205,7 +265,8 @@ export function wrapAnthropicProviderStream(
|
||||
fastMode !== undefined
|
||||
? (streamFn) => createAnthropicFastModeWrapper(streamFn, fastMode)
|
||||
: undefined,
|
||||
(streamFn) => createAnthropicThinkingPrefillWrapper(streamFn),
|
||||
);
|
||||
}
|
||||
|
||||
export const __testing = { log };
|
||||
export const __testing = { log, stripTrailingAssistantPrefillWhenThinking };
|
||||
|
||||
Reference in New Issue
Block a user