mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:10:44 +00:00
fix(anthropic): drop prefill with thinking
This commit is contained in:
@@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Agents/OpenAI-compatible: retry replay-safe empty `stop` turns once for `openai-completions` endpoints, so transient empty local backend responses no longer surface as “Agent couldn't generate a response” when a continuation succeeds, and restore `openclaw agent --model` for one-shot CLI runs. Fixes #72751. Thanks @moooV252.
|
||||
- Git hooks: skip ignored staged paths when formatting and restaging pre-commit files, so merge commits no longer abort when `.gitignore` newly ignores staged merged content. Fixes #72744. Thanks @100yenadmin.
|
||||
- Memory-core/dreaming: add a supported `dreaming.model` knob for Dream Diary narrative subagents, wired through phase config and the existing plugin subagent model-override trust gate. Refs #65963. Thanks @esqandil and @mjamiv.
|
||||
- Agents/Anthropic: remove trailing assistant prefill payloads when extended thinking is enabled, so Opus 4.7/Sonnet 4.6 requests do not fail Anthropic's user-final-turn validation. Fixes #72739. Thanks @superandylin.
|
||||
- Agents/vLLM: honor `compat.thinkingFormat: "qwen-chat-template"` by sending Qwen chat-template thinking kwargs, including preserved thinking for agent loops, and support DashScope-style `qwen` top-level thinking flags. Fixes #72329. Thanks @stavrostzagadouris.
|
||||
- Memory-core/dreaming: treat request-scoped narrative fallback as expected, skip session cleanup when no subagent run was created, and remove duplicate phase-level cleanup so fallback no longer emits warning noise. Fixes #67152. Thanks @jsompis.
|
||||
- Agents/exec: apply configured `tools.exec.timeoutSec` to background, `yieldMs`, and node `system.run` commands when no per-call timeout is set, preventing auto-backgrounded and remote node commands from running indefinitely. Fixes #67600; supersedes #67603. Thanks @dlmpx and @kagura-agent.
|
||||
|
||||
@@ -5,6 +5,7 @@ import {
|
||||
createAnthropicBetaHeadersWrapper,
|
||||
createAnthropicFastModeWrapper,
|
||||
createAnthropicServiceTierWrapper,
|
||||
createAnthropicThinkingPrefillWrapper,
|
||||
wrapAnthropicProviderStream,
|
||||
} from "./stream-wrappers.js";
|
||||
|
||||
@@ -115,6 +116,55 @@ describe("anthropic stream wrappers", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("createAnthropicThinkingPrefillWrapper", () => {
|
||||
function runThinkingPrefillWrapper(payload: Record<string, unknown>): Record<string, unknown> {
|
||||
const wrapper = createAnthropicThinkingPrefillWrapper(((_model, _context, options) => {
|
||||
options?.onPayload?.(payload as never, {} as never);
|
||||
return {} as never;
|
||||
}) as StreamFn);
|
||||
void wrapper({ provider: "anthropic", api: "anthropic-messages" } as never, {} as never, {});
|
||||
return payload;
|
||||
}
|
||||
|
||||
it("removes trailing assistant prefill when extended thinking is enabled", () => {
|
||||
const warn = vi.spyOn(__testing.log, "warn").mockImplementation(() => undefined);
|
||||
const payload = runThinkingPrefillWrapper({
|
||||
thinking: { type: "enabled", budget_tokens: 1024 },
|
||||
messages: [
|
||||
{ role: "user", content: "Return JSON." },
|
||||
{ role: "assistant", content: "{" },
|
||||
],
|
||||
});
|
||||
|
||||
expect(payload.messages).toEqual([{ role: "user", content: "Return JSON." }]);
|
||||
expect(warn).toHaveBeenCalledOnce();
|
||||
});
|
||||
|
||||
it("keeps assistant prefill when thinking is disabled", () => {
|
||||
const payload = runThinkingPrefillWrapper({
|
||||
thinking: { type: "disabled" },
|
||||
messages: [
|
||||
{ role: "user", content: "Return JSON." },
|
||||
{ role: "assistant", content: "{" },
|
||||
],
|
||||
});
|
||||
|
||||
expect(payload.messages).toHaveLength(2);
|
||||
});
|
||||
|
||||
it("keeps trailing assistant tool use turns", () => {
|
||||
const payload = runThinkingPrefillWrapper({
|
||||
thinking: { type: "adaptive" },
|
||||
messages: [
|
||||
{ role: "user", content: "Read a file." },
|
||||
{ role: "assistant", content: [{ type: "tool_use", id: "toolu_1", name: "Read" }] },
|
||||
],
|
||||
});
|
||||
|
||||
expect(payload.messages).toHaveLength(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe("createAnthropicFastModeWrapper", () => {
|
||||
function runFastModeWrapper(params: {
|
||||
apiKey?: string;
|
||||
|
||||
@@ -30,6 +30,51 @@ const PI_AI_OAUTH_ANTHROPIC_BETAS = [
|
||||
|
||||
// Service-tier values accepted by the Anthropic stream wrappers; presumably maps
// onto Anthropic's `service_tier` request field — confirm against the API docs.
type AnthropicServiceTier = "auto" | "standard_only";
|
||||
|
||||
function isAnthropicThinkingEnabled(payloadObj: Record<string, unknown>): boolean {
|
||||
const thinking = payloadObj.thinking;
|
||||
if (!thinking || typeof thinking !== "object") {
|
||||
return false;
|
||||
}
|
||||
return (thinking as { type?: unknown }).type !== "disabled";
|
||||
}
|
||||
|
||||
function assistantMessageHasToolUse(message: Record<string, unknown>): boolean {
|
||||
if (Array.isArray(message.tool_calls) && message.tool_calls.length > 0) {
|
||||
return true;
|
||||
}
|
||||
const content = message.content;
|
||||
if (!Array.isArray(content)) {
|
||||
return false;
|
||||
}
|
||||
return content.some(
|
||||
(block) =>
|
||||
block &&
|
||||
typeof block === "object" &&
|
||||
((block as { type?: unknown }).type === "tool_use" ||
|
||||
(block as { type?: unknown }).type === "toolCall"),
|
||||
);
|
||||
}
|
||||
|
||||
function stripTrailingAssistantPrefillWhenThinking(payloadObj: Record<string, unknown>): number {
|
||||
if (!isAnthropicThinkingEnabled(payloadObj) || !Array.isArray(payloadObj.messages)) {
|
||||
return 0;
|
||||
}
|
||||
let stripped = 0;
|
||||
while (payloadObj.messages.length > 0) {
|
||||
const last = payloadObj.messages[payloadObj.messages.length - 1];
|
||||
if (!last || typeof last !== "object") {
|
||||
break;
|
||||
}
|
||||
const message = last as Record<string, unknown>;
|
||||
if (message.role !== "assistant" || assistantMessageHasToolUse(message)) {
|
||||
break;
|
||||
}
|
||||
payloadObj.messages.pop();
|
||||
stripped += 1;
|
||||
}
|
||||
return stripped;
|
||||
}
|
||||
|
||||
function isAnthropic1MModel(modelId: string): boolean {
|
||||
const normalized = normalizeLowercaseStringOrEmpty(modelId);
|
||||
return ANTHROPIC_1M_MODEL_PREFIXES.some((prefix) => normalized.startsWith(prefix));
|
||||
@@ -168,6 +213,21 @@ export function createAnthropicServiceTierWrapper(
|
||||
};
|
||||
}
|
||||
|
||||
export function createAnthropicThinkingPrefillWrapper(
|
||||
baseStreamFn: StreamFn | undefined,
|
||||
): StreamFn {
|
||||
const underlying = baseStreamFn ?? streamSimple;
|
||||
return (model, context, options) =>
|
||||
streamWithPayloadPatch(underlying, model, context, options, (payloadObj) => {
|
||||
const stripped = stripTrailingAssistantPrefillWhenThinking(payloadObj);
|
||||
if (stripped > 0) {
|
||||
log.warn(
|
||||
`removed ${stripped} trailing assistant prefill message${stripped === 1 ? "" : "s"} because Anthropic extended thinking requires conversations to end with a user turn`,
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
export function resolveAnthropicFastMode(
|
||||
extraParams: Record<string, unknown> | undefined,
|
||||
): boolean | undefined {
|
||||
@@ -205,7 +265,8 @@ export function wrapAnthropicProviderStream(
|
||||
fastMode !== undefined
|
||||
? (streamFn) => createAnthropicFastModeWrapper(streamFn, fastMode)
|
||||
: undefined,
|
||||
(streamFn) => createAnthropicThinkingPrefillWrapper(streamFn),
|
||||
);
|
||||
}
|
||||
|
||||
export const __testing = { log };
|
||||
export const __testing = { log, stripTrailingAssistantPrefillWhenThinking };
|
||||
|
||||
Reference in New Issue
Block a user