From 599b1b84620bc9c43e37a385d6f5468289e5156e Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 27 Apr 2026 21:36:35 +0100 Subject: [PATCH] fix(cloudflare-ai-gateway): strip anthropic thinking prefill --- CHANGELOG.md | 1 + docs/plugins/sdk-provider-plugins.md | 2 +- docs/providers/cloudflare-ai-gateway.md | 5 + docs/reference/transcript-hygiene.md | 2 + extensions/anthropic/stream-wrappers.ts | 67 ++------ .../cloudflare-ai-gateway/index.test.ts | 47 ++++++ extensions/cloudflare-ai-gateway/index.ts | 2 + .../stream-wrappers.test.ts | 155 ++++++++++++++++++ .../cloudflare-ai-gateway/stream-wrappers.ts | 31 ++++ src/plugin-sdk/provider-stream-shared.test.ts | 85 ++++++++++ src/plugin-sdk/provider-stream-shared.ts | 67 ++++++++ src/plugin-sdk/provider-stream.ts | 2 + 12 files changed, 409 insertions(+), 57 deletions(-) create mode 100644 extensions/cloudflare-ai-gateway/index.test.ts create mode 100644 extensions/cloudflare-ai-gateway/stream-wrappers.test.ts create mode 100644 extensions/cloudflare-ai-gateway/stream-wrappers.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 88c92c16bf5..37fb877eb84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Providers/Cloudflare AI Gateway: strip assistant prefill turns from Anthropic Messages payloads when thinking is enabled, so Claude requests through Cloudflare AI Gateway no longer fail Anthropic conversation-ending validation. Fixes #72905; carries forward #73005. Thanks @AaronFaby and @sahilsatralkar. - Channels/sessions: prevent guarded inbound session recording from creating route-only phantom sessions while still allowing last-route updates for sessions that already exist. Carries forward #73009. Thanks @jzakirov. - Plugins/runtime deps: stage bundled plugin dependencies imported by mirrored root dist chunks, so packaged memory and status commands do not miss `chokidar` or similar root-chunk dependencies after update. Fixes #72882 and #72970; carries forward #72992. Thanks @shrimpy8, @colin-chang, and @Schnup03. - Agents/runtime context: deliver hidden runtime context through prompt-local system context while keeping the transcript-only custom entry out of provider user turns, and strip stale copied runtime-context prefaces from user-facing replies. Fixes #72386; carries forward #72969. Thanks @jhsmith409. diff --git a/docs/plugins/sdk-provider-plugins.md b/docs/plugins/sdk-provider-plugins.md index ee2fa9f59cb..1efcdac36ff 100644 --- a/docs/plugins/sdk-provider-plugins.md +++ b/docs/plugins/sdk-provider-plugins.md @@ -340,7 +340,7 @@ API key auth, and dynamic model resolution. Each family builder is composed from lower-level public helpers exported from the same package, which you can reach for when a provider needs to go off the common pattern: - `openclaw/plugin-sdk/provider-model-shared` — `ProviderReplayFamily`, `buildProviderReplayFamilyHooks(...)`, and the raw replay builders (`buildOpenAICompatibleReplayPolicy`, `buildAnthropicReplayPolicyForModel`, `buildGoogleGeminiReplayPolicy`, `buildHybridAnthropicOrOpenAIReplayPolicy`). Also exports Gemini replay helpers (`sanitizeGoogleGeminiReplayHistory`, `resolveTaggedReasoningOutputMode`) and endpoint/model helpers (`resolveProviderEndpoint`, `normalizeProviderId`, `normalizeGooglePreviewModelId`, `normalizeNativeXaiModelId`). - - `openclaw/plugin-sdk/provider-stream` — `ProviderStreamFamily`, `buildProviderStreamFamilyHooks(...)`, `composeProviderStreamWrappers(...)`, plus the shared OpenAI/Codex wrappers (`createOpenAIAttributionHeadersWrapper`, `createOpenAIFastModeWrapper`, `createOpenAIServiceTierWrapper`, `createOpenAIResponsesContextManagementWrapper`, `createCodexNativeWebSearchWrapper`), DeepSeek V4 OpenAI-compatible wrapper (`createDeepSeekV4OpenAICompatibleThinkingWrapper`), and shared proxy/provider wrappers (`createOpenRouterWrapper`, `createToolStreamWrapper`, `createMinimaxFastModeWrapper`). + - `openclaw/plugin-sdk/provider-stream` — `ProviderStreamFamily`, `buildProviderStreamFamilyHooks(...)`, `composeProviderStreamWrappers(...)`, plus the shared OpenAI/Codex wrappers (`createOpenAIAttributionHeadersWrapper`, `createOpenAIFastModeWrapper`, `createOpenAIServiceTierWrapper`, `createOpenAIResponsesContextManagementWrapper`, `createCodexNativeWebSearchWrapper`), DeepSeek V4 OpenAI-compatible wrapper (`createDeepSeekV4OpenAICompatibleThinkingWrapper`), Anthropic Messages thinking prefill cleanup (`createAnthropicThinkingPrefillPayloadWrapper`), and shared proxy/provider wrappers (`createOpenRouterWrapper`, `createToolStreamWrapper`, `createMinimaxFastModeWrapper`). - `openclaw/plugin-sdk/provider-tools` — `ProviderToolCompatFamily`, `buildProviderToolCompatFamilyHooks("gemini")`, underlying Gemini schema helpers (`normalizeGeminiToolSchemas`, `inspectGeminiToolSchemas`), and xAI compat helpers (`resolveXaiModelCompatPatch()`, `applyXaiModelCompat(model)`). The bundled xAI plugin uses `normalizeResolvedModel` + `contributeResolvedModelCompat` with these to keep xAI rules owned by the provider. Some stream helpers stay provider-local on purpose. `@openclaw/anthropic-provider` keeps `wrapAnthropicProviderStream`, `resolveAnthropicBetas`, `resolveAnthropicFastMode`, `resolveAnthropicServiceTier`, and the lower-level Anthropic wrapper builders in its own public `api.ts` / `contract-api.ts` seam because they encode Claude OAuth beta handling and `context1m` gating. The xAI plugin similarly keeps native xAI Responses shaping in its own `wrapStreamFn` (`/fast` aliases, default `tool_stream`, unsupported strict-tool cleanup, xAI-specific reasoning-payload removal). diff --git a/docs/providers/cloudflare-ai-gateway.md b/docs/providers/cloudflare-ai-gateway.md index f9768c1002a..f0552e5281f 100644 --- a/docs/providers/cloudflare-ai-gateway.md +++ b/docs/providers/cloudflare-ai-gateway.md @@ -19,6 +19,11 @@ Cloudflare AI Gateway sits in front of provider APIs and lets you add analytics, For Anthropic models routed through Cloudflare AI Gateway, use your **Anthropic API key** as the provider key. +When thinking is enabled for Anthropic Messages models, OpenClaw strips trailing +assistant prefill turns before sending the payload through Cloudflare AI Gateway. +Anthropic rejects response prefilling with extended thinking, while ordinary +non-thinking prefill remains available. + ## Getting started diff --git a/docs/reference/transcript-hygiene.md b/docs/reference/transcript-hygiene.md index 6783d1e6295..198167d3da8 100644 --- a/docs/reference/transcript-hygiene.md +++ b/docs/reference/transcript-hygiene.md @@ -137,6 +137,8 @@ external end-user instructions. - Tool result pairing repair and synthetic tool results. - Turn validation (merge consecutive user turns to satisfy strict alternation). +- Trailing assistant prefill turns are stripped from outgoing Anthropic Messages + payloads when thinking is enabled, including Cloudflare AI Gateway routes. - Thinking blocks with missing, empty, or blank replay signatures are stripped before provider conversion. If that empties an assistant turn, OpenClaw keeps turn shape with non-empty omitted-reasoning text. diff --git a/extensions/anthropic/stream-wrappers.ts b/extensions/anthropic/stream-wrappers.ts index 12f3bea335f..029ce96ecb2 100644 --- a/extensions/anthropic/stream-wrappers.ts +++ b/extensions/anthropic/stream-wrappers.ts @@ -4,7 +4,9 @@ import type { ProviderWrapStreamFnContext } from "openclaw/plugin-sdk/plugin-ent import { applyAnthropicPayloadPolicyToParams, composeProviderStreamWrappers, + createAnthropicThinkingPrefillPayloadWrapper, resolveAnthropicPayloadPolicy, + stripTrailingAnthropicAssistantPrefillWhenThinking, streamWithPayloadPatch, } from "openclaw/plugin-sdk/provider-stream-shared"; import { createSubsystemLogger } from "openclaw/plugin-sdk/runtime-env"; @@ -30,51 +32,6 @@ const PI_AI_OAUTH_ANTHROPIC_BETAS = [ type AnthropicServiceTier = "auto" | "standard_only"; -function isAnthropicThinkingEnabled(payloadObj: Record): boolean { - const thinking = payloadObj.thinking; - if (!thinking || typeof thinking !== "object") { - return false; - } - return (thinking as { type?: unknown }).type !== "disabled"; -} - -function assistantMessageHasToolUse(message: Record): boolean { - if (Array.isArray(message.tool_calls) && message.tool_calls.length > 0) { - return true; - } - const content = message.content; - if (!Array.isArray(content)) { - return false; - } - return content.some( - (block) => - block && - typeof block === "object" && - ((block as { type?: unknown }).type === "tool_use" || - (block as { type?: unknown }).type === "toolCall"), - ); -} - -function stripTrailingAssistantPrefillWhenThinking(payloadObj: Record): number { - if (!isAnthropicThinkingEnabled(payloadObj) || !Array.isArray(payloadObj.messages)) { - return 0; - } - let stripped = 0; - while (payloadObj.messages.length > 0) { - const last = payloadObj.messages[payloadObj.messages.length - 1]; - if (!last || typeof last !== "object") { - break; - } - const message = last as Record; - if (message.role !== "assistant" || assistantMessageHasToolUse(message)) { - break; - } - payloadObj.messages.pop(); - stripped += 1; - } - return stripped; -} - function isAnthropic1MModel(modelId: string): boolean { const normalized = normalizeLowercaseStringOrEmpty(modelId); return ANTHROPIC_1M_MODEL_PREFIXES.some((prefix) => normalized.startsWith(prefix)); @@ -216,16 +173,11 @@ export function createAnthropicServiceTierWrapper( export function createAnthropicThinkingPrefillWrapper( baseStreamFn: StreamFn | undefined, ): StreamFn { - const underlying = baseStreamFn ?? streamSimple; - return (model, context, options) => - streamWithPayloadPatch(underlying, model, context, options, (payloadObj) => { - const stripped = stripTrailingAssistantPrefillWhenThinking(payloadObj); - if (stripped > 0) { - log.warn( - `removed ${stripped} trailing assistant prefill message${stripped === 1 ? "" : "s"} because Anthropic extended thinking requires conversations to end with a user turn`, - ); - } - }); + return createAnthropicThinkingPrefillPayloadWrapper(baseStreamFn, (stripped) => { + log.warn( + `removed ${stripped} trailing assistant prefill message${stripped === 1 ? "" : "s"} because Anthropic extended thinking requires conversations to end with a user turn`, + ); + }); } export function resolveAnthropicFastMode( @@ -269,4 +221,7 @@ export function wrapAnthropicProviderStream( ); } -export const __testing = { log, stripTrailingAssistantPrefillWhenThinking }; +export const __testing = { + log, + stripTrailingAssistantPrefillWhenThinking: stripTrailingAnthropicAssistantPrefillWhenThinking, +}; diff --git a/extensions/cloudflare-ai-gateway/index.test.ts b/extensions/cloudflare-ai-gateway/index.test.ts new file mode 100644 index 00000000000..f5e99f7d2a0 --- /dev/null +++ b/extensions/cloudflare-ai-gateway/index.test.ts @@ -0,0 +1,47 @@ +import type { StreamFn } from "@mariozechner/pi-agent-core"; +import { capturePluginRegistration } from "openclaw/plugin-sdk/testing"; +import { describe, expect, it } from "vitest"; +import plugin from "./index.js"; + +function registerProvider() { + const captured = capturePluginRegistration(plugin); + const provider = captured.providers[0]; + expect(provider?.id).toBe("cloudflare-ai-gateway"); + return provider; +} + +describe("cloudflare-ai-gateway plugin", () => { + it("registers a stream wrapper that strips Anthropic thinking assistant prefill", () => { + const provider = registerProvider(); + expect(provider?.wrapStreamFn).toBeTypeOf("function"); + + let capturedPayload: Record | undefined; + const baseStreamFn: StreamFn = (_model, _context, options) => { + const payload: Record = { + thinking: { type: "enabled", budget_tokens: 1024 }, + messages: [ + { role: "user", content: "Return JSON." }, + { role: "assistant", content: "{" }, + ], + }; + options?.onPayload?.(payload as never, _model as never); + capturedPayload = payload; + return {} as ReturnType; + }; + + const wrapped = provider?.wrapStreamFn?.({ + provider: "cloudflare-ai-gateway", + modelId: "claude-sonnet-4-6", + model: { api: "anthropic-messages" }, + streamFn: baseStreamFn, + } as never); + + void wrapped?.( + { provider: "cloudflare-ai-gateway", api: "anthropic-messages" } as never, + {} as never, + {}, + ); + + expect(capturedPayload?.messages).toEqual([{ role: "user", content: "Return JSON." }]); + }); +}); diff --git a/extensions/cloudflare-ai-gateway/index.ts b/extensions/cloudflare-ai-gateway/index.ts index 1c1f6e714b9..1be23ee5c9e 100644 --- a/extensions/cloudflare-ai-gateway/index.ts +++ b/extensions/cloudflare-ai-gateway/index.ts @@ -14,6 +14,7 @@ import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime"; import { buildCloudflareAiGatewayCatalogProvider } from "./catalog-provider.js"; import { CLOUDFLARE_AI_GATEWAY_DEFAULT_MODEL_REF } from "./models.js"; import { applyCloudflareAiGatewayConfig, buildCloudflareAiGatewayConfigPatch } from "./onboard.js"; +import { wrapCloudflareAiGatewayProviderStream } from "./stream-wrappers.js"; const PROVIDER_ID = "cloudflare-ai-gateway"; const PROVIDER_ENV_VAR = "CLOUDFLARE_AI_GATEWAY_API_KEY"; @@ -216,6 +217,7 @@ export default definePluginEntry({ }, classifyFailoverReason: ({ errorMessage }) => /\bworkers?_ai\b.*\b(?:rate|limit|quota)\b/i.test(errorMessage) ? "rate_limit" : undefined, + wrapStreamFn: wrapCloudflareAiGatewayProviderStream, }); }, }); diff --git a/extensions/cloudflare-ai-gateway/stream-wrappers.test.ts b/extensions/cloudflare-ai-gateway/stream-wrappers.test.ts new file mode 100644 index 00000000000..2d3346cd361 --- /dev/null +++ b/extensions/cloudflare-ai-gateway/stream-wrappers.test.ts @@ -0,0 +1,155 @@ +import type { StreamFn } from "@mariozechner/pi-agent-core"; +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { + __testing, + createCloudflareAiGatewayAnthropicThinkingPrefillWrapper, + wrapCloudflareAiGatewayProviderStream, +} from "./stream-wrappers.js"; + +const { warnMock } = vi.hoisted(() => ({ + warnMock: vi.fn(), +})); + +vi.mock("openclaw/plugin-sdk/runtime-env", () => ({ + createSubsystemLogger: () => ({ + debug: vi.fn(), + error: vi.fn(), + info: vi.fn(), + warn: warnMock, + }), +})); + +function createPayloadBaseStream(payload: Record): StreamFn { + return ((model, _context, options) => { + options?.onPayload?.(payload as never, model as never); + return {} as ReturnType; + }) as StreamFn; +} + +function runWrapper(payload: Record): Record { + const wrapper = createCloudflareAiGatewayAnthropicThinkingPrefillWrapper( + createPayloadBaseStream(payload), + ); + void wrapper( + { provider: "cloudflare-ai-gateway", api: "anthropic-messages" } as never, + {} as never, + {}, + ); + return payload; +} + +describe("createCloudflareAiGatewayAnthropicThinkingPrefillWrapper", () => { + beforeEach(() => { + warnMock.mockClear(); + }); + + it("removes trailing assistant prefill when thinking is enabled", () => { + const payload = runWrapper({ + thinking: { type: "enabled", budget_tokens: 1024 }, + messages: [ + { role: "user", content: "Return JSON." }, + { role: "assistant", content: "{" }, + ], + }); + + expect(payload.messages).toEqual([{ role: "user", content: "Return JSON." }]); + expect(warnMock).toHaveBeenCalledWith( + "removed 1 trailing assistant prefill message because Anthropic extended thinking requires conversations to end with a user turn", + ); + }); + + it("removes multiple trailing assistant prefill messages until the conversation ends with user", () => { + const payload = runWrapper({ + thinking: { type: "adaptive" }, + messages: [ + { role: "user", content: "Return JSON." }, + { role: "assistant", content: "{" }, + { role: "assistant", content: '"status"' }, + ], + }); + + expect(payload.messages).toEqual([{ role: "user", content: "Return JSON." }]); + expect(warnMock).toHaveBeenCalledWith( + "removed 2 trailing assistant prefill messages because Anthropic extended thinking requires conversations to end with a user turn", + ); + }); + + it("keeps assistant prefill when thinking is disabled", () => { + const payload = runWrapper({ + thinking: { type: "disabled" }, + messages: [ + { role: "user", content: "Return JSON." }, + { role: "assistant", content: "{" }, + ], + }); + + expect(payload.messages).toHaveLength(2); + expect(warnMock).not.toHaveBeenCalled(); + }); + + it("keeps trailing assistant tool use turns when thinking is enabled", () => { + const payload = runWrapper({ + thinking: { type: "enabled", budget_tokens: 1024 }, + messages: [ + { role: "user", content: "Read a file." }, + { + role: "assistant", + content: [{ type: "tool_use", id: "toolu_1", name: "Read" }], + }, + ], + }); + + expect(payload.messages).toHaveLength(2); + expect(warnMock).not.toHaveBeenCalled(); + }); +}); + +describe("wrapCloudflareAiGatewayProviderStream", () => { + beforeEach(() => { + warnMock.mockClear(); + }); + + it("patches Anthropic Messages models", () => { + const payload = { + thinking: { type: "enabled" }, + messages: [ + { role: "user", content: "Return JSON." }, + { role: "assistant", content: "{" }, + ], + }; + const wrapped = wrapCloudflareAiGatewayProviderStream({ + model: { api: "anthropic-messages" }, + streamFn: createPayloadBaseStream(payload), + } as never); + + void wrapped?.( + { provider: "cloudflare-ai-gateway", api: "anthropic-messages" } as never, + {} as never, + {}, + ); + + expect(payload.messages).toEqual([{ role: "user", content: "Return JSON." }]); + }); + + it("leaves non-Anthropic model APIs on the original stream path", () => { + let onPayloadWasInstalled = false; + const baseStreamFn: StreamFn = (_model, _context, options) => { + onPayloadWasInstalled = typeof options?.onPayload === "function"; + return {} as ReturnType; + }; + + const wrapped = wrapCloudflareAiGatewayProviderStream({ + model: { api: "openai-completions" }, + streamFn: baseStreamFn, + } as never); + void wrapped?.({ api: "openai-completions" } as never, {} as never, {}); + + expect(wrapped).toBe(baseStreamFn); + expect(onPayloadWasInstalled).toBe(false); + expect(warnMock).not.toHaveBeenCalled(); + }); + + it("treats missing model API as the plugin's default Anthropic Messages route", () => { + expect(__testing.shouldPatchAnthropicMessagesPayload({} as never)).toBe(true); + }); +}); diff --git a/extensions/cloudflare-ai-gateway/stream-wrappers.ts b/extensions/cloudflare-ai-gateway/stream-wrappers.ts new file mode 100644 index 00000000000..9c0ed628276 --- /dev/null +++ b/extensions/cloudflare-ai-gateway/stream-wrappers.ts @@ -0,0 +1,31 @@ +import type { StreamFn } from "@mariozechner/pi-agent-core"; +import type { ProviderWrapStreamFnContext } from "openclaw/plugin-sdk/plugin-entry"; +import { createAnthropicThinkingPrefillPayloadWrapper } from "openclaw/plugin-sdk/provider-stream-shared"; +import { createSubsystemLogger } from "openclaw/plugin-sdk/runtime-env"; + +const log = createSubsystemLogger("cloudflare-ai-gateway-stream"); + +function shouldPatchAnthropicMessagesPayload(model: ProviderWrapStreamFnContext["model"]): boolean { + return model?.api === undefined || model.api === "anthropic-messages"; +} + +export function createCloudflareAiGatewayAnthropicThinkingPrefillWrapper( + baseStreamFn: StreamFn | undefined, +): StreamFn { + return createAnthropicThinkingPrefillPayloadWrapper(baseStreamFn, (stripped) => { + log.warn( + `removed ${stripped} trailing assistant prefill message${stripped === 1 ? "" : "s"} because Anthropic extended thinking requires conversations to end with a user turn`, + ); + }); +} + +export function wrapCloudflareAiGatewayProviderStream( + ctx: ProviderWrapStreamFnContext, +): StreamFn | undefined { + if (!shouldPatchAnthropicMessagesPayload(ctx.model)) { + return ctx.streamFn; + } + return createCloudflareAiGatewayAnthropicThinkingPrefillWrapper(ctx.streamFn); +} + +export const __testing = { log, shouldPatchAnthropicMessagesPayload }; diff --git a/src/plugin-sdk/provider-stream-shared.test.ts b/src/plugin-sdk/provider-stream-shared.test.ts index f12ba8e0f16..b0d5e7f6a75 100644 --- a/src/plugin-sdk/provider-stream-shared.test.ts +++ b/src/plugin-sdk/provider-stream-shared.test.ts @@ -3,11 +3,13 @@ import { describe, expect, it } from "vitest"; import { buildCopilotDynamicHeaders, createHtmlEntityToolCallArgumentDecodingWrapper, + createAnthropicThinkingPrefillPayloadWrapper, createPayloadPatchStreamWrapper, defaultToolStreamExtraParams, decodeHtmlEntitiesInObject, hasCopilotVisionInput, isOpenAICompatibleThinkingEnabled, + stripTrailingAnthropicAssistantPrefillWhenThinking, } from "./provider-stream-shared.js"; type FakeWrappedStream = { @@ -265,3 +267,86 @@ describe("createPayloadPatchStreamWrapper", () => { expect(onPayloadWasInstalled).toBe(false); }); }); + +describe("stripTrailingAnthropicAssistantPrefillWhenThinking", () => { + it("removes trailing assistant text turns when Anthropic thinking is enabled", () => { + const payload = { + thinking: { type: "enabled", budget_tokens: 1024 }, + messages: [ + { role: "user", content: "Return JSON." }, + { role: "assistant", content: "{" }, + { role: "assistant", content: '"status"' }, + ], + }; + + expect(stripTrailingAnthropicAssistantPrefillWhenThinking(payload)).toBe(2); + expect(payload.messages).toEqual([{ role: "user", content: "Return JSON." }]); + }); + + it("preserves assistant tool-use turns across Anthropic and OpenAI-shaped payloads", () => { + const anthropicPayload = { + thinking: { type: "adaptive" }, + messages: [ + { role: "user", content: "Read a file." }, + { role: "assistant", content: [{ type: "tool_use", id: "toolu_1", name: "Read" }] }, + ], + }; + const openAiPayload = { + thinking: { type: "adaptive" }, + messages: [ + { role: "user", content: "Read a file." }, + { role: "assistant", content: [{ type: "toolCall", id: "call_1", name: "Read" }] }, + ], + }; + const toolCallsPayload = { + thinking: { type: "adaptive" }, + messages: [{ role: "assistant", tool_calls: [{ id: "call_1", name: "Read" }] }], + }; + + expect(stripTrailingAnthropicAssistantPrefillWhenThinking(anthropicPayload)).toBe(0); + expect(stripTrailingAnthropicAssistantPrefillWhenThinking(openAiPayload)).toBe(0); + expect(stripTrailingAnthropicAssistantPrefillWhenThinking(toolCallsPayload)).toBe(0); + }); + + it("keeps assistant prefill when Anthropic thinking is disabled", () => { + const payload = { + thinking: { type: "disabled" }, + messages: [ + { role: "user", content: "Return JSON." }, + { role: "assistant", content: "{" }, + ], + }; + + expect(stripTrailingAnthropicAssistantPrefillWhenThinking(payload)).toBe(0); + expect(payload.messages).toHaveLength(2); + }); +}); + +describe("createAnthropicThinkingPrefillPayloadWrapper", () => { + it("reports stripped assistant prefill count", () => { + const payload = { + thinking: { type: "enabled" }, + messages: [ + { role: "user", content: "Return JSON." }, + { role: "assistant", content: "{" }, + ], + }; + let strippedCount = 0; + const baseStreamFn: StreamFn = (_model, _context, options) => { + options?.onPayload?.(payload as never, _model as never); + return {} as ReturnType; + }; + + const wrapped = createAnthropicThinkingPrefillPayloadWrapper( + baseStreamFn, + (stripped) => { + strippedCount = stripped; + }, + { shouldPatch: ({ model }) => model.api === "anthropic-messages" }, + ); + void wrapped({ api: "anthropic-messages" } as never, {} as never, {}); + + expect(payload.messages).toEqual([{ role: "user", content: "Return JSON." }]); + expect(strippedCount).toBe(1); + }); +}); diff --git a/src/plugin-sdk/provider-stream-shared.ts b/src/plugin-sdk/provider-stream-shared.ts index 065d699966f..8d7ad90672d 100644 --- a/src/plugin-sdk/provider-stream-shared.ts +++ b/src/plugin-sdk/provider-stream-shared.ts @@ -154,6 +154,73 @@ export function createPayloadPatchStreamWrapper( }; } +function isAnthropicThinkingEnabled(payload: Record): boolean { + const thinking = payload.thinking; + if (!thinking || typeof thinking !== "object") { + return false; + } + return (thinking as { type?: unknown }).type !== "disabled"; +} + +function assistantMessageHasAnthropicToolUse(message: Record): boolean { + if (Array.isArray(message.tool_calls) && message.tool_calls.length > 0) { + return true; + } + const content = message.content; + if (!Array.isArray(content)) { + return false; + } + return content.some( + (block) => + block && + typeof block === "object" && + ((block as { type?: unknown }).type === "tool_use" || + (block as { type?: unknown }).type === "toolCall"), + ); +} + +export function stripTrailingAnthropicAssistantPrefillWhenThinking( + payload: Record, +): number { + if (!isAnthropicThinkingEnabled(payload) || !Array.isArray(payload.messages)) { + return 0; + } + + let stripped = 0; + while (payload.messages.length > 0) { + const finalMessage = payload.messages[payload.messages.length - 1]; + if (!finalMessage || typeof finalMessage !== "object") { + break; + } + + const message = finalMessage as Record; + if (message.role !== "assistant" || assistantMessageHasAnthropicToolUse(message)) { + break; + } + + payload.messages.pop(); + stripped += 1; + } + return stripped; +} + +export function createAnthropicThinkingPrefillPayloadWrapper( + baseStreamFn: StreamFn | undefined, + onStripped?: (stripped: number) => void, + wrapperOptions?: Parameters[2], +): StreamFn { + return createPayloadPatchStreamWrapper( + baseStreamFn, + ({ payload }) => { + const stripped = stripTrailingAnthropicAssistantPrefillWhenThinking(payload); + if (stripped > 0) { + onStripped?.(stripped); + } + }, + wrapperOptions, + ); +} + export type OpenAICompatibleThinkingLevel = ProviderWrapStreamFnContext["thinkingLevel"]; export function isOpenAICompatibleThinkingEnabled(params: { diff --git a/src/plugin-sdk/provider-stream.ts b/src/plugin-sdk/provider-stream.ts index ab9c9f42354..02fccf854e4 100644 --- a/src/plugin-sdk/provider-stream.ts +++ b/src/plugin-sdk/provider-stream.ts @@ -36,6 +36,7 @@ export { applyAnthropicPayloadPolicyToParams, buildCopilotDynamicHeaders, composeProviderStreamWrappers, + createAnthropicThinkingPrefillPayloadWrapper, createBedrockNoCacheWrapper, createMoonshotThinkingWrapper, createToolStreamWrapper, @@ -48,6 +49,7 @@ export { resolveAnthropicPayloadPolicy, resolveMoonshotThinkingType, streamWithPayloadPatch, + stripTrailingAnthropicAssistantPrefillWhenThinking, } from "./provider-stream-shared.js"; export type ProviderStreamFamily =