diff --git a/CHANGELOG.md b/CHANGELOG.md index fd181dceaa1..7caebc7ef78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -170,6 +170,9 @@ Docs: https://docs.openclaw.ai - Webchat/security: reject remote-host `file://` URLs in the media embedding path. (#67293) Thanks @pgondhi987. - Dreaming/memory-core: use the ingestion day, not the source file day, for daily recall dedupe so repeat sweeps of the same daily note can increment `dailyCount` across days instead of stalling at `1`. (#67091) Thanks @Bartok9. - Node-host/tools.exec: let approval binding distinguish known native binaries from mutable shell payload files, while still fail-closing unknown or racy file probes so absolute-path node-host commands like `/usr/bin/whoami` no longer get rejected as unsafe interpreter/runtime commands. (#66731) Thanks @tmimmanuel. +- Codex/gateway: fix gateway crash when the codex-acp subprocess terminates abruptly; an unhandled EPIPE on the child stdin stream now routes through graceful client shutdown, rejecting pending requests instead of propagating as an uncaught exception that crashes the entire gateway daemon and all connected channels. Fixes #67886. (#67947) Thanks @openperf. +- Slack/streaming: resolve native streaming recipient teams from the inbound user when available, with a monitor-team fallback, so DM and shared-workspace streams target the right recipient more reliably. +- OpenRouter/streaming: treat `reasoning_details.response.output_text` and `reasoning_details.response.text` as visible assistant output on OpenRouter-compatible completions streams, while keeping `reasoning.text` hidden and refusing to surface ambiguous bare `text` items by default so visible replies, thinking blocks, and tool calls can coexist in the same chunk. (#67410) Thanks @neeravmakwana. 
## 2026.4.14 diff --git a/docs/.generated/config-baseline.sha256 b/docs/.generated/config-baseline.sha256 index f82d64619eb..0e7957dd132 100644 --- a/docs/.generated/config-baseline.sha256 +++ b/docs/.generated/config-baseline.sha256 @@ -1,4 +1,4 @@ -c69926b2da5cb8a329e28025506e4bca026888e9848fff930d09345a3aaa7283 config-baseline.json -85cc004ee1628ab2096af61c640d514ef68601bd4eaa1fceaacd91d20baf9833 config-baseline.core.json +5ce9d439f8cf84fc9d20c93436fea6492bdab0e84e9e51867648343e0375b670 config-baseline.json +10b7c57a6198526b846471e1bcda6e361c1f3db2e3b1cd24abd8bac11db56e16 config-baseline.core.json 99bb34fcf83ba6bb50a3fc11f170bd379bee5728b0938707fc39ebd7638e12eb config-baseline.channel.json b695cb31b4c0cf1d31f842f2892e99cc3ff8d84263ae72b72977cae844b81d6e config-baseline.plugin.json diff --git a/src/agents/openai-completions-compat.ts b/src/agents/openai-completions-compat.ts index ae7d2744fdf..248ce09a987 100644 --- a/src/agents/openai-completions-compat.ts +++ b/src/agents/openai-completions-compat.ts @@ -17,6 +17,7 @@ export type OpenAICompletionsCompatDefaults = { supportsUsageInStreaming: boolean; maxTokensField: "max_completion_tokens" | "max_tokens"; thinkingFormat: "openai" | "openrouter" | "zai"; + visibleReasoningDetailTypes: string[]; supportsStrictMode: boolean; }; @@ -82,6 +83,7 @@ export function resolveOpenAICompletionsCompatDefaults( (!isNonStandard && (!usesConfiguredNonOpenAIEndpoint || supportsNativeStreamingUsageCompat)), maxTokensField: usesMaxTokens ? "max_tokens" : "max_completion_tokens", thinkingFormat: isZai ? "zai" : isOpenRouterLike ? "openrouter" : "openai", + visibleReasoningDetailTypes: isOpenRouterLike ? 
["response.output_text", "response.text"] : [], supportsStrictMode: !isZai && !usesConfiguredNonOpenAIEndpoint, }; } diff --git a/src/agents/openai-transport-stream.test.ts b/src/agents/openai-transport-stream.test.ts index 7a32fed686a..620b76e857a 100644 --- a/src/agents/openai-transport-stream.test.ts +++ b/src/agents/openai-transport-stream.test.ts @@ -2017,4 +2017,768 @@ describe("openai transport stream", () => { { type: "thinking", thinking: " Still thinking.", thinkingSignature: "reasoning_details" }, ]); }); + + it("surfaces visible OpenRouter response text from reasoning_details without dropping tools", async () => { + const model = { + id: "openrouter/minimax/minimax-m2.7", + name: "MiniMax M2.7", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"openai-completions">; + + const output = { + role: "assistant" as const, + content: [], + api: model.api, + provider: model.provider, + model: model.id, + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + timestamp: Date.now(), + }; + + const stream: { push(event: unknown): void } = { push() {} }; + + const mockChunks = [ + { + id: "chatcmpl-minimax", + object: "chat.completion.chunk" as const, + choices: [ + { + index: 0, + delta: { + reasoning_details: [ + { type: "reasoning.text", text: "Need to look something up." }, + { type: "response.output_text", text: "Working on it." 
}, + ], + tool_calls: [ + { + id: "call_1", + type: "function" as const, + function: { name: "lookup", arguments: '{"query":"weather"}' }, + }, + ], + } as Record, + logprobs: null, + finish_reason: null, + }, + ], + }, + { + id: "chatcmpl-minimax", + object: "chat.completion.chunk" as const, + choices: [ + { + index: 0, + delta: {}, + logprobs: null, + finish_reason: "tool_calls" as const, + }, + ], + }, + ] as const; + + async function* mockStream() { + for (const chunk of mockChunks) { + yield chunk as never; + } + } + + await __testing.processOpenAICompletionsStream(mockStream(), output, model, stream); + + expect(output.stopReason).toBe("toolUse"); + expect(output.content).toMatchObject([ + { + type: "thinking", + thinking: "Need to look something up.", + thinkingSignature: "reasoning_details", + }, + { type: "text", text: "Working on it." }, + { type: "toolCall", id: "call_1", name: "lookup", arguments: { query: "weather" } }, + ]); + }); + + it("does not surface ambiguous reasoning_details text without explicit compat opt-in", async () => { + const model = { + id: "openrouter/x-ai/grok-4", + name: "Grok 4", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"openai-completions">; + + const output = { + role: "assistant" as const, + content: [], + api: model.api, + provider: model.provider, + model: model.id, + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + timestamp: Date.now(), + }; + + const stream: { push(event: unknown): void } = { push() {} }; + + const mockChunks = [ + { + id: "chatcmpl-grok", + object: "chat.completion.chunk" as const, + choices: [ + { + index: 0, + delta: { + reasoning_details: [ + { type: 
"reasoning.text", text: "Internal thought." }, + { type: "text", text: "Do not leak this by default." }, + ], + } as Record, + logprobs: null, + finish_reason: null, + }, + ], + }, + { + id: "chatcmpl-grok", + object: "chat.completion.chunk" as const, + choices: [ + { + index: 0, + delta: {}, + logprobs: null, + finish_reason: "stop" as const, + }, + ], + }, + ] as const; + + async function* mockStream() { + for (const chunk of mockChunks) { + yield chunk as never; + } + } + + await __testing.processOpenAICompletionsStream(mockStream(), output, model, stream); + + expect(output.content).toMatchObject([ + { + type: "thinking", + thinking: "Internal thought.", + thinkingSignature: "reasoning_details", + }, + ]); + }); + + it("preserves reasoning_details item order when visible text and thinking are interleaved", async () => { + const model = { + id: "openrouter/minimax/minimax-m2.7", + name: "MiniMax M2.7", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"openai-completions">; + + const output = { + role: "assistant" as const, + content: [], + api: model.api, + provider: model.provider, + model: model.id, + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + timestamp: Date.now(), + }; + + const stream: { push(event: unknown): void } = { push() {} }; + + const mockChunks = [ + { + id: "chatcmpl-minimax-order", + object: "chat.completion.chunk" as const, + choices: [ + { + index: 0, + delta: { + reasoning_details: [ + { type: "response.output_text", text: "Visible first." }, + { type: "reasoning.text", text: " Hidden second." }, + { type: "response.text", text: " Visible third." 
}, + ], + } as Record, + logprobs: null, + finish_reason: "stop" as const, + }, + ], + }, + ] as const; + + async function* mockStream() { + for (const chunk of mockChunks) { + yield chunk as never; + } + } + + await __testing.processOpenAICompletionsStream(mockStream(), output, model, stream); + + expect(output.content).toMatchObject([ + { type: "text", text: "Visible first." }, + { + type: "thinking", + thinking: " Hidden second.", + thinkingSignature: "reasoning_details", + }, + { type: "text", text: " Visible third." }, + ]); + }); + + it("does not duplicate fallback reasoning fields when reasoning_details already provided thinking", async () => { + const model = { + id: "openrouter/minimax/minimax-m2.7", + name: "MiniMax M2.7", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"openai-completions">; + + const output = { + role: "assistant" as const, + content: [], + api: model.api, + provider: model.provider, + model: model.id, + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + timestamp: Date.now(), + }; + + const stream: { push(event: unknown): void } = { push() {} }; + + const mockChunks = [ + { + id: "chatcmpl-fallback-dup", + object: "chat.completion.chunk" as const, + choices: [ + { + index: 0, + delta: { + reasoning_details: [{ type: "reasoning.text", text: "Primary reasoning." 
}], + reasoning: "Duplicate fallback reasoning.", + } as Record, + logprobs: null, + finish_reason: "stop" as const, + }, + ], + }, + ] as const; + + async function* mockStream() { + for (const chunk of mockChunks) { + yield chunk as never; + } + } + + await __testing.processOpenAICompletionsStream(mockStream(), output, model, stream); + + expect(output.content).toMatchObject([ + { + type: "thinking", + thinking: "Primary reasoning.", + thinkingSignature: "reasoning_details", + }, + ]); + }); + + it("keeps fallback thinking when reasoning_details only carries visible text", async () => { + const model = { + id: "openrouter/minimax/minimax-m2.7", + name: "MiniMax M2.7", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"openai-completions">; + + const output = { + role: "assistant" as const, + content: [], + api: model.api, + provider: model.provider, + model: model.id, + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + timestamp: Date.now(), + }; + + const stream: { push(event: unknown): void } = { push() {} }; + + const mockChunks = [ + { + id: "chatcmpl-visible-fallback", + object: "chat.completion.chunk" as const, + choices: [ + { + index: 0, + delta: { + reasoning_details: [{ type: "response.output_text", text: "Visible answer." 
}], + reasoning: "Hidden fallback reasoning.", + } as Record, + logprobs: null, + finish_reason: "stop" as const, + }, + ], + }, + ] as const; + + async function* mockStream() { + for (const chunk of mockChunks) { + yield chunk as never; + } + } + + await __testing.processOpenAICompletionsStream(mockStream(), output, model, stream); + + expect(output.content).toMatchObject([ + { type: "text", text: "Visible answer." }, + { + type: "thinking", + thinking: "Hidden fallback reasoning.", + thinkingSignature: "reasoning", + }, + ]); + }); + + it("keeps a streaming tool call intact when visible reasoning text arrives mid-call", async () => { + const model = { + id: "openrouter/minimax/minimax-m2.7", + name: "MiniMax M2.7", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"openai-completions">; + + const output = { + role: "assistant" as const, + content: [], + api: model.api, + provider: model.provider, + model: model.id, + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + timestamp: Date.now(), + }; + + const stream: { push(event: unknown): void } = { push() {} }; + + const mockChunks = [ + { + id: "chatcmpl-tool-split", + object: "chat.completion.chunk" as const, + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + id: "call_1", + type: "function" as const, + function: { name: "lookup", arguments: '{"query":' }, + }, + ], + } as Record, + logprobs: null, + finish_reason: null, + }, + ], + }, + { + id: "chatcmpl-tool-split", + object: "chat.completion.chunk" as const, + choices: [ + { + index: 0, + delta: { + reasoning_details: [{ type: "response.output_text", text: "Working on it." 
}], + tool_calls: [ + { + id: "call_1", + type: "function" as const, + function: { arguments: '"weather"}' }, + }, + ], + } as Record, + logprobs: null, + finish_reason: null, + }, + ], + }, + { + id: "chatcmpl-tool-split", + object: "chat.completion.chunk" as const, + choices: [ + { + index: 0, + delta: {}, + logprobs: null, + finish_reason: "tool_calls" as const, + }, + ], + }, + ] as const; + + async function* mockStream() { + for (const chunk of mockChunks) { + yield chunk as never; + } + } + + await __testing.processOpenAICompletionsStream(mockStream(), output, model, stream); + + expect(output.stopReason).toBe("toolUse"); + expect(output.content).toMatchObject([ + { type: "toolCall", id: "call_1", name: "lookup", arguments: { query: "weather" } }, + { type: "text", text: "Working on it." }, + ]); + }); + + it("keeps a streaming tool call intact when visible reasoning text arrives between chunks", async () => { + const model = { + id: "openrouter/minimax/minimax-m2.7", + name: "MiniMax M2.7", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"openai-completions">; + + const output = { + role: "assistant" as const, + content: [], + api: model.api, + provider: model.provider, + model: model.id, + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + timestamp: Date.now(), + }; + + const stream: { push(event: unknown): void } = { push() {} }; + + const mockChunks = [ + { + id: "chatcmpl-tool-split-gap", + object: "chat.completion.chunk" as const, + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + id: "call_1", + type: "function" as const, + function: { name: "lookup", arguments: '{"query":' }, + }, + ], + } as 
Record, + logprobs: null, + finish_reason: null, + }, + ], + }, + { + id: "chatcmpl-tool-split-gap", + object: "chat.completion.chunk" as const, + choices: [ + { + index: 0, + delta: { + reasoning_details: [{ type: "response.output_text", text: "Working on it." }], + } as Record, + logprobs: null, + finish_reason: null, + }, + ], + }, + { + id: "chatcmpl-tool-split-gap", + object: "chat.completion.chunk" as const, + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + id: "call_1", + type: "function" as const, + function: { arguments: '"weather"}' }, + }, + ], + } as Record, + logprobs: null, + finish_reason: null, + }, + ], + }, + { + id: "chatcmpl-tool-split-gap", + object: "chat.completion.chunk" as const, + choices: [ + { + index: 0, + delta: {}, + logprobs: null, + finish_reason: "tool_calls" as const, + }, + ], + }, + ] as const; + + async function* mockStream() { + for (const chunk of mockChunks) { + yield chunk as never; + } + } + + await __testing.processOpenAICompletionsStream(mockStream(), output, model, stream); + + expect(output.stopReason).toBe("toolUse"); + expect(output.content).toMatchObject([ + { type: "toolCall", id: "call_1", name: "lookup", arguments: { query: "weather" } }, + { type: "text", text: "Working on it." 
}, + ]); + }); + + it("fails fast when post-tool-call buffering grows beyond the safety cap", async () => { + const model = { + id: "openrouter/minimax/minimax-m2.7", + name: "MiniMax M2.7", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"openai-completions">; + + const output = { + role: "assistant" as const, + content: [], + api: model.api, + provider: model.provider, + model: model.id, + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + timestamp: Date.now(), + }; + + const stream: { push(event: unknown): void } = { push() {} }; + const oversizedText = "x".repeat(300_000); + + const mockChunks = [ + { + id: "chatcmpl-tool-buffer-cap", + object: "chat.completion.chunk" as const, + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + id: "call_1", + type: "function" as const, + function: { name: "lookup", arguments: '{"query":' }, + }, + ], + } as Record, + logprobs: null, + finish_reason: null, + }, + ], + }, + { + id: "chatcmpl-tool-buffer-cap", + object: "chat.completion.chunk" as const, + choices: [ + { + index: 0, + delta: { + content: oversizedText, + } as Record, + logprobs: null, + finish_reason: null, + }, + ], + }, + ] as const; + + async function* mockStream() { + for (const chunk of mockChunks) { + yield chunk as never; + } + } + + await expect( + __testing.processOpenAICompletionsStream(mockStream(), output, model, stream), + ).rejects.toThrow("Exceeded post-tool-call delta buffer limit"); + }); + + it("fails fast when streaming tool-call arguments grow beyond the safety cap", async () => { + const model = { + id: "openrouter/minimax/minimax-m2.7", + name: "MiniMax M2.7", + api: "openai-completions", 
+ provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"openai-completions">; + + const output = { + role: "assistant" as const, + content: [], + api: model.api, + provider: model.provider, + model: model.id, + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + timestamp: Date.now(), + }; + + const stream: { push(event: unknown): void } = { push() {} }; + const oversizedArgs = `"${"x".repeat(300_000)}"}`; + + const mockChunks = [ + { + id: "chatcmpl-tool-arg-cap", + object: "chat.completion.chunk" as const, + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + id: "call_1", + type: "function" as const, + function: { name: "lookup", arguments: `{${oversizedArgs}` }, + }, + ], + } as Record, + logprobs: null, + finish_reason: null, + }, + ], + }, + ] as const; + + async function* mockStream() { + for (const chunk of mockChunks) { + yield chunk as never; + } + } + + await expect( + __testing.processOpenAICompletionsStream(mockStream(), output, model, stream), + ).rejects.toThrow("Exceeded tool-call argument buffer limit"); + }); }); diff --git a/src/agents/openai-transport-stream.ts b/src/agents/openai-transport-stream.ts index 64c5946325f..baaa25c9401 100644 --- a/src/agents/openai-transport-stream.ts +++ b/src/agents/openai-transport-stream.ts @@ -1042,6 +1042,9 @@ async function processOpenAICompletionsStream( model: Model, stream: { push(event: unknown): void }, ) { + const MAX_POST_TOOL_CALL_BUFFER_BYTES = 256_000; + const MAX_TOOL_CALL_ARGUMENT_BUFFER_BYTES = 256_000; + const compat = getCompat(model as OpenAIModeModel); let currentBlock: | { type: "text"; text: string } | { type: "thinking"; thinking: string; thinkingSignature?: string } @@ 
-1053,8 +1056,12 @@ async function processOpenAICompletionsStream( partialArgs: string; } | null = null; - let pendingThinkingDelta: { signature: string; text: string } | null = null; + let pendingPostToolCallDeltas: CompletionsReasoningDelta[] = []; + let pendingPostToolCallBytes = 0; + let currentToolCallArgumentBytes = 0; + let isFlushingPendingPostToolCallDeltas = false; const blockIndex = () => output.content.length - 1; + const measureUtf8Bytes = (text: string) => Buffer.byteLength(text, "utf8"); const finishCurrentBlock = () => { if (!currentBlock) { return; @@ -1068,7 +1075,28 @@ async function processOpenAICompletionsStream( output.content[blockIndex()] = completed; } }; - const appendThinkingDelta = (reasoningDelta: { signature: string; text: string }) => { + const queuePostToolCallDelta = (next: CompletionsReasoningDelta) => { + const nextBytes = measureUtf8Bytes(next.text); + if (pendingPostToolCallBytes + nextBytes > MAX_POST_TOOL_CALL_BUFFER_BYTES) { + throw new Error("Exceeded post-tool-call delta buffer limit"); + } + pendingPostToolCallBytes += nextBytes; + const previous = pendingPostToolCallDeltas[pendingPostToolCallDeltas.length - 1]; + if (!previous || previous.kind !== next.kind) { + pendingPostToolCallDeltas.push(next); + return; + } + if (next.kind === "thinking" && previous.kind === "thinking") { + if (previous.signature !== next.signature) { + pendingPostToolCallDeltas.push(next); + return; + } + previous.text += next.text; + return; + } + previous.text += next.text; + }; + const appendThinkingDeltaInternal = (reasoningDelta: { signature: string; text: string }) => { if (!currentBlock || currentBlock.type !== "thinking") { finishCurrentBlock(); currentBlock = { @@ -1087,13 +1115,49 @@ async function processOpenAICompletionsStream( partial: output, }); }; - const flushPendingThinkingDelta = () => { - if (!pendingThinkingDelta) { + const appendTextDeltaInternal = (text: string) => { + if (!currentBlock || currentBlock.type !== "text") { + 
finishCurrentBlock(); + currentBlock = { type: "text", text: "" }; + output.content.push(currentBlock); + stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output }); + } + currentBlock.text += text; + stream.push({ + type: "text_delta", + contentIndex: blockIndex(), + delta: text, + partial: output, + }); + }; + const flushPendingPostToolCallDeltas = () => { + if ( + isFlushingPendingPostToolCallDeltas || + currentBlock?.type === "toolCall" || + pendingPostToolCallDeltas.length === 0 + ) { return; } - const bufferedDelta = pendingThinkingDelta; - pendingThinkingDelta = null; - appendThinkingDelta(bufferedDelta); + isFlushingPendingPostToolCallDeltas = true; + const bufferedDeltas = pendingPostToolCallDeltas; + pendingPostToolCallDeltas = []; + pendingPostToolCallBytes = 0; + for (const delta of bufferedDeltas) { + if (delta.kind === "text") { + appendTextDeltaInternal(delta.text); + } else { + appendThinkingDeltaInternal(delta); + } + } + isFlushingPendingPostToolCallDeltas = false; + }; + const appendThinkingDelta = (reasoningDelta: { signature: string; text: string }) => { + flushPendingPostToolCallDeltas(); + appendThinkingDeltaInternal(reasoningDelta); + }; + const appendTextDelta = (text: string) => { + flushPendingPostToolCallDeltas(); + appendTextDeltaInternal(text); }; for await (const chunk of responseStream) { output.responseId ||= chunk.id; @@ -1119,30 +1183,24 @@ async function processOpenAICompletionsStream( continue; } if (choice.delta.content) { - flushPendingThinkingDelta(); - if (!currentBlock || currentBlock.type !== "text") { - finishCurrentBlock(); - currentBlock = { type: "text", text: "" }; - output.content.push(currentBlock); - stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output }); + if (currentBlock?.type === "toolCall") { + queuePostToolCallDelta({ kind: "text", text: choice.delta.content }); + } else { + appendTextDelta(choice.delta.content); } - currentBlock.text += choice.delta.content; - 
stream.push({ - type: "text_delta", - contentIndex: blockIndex(), - delta: choice.delta.content, - partial: output, - }); continue; } - const reasoningDelta = getCompletionsReasoningDelta(choice.delta as Record); - if (reasoningDelta) { + const reasoningDeltas = getCompletionsReasoningDeltas( + choice.delta as Record, + compat.visibleReasoningDetailTypes, + ); + for (const reasoningDelta of reasoningDeltas) { if (currentBlock?.type === "toolCall") { - if (!pendingThinkingDelta) { - pendingThinkingDelta = { ...reasoningDelta }; - } else { - pendingThinkingDelta.text += reasoningDelta.text; - } + queuePostToolCallDelta({ ...reasoningDelta }); + continue; + } + if (reasoningDelta.kind === "text") { + appendTextDelta(reasoningDelta.text); } else { appendThinkingDelta(reasoningDelta); } @@ -1154,7 +1212,12 @@ async function processOpenAICompletionsStream( currentBlock.type !== "toolCall" || (toolCall.id && currentBlock.id !== toolCall.id) ) { + const switchingToolCall = currentBlock?.type === "toolCall"; finishCurrentBlock(); + if (switchingToolCall) { + currentBlock = null; + flushPendingPostToolCallDeltas(); + } currentBlock = { type: "toolCall", id: toolCall.id || "", @@ -1162,6 +1225,7 @@ async function processOpenAICompletionsStream( arguments: {}, partialArgs: "", }; + currentToolCallArgumentBytes = 0; output.content.push(currentBlock); stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output }); } @@ -1175,6 +1239,14 @@ async function processOpenAICompletionsStream( currentBlock.name = toolCall.function.name; } if (toolCall.function?.arguments) { + const nextArgumentBytes = measureUtf8Bytes(toolCall.function.arguments); + if ( + currentToolCallArgumentBytes + nextArgumentBytes > + MAX_TOOL_CALL_ARGUMENT_BUFFER_BYTES + ) { + throw new Error("Exceeded tool-call argument buffer limit"); + } + currentToolCallArgumentBytes += nextArgumentBytes; currentBlock.partialArgs += toolCall.function.arguments; currentBlock.arguments = 
parseStreamingJson(currentBlock.partialArgs); stream.push({ @@ -1186,40 +1258,81 @@ async function processOpenAICompletionsStream( } } } + flushPendingPostToolCallDeltas(); } finishCurrentBlock(); - flushPendingThinkingDelta(); + if (currentBlock?.type === "toolCall") { + currentBlock = null; + } + flushPendingPostToolCallDeltas(); const hasToolCalls = output.content.some((block) => block.type === "toolCall"); if (output.stopReason === "toolUse" && !hasToolCalls) { output.stopReason = "stop"; } } -function getCompletionsReasoningDelta(delta: Record): { - signature: string; - text: string; -} | null { +type CompletionsReasoningDelta = + | { + kind: "thinking"; + signature: string; + text: string; + } + | { + kind: "text"; + text: string; + }; + +function getCompletionsReasoningDeltas( + delta: Record, + visibleReasoningDetailTypes: readonly string[], +): CompletionsReasoningDelta[] { + const output: CompletionsReasoningDelta[] = []; + const pushDelta = (next: CompletionsReasoningDelta) => { + const previous = output[output.length - 1]; + if (!previous || previous.kind !== next.kind) { + output.push(next); + return; + } + if (next.kind === "thinking" && previous.kind === "thinking") { + if (previous.signature !== next.signature) { + output.push(next); + return; + } + previous.text += next.text; + return; + } + previous.text += next.text; + }; const reasoningDetails = delta.reasoning_details; + let usedReasoningThinkingDetails = false; if (Array.isArray(reasoningDetails)) { - let text = ""; + const visibleTypes = new Set(visibleReasoningDetailTypes); for (const item of reasoningDetails) { const detail = item as { type?: unknown; text?: unknown }; - if (detail.type === "reasoning.text" && typeof detail.text === "string" && detail.text) { - text += detail.text; + if (typeof detail.text !== "string" || !detail.text) { + continue; + } + if (detail.type === "reasoning.text") { + usedReasoningThinkingDetails = true; + pushDelta({ kind: "thinking", signature: 
"reasoning_details", text: detail.text }); + continue; + } + if (typeof detail.type === "string" && visibleTypes.has(detail.type)) { + pushDelta({ kind: "text", text: detail.text }); } } - if (text) { - return { signature: "reasoning_details", text }; + } + if (!usedReasoningThinkingDetails) { + const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"] as const; + for (const field of reasoningFields) { + const value = delta[field]; + if (typeof value === "string" && value.length > 0) { + pushDelta({ kind: "thinking", signature: field, text: value }); + break; + } } } - const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"] as const; - for (const field of reasoningFields) { - const value = delta[field]; - if (typeof value === "string" && value.length > 0) { - return { signature: field, text: value }; - } - } - return null; + return output; } function detectCompat(model: OpenAIModeModel) { @@ -1249,6 +1362,7 @@ function detectCompat(model: OpenAIModeModel) { requiresAssistantAfterToolResult: false, requiresThinkingAsText: false, thinkingFormat: compatDefaults.thinkingFormat, + visibleReasoningDetailTypes: compatDefaults.visibleReasoningDetailTypes, openRouterRouting: {}, vercelGatewayRouting: {}, supportsStrictMode: compatDefaults.supportsStrictMode, @@ -1270,6 +1384,7 @@ function getCompat(model: OpenAIModeModel): { vercelGatewayRouting: Record; supportsStrictMode: boolean; requiresStringContent: boolean; + visibleReasoningDetailTypes: string[]; } { const detected = detectCompat(model); const compat = model.compat ?? {}; @@ -1303,6 +1418,9 @@ function getCompat(model: OpenAIModeModel): { supportsStrictMode: (compat.supportsStrictMode as boolean | undefined) ?? detected.supportsStrictMode, requiresStringContent: (compat.requiresStringContent as boolean | undefined) ?? false, + visibleReasoningDetailTypes: + (compat.visibleReasoningDetailTypes as string[] | undefined) ?? 
+ detected.visibleReasoningDetailTypes, }; } diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts index 6c5cfd4d5ce..6d5bd724463 100644 --- a/src/config/schema.base.generated.ts +++ b/src/config/schema.base.generated.ts @@ -2819,6 +2819,13 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { requiresStringContent: { type: "boolean", }, + visibleReasoningDetailTypes: { + type: "array", + items: { + type: "string", + minLength: 1, + }, + }, maxTokensField: { anyOf: [ { diff --git a/src/config/types.models.ts b/src/config/types.models.ts index 5a5d52cfab5..f4a7507e44d 100644 --- a/src/config/types.models.ts +++ b/src/config/types.models.ts @@ -36,6 +36,7 @@ type SupportedThinkingFormat = export type ModelCompatConfig = SupportedOpenAICompatFields & { thinkingFormat?: SupportedThinkingFormat; + visibleReasoningDetailTypes?: string[]; supportsTools?: boolean; supportsPromptCacheKey?: boolean; requiresStringContent?: boolean; diff --git a/src/config/zod-schema.core.ts b/src/config/zod-schema.core.ts index 68280d7c6ed..d284e0f7af1 100644 --- a/src/config/zod-schema.core.ts +++ b/src/config/zod-schema.core.ts @@ -193,6 +193,7 @@ export const ModelCompatSchema = z supportsTools: z.boolean().optional(), supportsStrictMode: z.boolean().optional(), requiresStringContent: z.boolean().optional(), + visibleReasoningDetailTypes: z.array(z.string().min(1)).optional(), maxTokensField: z .union([z.literal("max_completion_tokens"), z.literal("max_tokens")]) .optional(),