fix(openrouter): parse visible reasoning_details output (#68577)

* fix(openrouter): parse visible reasoning_details output

* fix(openrouter): preserve reasoning_details ordering

* fix(openrouter): harden reasoning details compat

* fix(openrouter): queue post-tool-call reasoning text

* chore(config): refresh generated schema baselines

* fix(openrouter): keep fallback reasoning with visible details

* fix(openrouter): bound streaming tool-call buffers
Vincent Koc
2026-04-18 08:18:13 -07:00
committed by GitHub
parent 66385670e4
commit 68502c90d1
8 changed files with 943 additions and 47 deletions

View File

@@ -170,6 +170,9 @@ Docs: https://docs.openclaw.ai
- Webchat/security: reject remote-host `file://` URLs in the media embedding path. (#67293) Thanks @pgondhi987.
- Dreaming/memory-core: use the ingestion day, not the source file day, for daily recall dedupe so repeat sweeps of the same daily note can increment `dailyCount` across days instead of stalling at `1`. (#67091) Thanks @Bartok9.
- Node-host/tools.exec: let approval binding distinguish known native binaries from mutable shell payload files, while still fail-closing unknown or racy file probes so absolute-path node-host commands like `/usr/bin/whoami` no longer get rejected as unsafe interpreter/runtime commands. (#66731) Thanks @tmimmanuel.
- Codex/gateway: fix gateway crash when the codex-acp subprocess terminates abruptly; an unhandled EPIPE on the child stdin stream now routes through graceful client shutdown, rejecting pending requests instead of propagating as an uncaught exception that crashes the entire gateway daemon and all connected channels. Fixes #67886. (#67947) Thanks @openperf.
- Slack/streaming: resolve native streaming recipient teams from the inbound user when available, with a monitor-team fallback, so DM and shared-workspace streams target the right recipient more reliably.
- OpenRouter/streaming: treat `reasoning_details.response.output_text` and `reasoning_details.response.text` as visible assistant output on OpenRouter-compatible completions streams, keep `reasoning.text` hidden, and refuse to surface ambiguous bare `text` items by default, so visible replies, thinking blocks, and tool calls can coexist in the same chunk. (#67410) Thanks @neeravmakwana.
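
As a minimal sketch of the classification rule this entry describes (the detail shape and helper name are illustrative, not the shipped implementation):

```ts
// Illustrative sketch of the surfacing rule described above; not the shipped code.
type ReasoningDetail = { type?: string; text?: string };

const visibleTypes = new Set(["response.output_text", "response.text"]);

function classifyDetail(detail: ReasoningDetail): "text" | "thinking" | "drop" {
  if (typeof detail.text !== "string" || detail.text.length === 0) return "drop";
  if (detail.type === "reasoning.text") return "thinking"; // stays hidden as a thinking block
  if (detail.type !== undefined && visibleTypes.has(detail.type)) return "text"; // surfaced as assistant text
  return "drop"; // ambiguous bare `text` items are not surfaced by default
}

// All three kinds can arrive in one chunk and are classified independently.
console.log(
  [
    { type: "reasoning.text", text: "Need to look something up." },
    { type: "response.output_text", text: "Working on it." },
    { type: "text", text: "Ambiguous, withheld by default." },
  ].map(classifyDetail),
); // => ["thinking", "text", "drop"]
```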
## 2026.4.14

View File

@@ -1,4 +1,4 @@
c69926b2da5cb8a329e28025506e4bca026888e9848fff930d09345a3aaa7283 config-baseline.json
85cc004ee1628ab2096af61c640d514ef68601bd4eaa1fceaacd91d20baf9833 config-baseline.core.json
5ce9d439f8cf84fc9d20c93436fea6492bdab0e84e9e51867648343e0375b670 config-baseline.json
10b7c57a6198526b846471e1bcda6e361c1f3db2e3b1cd24abd8bac11db56e16 config-baseline.core.json
99bb34fcf83ba6bb50a3fc11f170bd379bee5728b0938707fc39ebd7638e12eb config-baseline.channel.json
b695cb31b4c0cf1d31f842f2892e99cc3ff8d84263ae72b72977cae844b81d6e config-baseline.plugin.json

View File

@@ -17,6 +17,7 @@ export type OpenAICompletionsCompatDefaults = {
supportsUsageInStreaming: boolean;
maxTokensField: "max_completion_tokens" | "max_tokens";
thinkingFormat: "openai" | "openrouter" | "zai";
visibleReasoningDetailTypes: string[];
supportsStrictMode: boolean;
};
@@ -82,6 +83,7 @@ export function resolveOpenAICompletionsCompatDefaults(
(!isNonStandard && (!usesConfiguredNonOpenAIEndpoint || supportsNativeStreamingUsageCompat)),
maxTokensField: usesMaxTokens ? "max_tokens" : "max_completion_tokens",
thinkingFormat: isZai ? "zai" : isOpenRouterLike ? "openrouter" : "openai",
visibleReasoningDetailTypes: isOpenRouterLike ? ["response.output_text", "response.text"] : [],
supportsStrictMode: !isZai && !usesConfiguredNonOpenAIEndpoint,
};
}
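
Spelled out, the `isOpenRouterLike` gate above resolves the new default as follows; the standalone helper here is a stand-in for the resolver, not part of the diff:

```ts
// Minimal stand-in for the gate above: only OpenRouter-like endpoints opt into visible detail types.
function defaultVisibleReasoningDetailTypes(isOpenRouterLike: boolean): string[] {
  return isOpenRouterLike ? ["response.output_text", "response.text"] : [];
}

console.log(defaultVisibleReasoningDetailTypes(true));  // ["response.output_text", "response.text"]
console.log(defaultVisibleReasoningDetailTypes(false)); // [] — reasoning_details text is never surfaced as assistant text
```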

View File

@@ -2017,4 +2017,768 @@ describe("openai transport stream", () => {
{ type: "thinking", thinking: " Still thinking.", thinkingSignature: "reasoning_details" },
]);
});
it("surfaces visible OpenRouter response text from reasoning_details without dropping tools", async () => {
const model = {
id: "openrouter/minimax/minimax-m2.7",
name: "MiniMax M2.7",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 200000,
maxTokens: 8192,
} satisfies Model<"openai-completions">;
const output = {
role: "assistant" as const,
content: [],
api: model.api,
provider: model.provider,
model: model.id,
usage: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
timestamp: Date.now(),
};
const stream: { push(event: unknown): void } = { push() {} };
const mockChunks = [
{
id: "chatcmpl-minimax",
object: "chat.completion.chunk" as const,
choices: [
{
index: 0,
delta: {
reasoning_details: [
{ type: "reasoning.text", text: "Need to look something up." },
{ type: "response.output_text", text: "Working on it." },
],
tool_calls: [
{
id: "call_1",
type: "function" as const,
function: { name: "lookup", arguments: '{"query":"weather"}' },
},
],
} as Record<string, unknown>,
logprobs: null,
finish_reason: null,
},
],
},
{
id: "chatcmpl-minimax",
object: "chat.completion.chunk" as const,
choices: [
{
index: 0,
delta: {},
logprobs: null,
finish_reason: "tool_calls" as const,
},
],
},
] as const;
async function* mockStream() {
for (const chunk of mockChunks) {
yield chunk as never;
}
}
await __testing.processOpenAICompletionsStream(mockStream(), output, model, stream);
expect(output.stopReason).toBe("toolUse");
expect(output.content).toMatchObject([
{
type: "thinking",
thinking: "Need to look something up.",
thinkingSignature: "reasoning_details",
},
{ type: "text", text: "Working on it." },
{ type: "toolCall", id: "call_1", name: "lookup", arguments: { query: "weather" } },
]);
});
it("does not surface ambiguous reasoning_details text without explicit compat opt-in", async () => {
const model = {
id: "openrouter/x-ai/grok-4",
name: "Grok 4",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 200000,
maxTokens: 8192,
} satisfies Model<"openai-completions">;
const output = {
role: "assistant" as const,
content: [],
api: model.api,
provider: model.provider,
model: model.id,
usage: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
timestamp: Date.now(),
};
const stream: { push(event: unknown): void } = { push() {} };
const mockChunks = [
{
id: "chatcmpl-grok",
object: "chat.completion.chunk" as const,
choices: [
{
index: 0,
delta: {
reasoning_details: [
{ type: "reasoning.text", text: "Internal thought." },
{ type: "text", text: "Do not leak this by default." },
],
} as Record<string, unknown>,
logprobs: null,
finish_reason: null,
},
],
},
{
id: "chatcmpl-grok",
object: "chat.completion.chunk" as const,
choices: [
{
index: 0,
delta: {},
logprobs: null,
finish_reason: "stop" as const,
},
],
},
] as const;
async function* mockStream() {
for (const chunk of mockChunks) {
yield chunk as never;
}
}
await __testing.processOpenAICompletionsStream(mockStream(), output, model, stream);
expect(output.content).toMatchObject([
{
type: "thinking",
thinking: "Internal thought.",
thinkingSignature: "reasoning_details",
},
]);
});
it("preserves reasoning_details item order when visible text and thinking are interleaved", async () => {
const model = {
id: "openrouter/minimax/minimax-m2.7",
name: "MiniMax M2.7",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 200000,
maxTokens: 8192,
} satisfies Model<"openai-completions">;
const output = {
role: "assistant" as const,
content: [],
api: model.api,
provider: model.provider,
model: model.id,
usage: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
timestamp: Date.now(),
};
const stream: { push(event: unknown): void } = { push() {} };
const mockChunks = [
{
id: "chatcmpl-minimax-order",
object: "chat.completion.chunk" as const,
choices: [
{
index: 0,
delta: {
reasoning_details: [
{ type: "response.output_text", text: "Visible first." },
{ type: "reasoning.text", text: " Hidden second." },
{ type: "response.text", text: " Visible third." },
],
} as Record<string, unknown>,
logprobs: null,
finish_reason: "stop" as const,
},
],
},
] as const;
async function* mockStream() {
for (const chunk of mockChunks) {
yield chunk as never;
}
}
await __testing.processOpenAICompletionsStream(mockStream(), output, model, stream);
expect(output.content).toMatchObject([
{ type: "text", text: "Visible first." },
{
type: "thinking",
thinking: " Hidden second.",
thinkingSignature: "reasoning_details",
},
{ type: "text", text: " Visible third." },
]);
});
it("does not duplicate fallback reasoning fields when reasoning_details already provided thinking", async () => {
const model = {
id: "openrouter/minimax/minimax-m2.7",
name: "MiniMax M2.7",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 200000,
maxTokens: 8192,
} satisfies Model<"openai-completions">;
const output = {
role: "assistant" as const,
content: [],
api: model.api,
provider: model.provider,
model: model.id,
usage: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
timestamp: Date.now(),
};
const stream: { push(event: unknown): void } = { push() {} };
const mockChunks = [
{
id: "chatcmpl-fallback-dup",
object: "chat.completion.chunk" as const,
choices: [
{
index: 0,
delta: {
reasoning_details: [{ type: "reasoning.text", text: "Primary reasoning." }],
reasoning: "Duplicate fallback reasoning.",
} as Record<string, unknown>,
logprobs: null,
finish_reason: "stop" as const,
},
],
},
] as const;
async function* mockStream() {
for (const chunk of mockChunks) {
yield chunk as never;
}
}
await __testing.processOpenAICompletionsStream(mockStream(), output, model, stream);
expect(output.content).toMatchObject([
{
type: "thinking",
thinking: "Primary reasoning.",
thinkingSignature: "reasoning_details",
},
]);
});
it("keeps fallback thinking when reasoning_details only carries visible text", async () => {
const model = {
id: "openrouter/minimax/minimax-m2.7",
name: "MiniMax M2.7",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 200000,
maxTokens: 8192,
} satisfies Model<"openai-completions">;
const output = {
role: "assistant" as const,
content: [],
api: model.api,
provider: model.provider,
model: model.id,
usage: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
timestamp: Date.now(),
};
const stream: { push(event: unknown): void } = { push() {} };
const mockChunks = [
{
id: "chatcmpl-visible-fallback",
object: "chat.completion.chunk" as const,
choices: [
{
index: 0,
delta: {
reasoning_details: [{ type: "response.output_text", text: "Visible answer." }],
reasoning: "Hidden fallback reasoning.",
} as Record<string, unknown>,
logprobs: null,
finish_reason: "stop" as const,
},
],
},
] as const;
async function* mockStream() {
for (const chunk of mockChunks) {
yield chunk as never;
}
}
await __testing.processOpenAICompletionsStream(mockStream(), output, model, stream);
expect(output.content).toMatchObject([
{ type: "text", text: "Visible answer." },
{
type: "thinking",
thinking: "Hidden fallback reasoning.",
thinkingSignature: "reasoning",
},
]);
});
it("keeps a streaming tool call intact when visible reasoning text arrives mid-call", async () => {
const model = {
id: "openrouter/minimax/minimax-m2.7",
name: "MiniMax M2.7",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 200000,
maxTokens: 8192,
} satisfies Model<"openai-completions">;
const output = {
role: "assistant" as const,
content: [],
api: model.api,
provider: model.provider,
model: model.id,
usage: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
timestamp: Date.now(),
};
const stream: { push(event: unknown): void } = { push() {} };
const mockChunks = [
{
id: "chatcmpl-tool-split",
object: "chat.completion.chunk" as const,
choices: [
{
index: 0,
delta: {
tool_calls: [
{
id: "call_1",
type: "function" as const,
function: { name: "lookup", arguments: '{"query":' },
},
],
} as Record<string, unknown>,
logprobs: null,
finish_reason: null,
},
],
},
{
id: "chatcmpl-tool-split",
object: "chat.completion.chunk" as const,
choices: [
{
index: 0,
delta: {
reasoning_details: [{ type: "response.output_text", text: "Working on it." }],
tool_calls: [
{
id: "call_1",
type: "function" as const,
function: { arguments: '"weather"}' },
},
],
} as Record<string, unknown>,
logprobs: null,
finish_reason: null,
},
],
},
{
id: "chatcmpl-tool-split",
object: "chat.completion.chunk" as const,
choices: [
{
index: 0,
delta: {},
logprobs: null,
finish_reason: "tool_calls" as const,
},
],
},
] as const;
async function* mockStream() {
for (const chunk of mockChunks) {
yield chunk as never;
}
}
await __testing.processOpenAICompletionsStream(mockStream(), output, model, stream);
expect(output.stopReason).toBe("toolUse");
expect(output.content).toMatchObject([
{ type: "toolCall", id: "call_1", name: "lookup", arguments: { query: "weather" } },
{ type: "text", text: "Working on it." },
]);
});
it("keeps a streaming tool call intact when visible reasoning text arrives between chunks", async () => {
const model = {
id: "openrouter/minimax/minimax-m2.7",
name: "MiniMax M2.7",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 200000,
maxTokens: 8192,
} satisfies Model<"openai-completions">;
const output = {
role: "assistant" as const,
content: [],
api: model.api,
provider: model.provider,
model: model.id,
usage: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
timestamp: Date.now(),
};
const stream: { push(event: unknown): void } = { push() {} };
const mockChunks = [
{
id: "chatcmpl-tool-split-gap",
object: "chat.completion.chunk" as const,
choices: [
{
index: 0,
delta: {
tool_calls: [
{
id: "call_1",
type: "function" as const,
function: { name: "lookup", arguments: '{"query":' },
},
],
} as Record<string, unknown>,
logprobs: null,
finish_reason: null,
},
],
},
{
id: "chatcmpl-tool-split-gap",
object: "chat.completion.chunk" as const,
choices: [
{
index: 0,
delta: {
reasoning_details: [{ type: "response.output_text", text: "Working on it." }],
} as Record<string, unknown>,
logprobs: null,
finish_reason: null,
},
],
},
{
id: "chatcmpl-tool-split-gap",
object: "chat.completion.chunk" as const,
choices: [
{
index: 0,
delta: {
tool_calls: [
{
id: "call_1",
type: "function" as const,
function: { arguments: '"weather"}' },
},
],
} as Record<string, unknown>,
logprobs: null,
finish_reason: null,
},
],
},
{
id: "chatcmpl-tool-split-gap",
object: "chat.completion.chunk" as const,
choices: [
{
index: 0,
delta: {},
logprobs: null,
finish_reason: "tool_calls" as const,
},
],
},
] as const;
async function* mockStream() {
for (const chunk of mockChunks) {
yield chunk as never;
}
}
await __testing.processOpenAICompletionsStream(mockStream(), output, model, stream);
expect(output.stopReason).toBe("toolUse");
expect(output.content).toMatchObject([
{ type: "toolCall", id: "call_1", name: "lookup", arguments: { query: "weather" } },
{ type: "text", text: "Working on it." },
]);
});
it("fails fast when post-tool-call buffering grows beyond the safety cap", async () => {
const model = {
id: "openrouter/minimax/minimax-m2.7",
name: "MiniMax M2.7",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 200000,
maxTokens: 8192,
} satisfies Model<"openai-completions">;
const output = {
role: "assistant" as const,
content: [],
api: model.api,
provider: model.provider,
model: model.id,
usage: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
timestamp: Date.now(),
};
const stream: { push(event: unknown): void } = { push() {} };
const oversizedText = "x".repeat(300_000);
const mockChunks = [
{
id: "chatcmpl-tool-buffer-cap",
object: "chat.completion.chunk" as const,
choices: [
{
index: 0,
delta: {
tool_calls: [
{
id: "call_1",
type: "function" as const,
function: { name: "lookup", arguments: '{"query":' },
},
],
} as Record<string, unknown>,
logprobs: null,
finish_reason: null,
},
],
},
{
id: "chatcmpl-tool-buffer-cap",
object: "chat.completion.chunk" as const,
choices: [
{
index: 0,
delta: {
content: oversizedText,
} as Record<string, unknown>,
logprobs: null,
finish_reason: null,
},
],
},
] as const;
async function* mockStream() {
for (const chunk of mockChunks) {
yield chunk as never;
}
}
await expect(
__testing.processOpenAICompletionsStream(mockStream(), output, model, stream),
).rejects.toThrow("Exceeded post-tool-call delta buffer limit");
});
it("fails fast when streaming tool-call arguments grow beyond the safety cap", async () => {
const model = {
id: "openrouter/minimax/minimax-m2.7",
name: "MiniMax M2.7",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 200000,
maxTokens: 8192,
} satisfies Model<"openai-completions">;
const output = {
role: "assistant" as const,
content: [],
api: model.api,
provider: model.provider,
model: model.id,
usage: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
timestamp: Date.now(),
};
const stream: { push(event: unknown): void } = { push() {} };
const oversizedArgs = `"${"x".repeat(300_000)}"}`;
const mockChunks = [
{
id: "chatcmpl-tool-arg-cap",
object: "chat.completion.chunk" as const,
choices: [
{
index: 0,
delta: {
tool_calls: [
{
id: "call_1",
type: "function" as const,
function: { name: "lookup", arguments: `{${oversizedArgs}` },
},
],
} as Record<string, unknown>,
logprobs: null,
finish_reason: null,
},
],
},
] as const;
async function* mockStream() {
for (const chunk of mockChunks) {
yield chunk as never;
}
}
await expect(
__testing.processOpenAICompletionsStream(mockStream(), output, model, stream),
).rejects.toThrow("Exceeded tool-call argument buffer limit");
});
});

View File

@@ -1042,6 +1042,9 @@ async function processOpenAICompletionsStream(
model: Model<Api>,
stream: { push(event: unknown): void },
) {
const MAX_POST_TOOL_CALL_BUFFER_BYTES = 256_000;
const MAX_TOOL_CALL_ARGUMENT_BUFFER_BYTES = 256_000;
const compat = getCompat(model as OpenAIModeModel);
let currentBlock:
| { type: "text"; text: string }
| { type: "thinking"; thinking: string; thinkingSignature?: string }
@@ -1053,8 +1056,12 @@ async function processOpenAICompletionsStream(
partialArgs: string;
}
| null = null;
let pendingThinkingDelta: { signature: string; text: string } | null = null;
let pendingPostToolCallDeltas: CompletionsReasoningDelta[] = [];
let pendingPostToolCallBytes = 0;
let currentToolCallArgumentBytes = 0;
let isFlushingPendingPostToolCallDeltas = false;
const blockIndex = () => output.content.length - 1;
const measureUtf8Bytes = (text: string) => Buffer.byteLength(text, "utf8");
const finishCurrentBlock = () => {
if (!currentBlock) {
return;
@@ -1068,7 +1075,28 @@ async function processOpenAICompletionsStream(
output.content[blockIndex()] = completed;
}
};
const appendThinkingDelta = (reasoningDelta: { signature: string; text: string }) => {
const queuePostToolCallDelta = (next: CompletionsReasoningDelta) => {
const nextBytes = measureUtf8Bytes(next.text);
if (pendingPostToolCallBytes + nextBytes > MAX_POST_TOOL_CALL_BUFFER_BYTES) {
throw new Error("Exceeded post-tool-call delta buffer limit");
}
pendingPostToolCallBytes += nextBytes;
const previous = pendingPostToolCallDeltas[pendingPostToolCallDeltas.length - 1];
if (!previous || previous.kind !== next.kind) {
pendingPostToolCallDeltas.push(next);
return;
}
if (next.kind === "thinking" && previous.kind === "thinking") {
if (previous.signature !== next.signature) {
pendingPostToolCallDeltas.push(next);
return;
}
previous.text += next.text;
return;
}
previous.text += next.text;
};
const appendThinkingDeltaInternal = (reasoningDelta: { signature: string; text: string }) => {
if (!currentBlock || currentBlock.type !== "thinking") {
finishCurrentBlock();
currentBlock = {
@@ -1087,13 +1115,49 @@ async function processOpenAICompletionsStream(
partial: output,
});
};
const flushPendingThinkingDelta = () => {
if (!pendingThinkingDelta) {
const appendTextDeltaInternal = (text: string) => {
if (!currentBlock || currentBlock.type !== "text") {
finishCurrentBlock();
currentBlock = { type: "text", text: "" };
output.content.push(currentBlock);
stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
}
currentBlock.text += text;
stream.push({
type: "text_delta",
contentIndex: blockIndex(),
delta: text,
partial: output,
});
};
const flushPendingPostToolCallDeltas = () => {
if (
isFlushingPendingPostToolCallDeltas ||
currentBlock?.type === "toolCall" ||
pendingPostToolCallDeltas.length === 0
) {
return;
}
const bufferedDelta = pendingThinkingDelta;
pendingThinkingDelta = null;
appendThinkingDelta(bufferedDelta);
isFlushingPendingPostToolCallDeltas = true;
const bufferedDeltas = pendingPostToolCallDeltas;
pendingPostToolCallDeltas = [];
pendingPostToolCallBytes = 0;
for (const delta of bufferedDeltas) {
if (delta.kind === "text") {
appendTextDeltaInternal(delta.text);
} else {
appendThinkingDeltaInternal(delta);
}
}
isFlushingPendingPostToolCallDeltas = false;
};
const appendThinkingDelta = (reasoningDelta: { signature: string; text: string }) => {
flushPendingPostToolCallDeltas();
appendThinkingDeltaInternal(reasoningDelta);
};
const appendTextDelta = (text: string) => {
flushPendingPostToolCallDeltas();
appendTextDeltaInternal(text);
};
for await (const chunk of responseStream) {
output.responseId ||= chunk.id;
@@ -1119,30 +1183,24 @@ async function processOpenAICompletionsStream(
continue;
}
if (choice.delta.content) {
flushPendingThinkingDelta();
if (!currentBlock || currentBlock.type !== "text") {
finishCurrentBlock();
currentBlock = { type: "text", text: "" };
output.content.push(currentBlock);
stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
if (currentBlock?.type === "toolCall") {
queuePostToolCallDelta({ kind: "text", text: choice.delta.content });
} else {
appendTextDelta(choice.delta.content);
}
currentBlock.text += choice.delta.content;
stream.push({
type: "text_delta",
contentIndex: blockIndex(),
delta: choice.delta.content,
partial: output,
});
continue;
}
const reasoningDelta = getCompletionsReasoningDelta(choice.delta as Record<string, unknown>);
if (reasoningDelta) {
const reasoningDeltas = getCompletionsReasoningDeltas(
choice.delta as Record<string, unknown>,
compat.visibleReasoningDetailTypes,
);
for (const reasoningDelta of reasoningDeltas) {
if (currentBlock?.type === "toolCall") {
if (!pendingThinkingDelta) {
pendingThinkingDelta = { ...reasoningDelta };
} else {
pendingThinkingDelta.text += reasoningDelta.text;
}
queuePostToolCallDelta({ ...reasoningDelta });
continue;
}
if (reasoningDelta.kind === "text") {
appendTextDelta(reasoningDelta.text);
} else {
appendThinkingDelta(reasoningDelta);
}
@@ -1154,7 +1212,12 @@ async function processOpenAICompletionsStream(
currentBlock.type !== "toolCall" ||
(toolCall.id && currentBlock.id !== toolCall.id)
) {
const switchingToolCall = currentBlock?.type === "toolCall";
finishCurrentBlock();
if (switchingToolCall) {
currentBlock = null;
flushPendingPostToolCallDeltas();
}
currentBlock = {
type: "toolCall",
id: toolCall.id || "",
@@ -1162,6 +1225,7 @@ async function processOpenAICompletionsStream(
arguments: {},
partialArgs: "",
};
currentToolCallArgumentBytes = 0;
output.content.push(currentBlock);
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
}
@@ -1175,6 +1239,14 @@ async function processOpenAICompletionsStream(
currentBlock.name = toolCall.function.name;
}
if (toolCall.function?.arguments) {
const nextArgumentBytes = measureUtf8Bytes(toolCall.function.arguments);
if (
currentToolCallArgumentBytes + nextArgumentBytes >
MAX_TOOL_CALL_ARGUMENT_BUFFER_BYTES
) {
throw new Error("Exceeded tool-call argument buffer limit");
}
currentToolCallArgumentBytes += nextArgumentBytes;
currentBlock.partialArgs += toolCall.function.arguments;
currentBlock.arguments = parseStreamingJson(currentBlock.partialArgs);
stream.push({
@@ -1186,40 +1258,81 @@ async function processOpenAICompletionsStream(
}
}
}
flushPendingPostToolCallDeltas();
}
finishCurrentBlock();
flushPendingThinkingDelta();
if (currentBlock?.type === "toolCall") {
currentBlock = null;
}
flushPendingPostToolCallDeltas();
const hasToolCalls = output.content.some((block) => block.type === "toolCall");
if (output.stopReason === "toolUse" && !hasToolCalls) {
output.stopReason = "stop";
}
}
function getCompletionsReasoningDelta(delta: Record<string, unknown>): {
signature: string;
text: string;
} | null {
type CompletionsReasoningDelta =
| {
kind: "thinking";
signature: string;
text: string;
}
| {
kind: "text";
text: string;
};
function getCompletionsReasoningDeltas(
delta: Record<string, unknown>,
visibleReasoningDetailTypes: readonly string[],
): CompletionsReasoningDelta[] {
const output: CompletionsReasoningDelta[] = [];
const pushDelta = (next: CompletionsReasoningDelta) => {
const previous = output[output.length - 1];
if (!previous || previous.kind !== next.kind) {
output.push(next);
return;
}
if (next.kind === "thinking" && previous.kind === "thinking") {
if (previous.signature !== next.signature) {
output.push(next);
return;
}
previous.text += next.text;
return;
}
previous.text += next.text;
};
const reasoningDetails = delta.reasoning_details;
let usedReasoningThinkingDetails = false;
if (Array.isArray(reasoningDetails)) {
let text = "";
const visibleTypes = new Set(visibleReasoningDetailTypes);
for (const item of reasoningDetails) {
const detail = item as { type?: unknown; text?: unknown };
if (detail.type === "reasoning.text" && typeof detail.text === "string" && detail.text) {
text += detail.text;
if (typeof detail.text !== "string" || !detail.text) {
continue;
}
if (detail.type === "reasoning.text") {
usedReasoningThinkingDetails = true;
pushDelta({ kind: "thinking", signature: "reasoning_details", text: detail.text });
continue;
}
if (typeof detail.type === "string" && visibleTypes.has(detail.type)) {
pushDelta({ kind: "text", text: detail.text });
}
}
if (text) {
return { signature: "reasoning_details", text };
}
if (!usedReasoningThinkingDetails) {
const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"] as const;
for (const field of reasoningFields) {
const value = delta[field];
if (typeof value === "string" && value.length > 0) {
pushDelta({ kind: "thinking", signature: field, text: value });
break;
}
}
}
const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"] as const;
for (const field of reasoningFields) {
const value = delta[field];
if (typeof value === "string" && value.length > 0) {
return { signature: field, text: value };
}
}
return null;
return output;
}
function detectCompat(model: OpenAIModeModel) {
@@ -1249,6 +1362,7 @@ function detectCompat(model: OpenAIModeModel) {
requiresAssistantAfterToolResult: false,
requiresThinkingAsText: false,
thinkingFormat: compatDefaults.thinkingFormat,
visibleReasoningDetailTypes: compatDefaults.visibleReasoningDetailTypes,
openRouterRouting: {},
vercelGatewayRouting: {},
supportsStrictMode: compatDefaults.supportsStrictMode,
@@ -1270,6 +1384,7 @@ function getCompat(model: OpenAIModeModel): {
vercelGatewayRouting: Record<string, unknown>;
supportsStrictMode: boolean;
requiresStringContent: boolean;
visibleReasoningDetailTypes: string[];
} {
const detected = detectCompat(model);
const compat = model.compat ?? {};
@@ -1303,6 +1418,9 @@ function getCompat(model: OpenAIModeModel): {
supportsStrictMode:
(compat.supportsStrictMode as boolean | undefined) ?? detected.supportsStrictMode,
requiresStringContent: (compat.requiresStringContent as boolean | undefined) ?? false,
visibleReasoningDetailTypes:
(compat.visibleReasoningDetailTypes as string[] | undefined) ??
detected.visibleReasoningDetailTypes,
};
}
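
The precedence here is the usual compat pattern: an explicit per-model `compat` value wins over the detected default. A hedged sketch of just that fallback (the real `getCompat` merges many more fields):

```ts
// Sketch of the ?? fallback for this one field; a model-level override always wins,
// and an explicit empty array is a valid way to turn surfacing off for an OpenRouter model.
function resolveVisibleTypes(
  compat: { visibleReasoningDetailTypes?: string[] },
  detected: { visibleReasoningDetailTypes: string[] },
): string[] {
  return compat.visibleReasoningDetailTypes ?? detected.visibleReasoningDetailTypes;
}

console.log(resolveVisibleTypes({}, { visibleReasoningDetailTypes: ["response.output_text"] }));
// ["response.output_text"] — detected default used
console.log(resolveVisibleTypes({ visibleReasoningDetailTypes: [] }, { visibleReasoningDetailTypes: ["response.output_text"] }));
// [] — explicit override disables surfacing
```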

View File

@@ -2819,6 +2819,13 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
requiresStringContent: {
type: "boolean",
},
visibleReasoningDetailTypes: {
type: "array",
items: {
type: "string",
minLength: 1,
},
},
maxTokensField: {
anyOf: [
{

View File

@@ -36,6 +36,7 @@ type SupportedThinkingFormat =
export type ModelCompatConfig = SupportedOpenAICompatFields & {
thinkingFormat?: SupportedThinkingFormat;
visibleReasoningDetailTypes?: string[];
supportsTools?: boolean;
supportsPromptCacheKey?: boolean;
requiresStringContent?: boolean;
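
For a provider that is not detected as OpenRouter-like, the tests above rely on an explicit compat opt-in; a hedged example of what such a `ModelCompatConfig` fragment could look like, limited to the fields shown in this hunk:

```ts
// Hypothetical opt-in for a custom endpoint whose reasoning_details carry visible response text.
const compat /* : ModelCompatConfig */ = {
  thinkingFormat: "openrouter",
  visibleReasoningDetailTypes: ["response.output_text", "response.text"],
  // Adding "text" here would also surface bare `text` items, which the defaults deliberately withhold.
};
```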

View File

@@ -193,6 +193,7 @@ export const ModelCompatSchema = z
supportsTools: z.boolean().optional(),
supportsStrictMode: z.boolean().optional(),
requiresStringContent: z.boolean().optional(),
visibleReasoningDetailTypes: z.array(z.string().min(1)).optional(),
maxTokensField: z
.union([z.literal("max_completion_tokens"), z.literal("max_tokens")])
.optional(),
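
A quick sketch of what the added schema line accepts and rejects, using a standalone zod fragment rather than the full `ModelCompatSchema`:

```ts
import { z } from "zod";

// Standalone fragment mirroring the added field; the real ModelCompatSchema wraps many more keys.
const visibleReasoningDetailTypes = z.array(z.string().min(1)).optional();

console.log(visibleReasoningDetailTypes.safeParse(["response.output_text", "response.text"]).success); // true
console.log(visibleReasoningDetailTypes.safeParse(undefined).success); // true — the field is optional
console.log(visibleReasoningDetailTypes.safeParse([""]).success); // false — empty strings are rejected by min(1)
```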