From efab9763dc8727d8a7c14130b256e0f6bb74b45d Mon Sep 17 00:00:00 2001 From: Balaji Siva Date: Fri, 10 Apr 2026 19:14:48 -0700 Subject: [PATCH] Fix vLLM reasoning model response parsing (empty tool_calls array) (#61534) Merged via squash. Prepared head SHA: dfe6a3581c59a309bf6b005cb69dc7827098dd22 Co-authored-by: balajisiva <13068516+balajisiva@users.noreply.github.com> Co-authored-by: scoootscooob <167050519+scoootscooob@users.noreply.github.com> Reviewed-by: @scoootscooob --- CHANGELOG.md | 3 + src/agents/openai-transport-stream.test.ts | 162 +++++++++++++++++++++ src/agents/openai-transport-stream.ts | 16 +- 3 files changed, 177 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 973edfc2438..05de90c2a00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -121,6 +121,9 @@ Docs: https://docs.openclaw.ai - Agents/OpenAI replay: preserve malformed function-call arguments in stored assistant history, avoid double-encoding preserved raw strings on replay, and coerce replayed string args back to objects at Anthropic and Google provider boundaries. (#61956) Thanks @100yenadmin. - Heartbeat/config: accept and honor `agents.defaults.heartbeat.timeoutSeconds` and per-agent heartbeat timeout overrides for heartbeat agent turns. (#64491) Thanks @cedillarack. - CLI/devices: make implicit `openclaw devices approve` selection preview-only and require approving the exact request ID, preventing latest-request races during device pairing. (#64160) Thanks @coygeek. +- Media/security: honor sender-scoped `toolsBySender` policy for outbound host-media reads so denied senders cannot trigger host file disclosure via attachment hydration. (#64459) Thanks @eleqtrizit. +- Browser/security: reject strict-policy hostname navigation unless the hostname is an explicit allowlist exception or IP literal, and route CDP HTTP discovery through the pinned SSRF fetch path. (#64367) Thanks @eleqtrizit. +- Models/vLLM: ignore empty `tool_calls` arrays from reasoning-model OpenAI-compatible replies, reset false `toolUse` stop reasons when no actual tool calls were parsed, and stop sending `tool_choice` unless tools are present so vLLM reasoning responses no longer hang indefinitely. (#61197, #61534) Thanks @balajisiva. ## 2026.4.9 diff --git a/src/agents/openai-transport-stream.test.ts b/src/agents/openai-transport-stream.test.ts index 82301b746e3..31081ce1d84 100644 --- a/src/agents/openai-transport-stream.test.ts +++ b/src/agents/openai-transport-stream.test.ts @@ -6,6 +6,7 @@ import { parseTransportChunkUsage, resolveAzureOpenAIApiVersion, sanitizeTransportPayloadText, + __testing, } from "./openai-transport-stream.js"; import { attachModelProviderRequestTransport } from "./provider-request-config.js"; import { @@ -1457,4 +1458,165 @@ describe("openai transport stream", () => { expect(functionCall).toBeDefined(); expect(functionCall?.arguments).toBe("not valid json"); }); + + it("does not send tool_choice when tools are provided but toolChoice option is not set", () => { + const params = buildOpenAICompletionsParams( + { + id: "test-model", + name: "Test Model", + api: "openai-completions", + provider: "vllm", + baseUrl: "http://localhost:8000/v1", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 4096, + maxTokens: 2048, + } satisfies Model<"openai-completions">, + { + systemPrompt: "You are a helpful assistant", + messages: [], + tools: [ + { + name: "get_weather", + description: "Get weather information", + parameters: { type: "object", properties: {} }, + }, + ], + } as never, + undefined, + ); + + expect(params).toHaveProperty("tools"); + expect(params).not.toHaveProperty("tool_choice"); + }); + + it("sends tool_choice when explicitly configured", () => { + const params = buildOpenAICompletionsParams( + { + id: "test-model", + name: "Test Model", + api: "openai-completions", + provider: "vllm", + baseUrl: "http://localhost:8000/v1", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 4096, + maxTokens: 2048, + } satisfies Model<"openai-completions">, + { + systemPrompt: "You are a helpful assistant", + messages: [], + tools: [ + { + name: "get_weather", + description: "Get weather information", + parameters: { type: "object", properties: {} }, + }, + ], + } as never, + { + toolChoice: "required", + }, + ); + + expect(params).toHaveProperty("tools"); + expect(params).toHaveProperty("tool_choice", "required"); + }); + + it("resets stopReason to stop when finish_reason is tool_calls but tool_calls array is empty", async () => { + const model = { + id: "nemotron-3-super", + name: "Nemotron 3 Super", + api: "openai-completions", + provider: "vllm", + baseUrl: "http://localhost:8000/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1000000, + maxTokens: 8192, + } satisfies Model<"openai-completions">; + + const output = { + role: "assistant" as const, + content: [], + api: model.api, + provider: model.provider, + model: model.id, + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + timestamp: Date.now(), + }; + + const stream = { + push: () => {}, + }; + + const mockChunks = [ + { + id: "chatcmpl-test", + object: "chat.completion.chunk" as const, + created: 1775425651, + model: "nemotron-3-super", + choices: [ + { + index: 0, + delta: { role: "assistant" as const, content: "" }, + logprobs: null, + finish_reason: null, + }, + ], + }, + { + id: "chatcmpl-test", + object: "chat.completion.chunk" as const, + created: 1775425651, + model: "nemotron-3-super", + choices: [ + { + index: 0, + delta: { content: "4" }, + logprobs: null, + finish_reason: null, + }, + ], + }, + { + id: "chatcmpl-test", + object: "chat.completion.chunk" as const, + created: 1775425651, + model: "nemotron-3-super", + choices: [ + { + index: 0, + delta: { tool_calls: [] as never[] }, + logprobs: null, + finish_reason: "tool_calls" as const, + }, + ], + }, + ] as const; + + async function* mockStream() { + for (const chunk of mockChunks) { + yield chunk as never; + } + } + + await __testing.processOpenAICompletionsStream(mockStream(), output, model, stream); + + expect(output.stopReason).toBe("stop"); + expect(output.content.some((block) => (block as { type?: string }).type === "toolCall")).toBe( + false, + ); + }); }); diff --git a/src/agents/openai-transport-stream.ts b/src/agents/openai-transport-stream.ts index e30b93ecb23..84dbbcb5ad8 100644 --- a/src/agents/openai-transport-stream.ts +++ b/src/agents/openai-transport-stream.ts @@ -1093,7 +1093,7 @@ async function processOpenAICompletionsStream( }); continue; } - if (choice.delta.tool_calls) { + if (choice.delta.tool_calls && choice.delta.tool_calls.length > 0) { for (const toolCall of choice.delta.tool_calls) { if ( !currentBlock || @@ -1134,6 +1134,10 @@ async function processOpenAICompletionsStream( } } finishCurrentBlock(); + const hasToolCalls = output.content.some((block) => block.type === "toolCall"); + if (output.stopReason === "toolUse" && !hasToolCalls) { + output.stopReason = "stop"; + } } function detectCompat(model: OpenAIModeModel) { @@ -1312,12 +1316,12 @@ export function buildOpenAICompletionsParams( } if (context.tools) { params.tools = convertTools(context.tools, compat, model); + if (options?.toolChoice) { + params.tool_choice = options.toolChoice; + } } else if (hasToolHistory(context.messages)) { params.tools = []; } - if (options?.toolChoice) { - params.tool_choice = options.toolChoice; - } const completionsReasoningEffort = resolveOpenAICompletionsReasoningEffort(options); if (compat.thinkingFormat === "openrouter" && model.reasoning && completionsReasoningEffort) { params.reasoning = { @@ -1376,3 +1380,7 @@ function mapStopReason(reason: string | null) { }; } } + +export const __testing = { + processOpenAICompletionsStream, +};