From efab9763dc8727d8a7c14130b256e0f6bb74b45d Mon Sep 17 00:00:00 2001
From: Balaji Siva <basivasu@redhat.com>
Date: Fri, 10 Apr 2026 19:14:48 -0700
Subject: [PATCH] Fix vLLM reasoning model response parsing (empty tool_calls
 array) (#61534)

Merged via squash.

Prepared head SHA: dfe6a3581c59a309bf6b005cb69dc7827098dd22
Co-authored-by: balajisiva <13068516+balajisiva@users.noreply.github.com>
Co-authored-by: scoootscooob <167050519+scoootscooob@users.noreply.github.com>
Reviewed-by: @scoootscooob
---
 CHANGELOG.md                               |   3 +
 src/agents/openai-transport-stream.test.ts | 162 +++++++++++++++++++++
 src/agents/openai-transport-stream.ts      |  16 +-
 3 files changed, 177 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 973edfc2438..05de90c2a00 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -121,6 +121,9 @@ Docs: https://docs.openclaw.ai
 - Agents/OpenAI replay: preserve malformed function-call arguments in stored assistant history, avoid double-encoding preserved raw strings on replay, and coerce replayed string args back to objects at Anthropic and Google provider boundaries. (#61956) Thanks @100yenadmin.
 - Heartbeat/config: accept and honor `agents.defaults.heartbeat.timeoutSeconds` and per-agent heartbeat timeout overrides for heartbeat agent turns. (#64491) Thanks @cedillarack.
 - CLI/devices: make implicit `openclaw devices approve` selection preview-only and require approving the exact request ID, preventing latest-request races during device pairing. (#64160) Thanks @coygeek.
+- Media/security: honor sender-scoped `toolsBySender` policy for outbound host-media reads so denied senders cannot trigger host file disclosure via attachment hydration. (#64459) Thanks @eleqtrizit.
+- Browser/security: reject strict-policy hostname navigation unless the hostname is an explicit allowlist exception or IP literal, and route CDP HTTP discovery through the pinned SSRF fetch path. (#64367) Thanks @eleqtrizit.
+- Models/vLLM: ignore empty `tool_calls` arrays from reasoning-model OpenAI-compatible replies, reset false `toolUse` stop reasons when no actual tool calls were parsed, and stop sending `tool_choice` unless tools are present so vLLM reasoning responses no longer hang indefinitely. (#61197, #61534) Thanks @balajisiva.
 
 ## 2026.4.9
 
diff --git a/src/agents/openai-transport-stream.test.ts b/src/agents/openai-transport-stream.test.ts
index 82301b746e3..31081ce1d84 100644
--- a/src/agents/openai-transport-stream.test.ts
+++ b/src/agents/openai-transport-stream.test.ts
@@ -6,6 +6,7 @@ import {
   parseTransportChunkUsage,
   resolveAzureOpenAIApiVersion,
   sanitizeTransportPayloadText,
+  __testing,
 } from "./openai-transport-stream.js";
 import { attachModelProviderRequestTransport } from "./provider-request-config.js";
 import {
@@ -1457,4 +1458,165 @@ describe("openai transport stream", () => {
     expect(functionCall).toBeDefined();
     expect(functionCall?.arguments).toBe("not valid json");
   });
+
+  it("does not send tool_choice when tools are provided but toolChoice option is not set", () => {
+    const params = buildOpenAICompletionsParams(
+      {
+        id: "test-model",
+        name: "Test Model",
+        api: "openai-completions",
+        provider: "vllm",
+        baseUrl: "http://localhost:8000/v1",
+        reasoning: false,
+        input: ["text"],
+        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+        contextWindow: 4096,
+        maxTokens: 2048,
+      } satisfies Model<"openai-completions">,
+      {
+        systemPrompt: "You are a helpful assistant",
+        messages: [],
+        tools: [
+          {
+            name: "get_weather",
+            description: "Get weather information",
+            parameters: { type: "object", properties: {} },
+          },
+        ],
+      } as never,
+      undefined,
+    );
+
+    expect(params).toHaveProperty("tools");
+    expect(params).not.toHaveProperty("tool_choice");
+  });
+
+  it("sends tool_choice when explicitly configured", () => {
+    const params = buildOpenAICompletionsParams(
+      {
+        id: "test-model",
+        name: "Test Model",
+        api: "openai-completions",
+        provider: "vllm",
+        baseUrl: "http://localhost:8000/v1",
+        reasoning: false,
+        input: ["text"],
+        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+        contextWindow: 4096,
+        maxTokens: 2048,
+      } satisfies Model<"openai-completions">,
+      {
+        systemPrompt: "You are a helpful assistant",
+        messages: [],
+        tools: [
+          {
+            name: "get_weather",
+            description: "Get weather information",
+            parameters: { type: "object", properties: {} },
+          },
+        ],
+      } as never,
+      {
+        toolChoice: "required",
+      },
+    );
+
+    expect(params).toHaveProperty("tools");
+    expect(params).toHaveProperty("tool_choice", "required");
+  });
+
+  it("resets stopReason to stop when finish_reason is tool_calls but tool_calls array is empty", async () => {
+    const model = {
+      id: "nemotron-3-super",
+      name: "Nemotron 3 Super",
+      api: "openai-completions",
+      provider: "vllm",
+      baseUrl: "http://localhost:8000/v1",
+      reasoning: true,
+      input: ["text"],
+      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+      contextWindow: 1000000,
+      maxTokens: 8192,
+    } satisfies Model<"openai-completions">;
+
+    const output = {
+      role: "assistant" as const,
+      content: [],
+      api: model.api,
+      provider: model.provider,
+      model: model.id,
+      usage: {
+        input: 0,
+        output: 0,
+        cacheRead: 0,
+        cacheWrite: 0,
+        totalTokens: 0,
+        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+      },
+      stopReason: "stop",
+      timestamp: Date.now(),
+    };
+
+    const stream = {
+      push: () => {},
+    };
+
+    const mockChunks = [
+      {
+        id: "chatcmpl-test",
+        object: "chat.completion.chunk" as const,
+        created: 1775425651,
+        model: "nemotron-3-super",
+        choices: [
+          {
+            index: 0,
+            delta: { role: "assistant" as const, content: "" },
+            logprobs: null,
+            finish_reason: null,
+          },
+        ],
+      },
+      {
+        id: "chatcmpl-test",
+        object: "chat.completion.chunk" as const,
+        created: 1775425651,
+        model: "nemotron-3-super",
+        choices: [
+          {
+            index: 0,
+            delta: { content: "4" },
+            logprobs: null,
+            finish_reason: null,
+          },
+        ],
+      },
+      {
+        id: "chatcmpl-test",
+        object: "chat.completion.chunk" as const,
+        created: 1775425651,
+        model: "nemotron-3-super",
+        choices: [
+          {
+            index: 0,
+            delta: { tool_calls: [] as never[] },
+            logprobs: null,
+            finish_reason: "tool_calls" as const,
+          },
+        ],
+      },
+    ] as const;
+
+    async function* mockStream() {
+      for (const chunk of mockChunks) {
+        yield chunk as never;
+      }
+    }
+
+    await __testing.processOpenAICompletionsStream(mockStream(), output, model, stream);
+
+    expect(output.stopReason).toBe("stop");
+    expect(output.content.some((block) => (block as { type?: string }).type === "toolCall")).toBe(
+      false,
+    );
+  });
 });
diff --git a/src/agents/openai-transport-stream.ts b/src/agents/openai-transport-stream.ts
index e30b93ecb23..84dbbcb5ad8 100644
--- a/src/agents/openai-transport-stream.ts
+++ b/src/agents/openai-transport-stream.ts
@@ -1093,7 +1093,7 @@ async function processOpenAICompletionsStream(
       });
       continue;
     }
-    if (choice.delta.tool_calls) {
+    if (choice.delta.tool_calls && choice.delta.tool_calls.length > 0) {
       for (const toolCall of choice.delta.tool_calls) {
         if (
           !currentBlock ||
@@ -1134,6 +1134,10 @@ async function processOpenAICompletionsStream(
     }
   }
   finishCurrentBlock();
+  const hasToolCalls = output.content.some((block) => block.type === "toolCall");
+  if (output.stopReason === "toolUse" && !hasToolCalls) {
+    output.stopReason = "stop";
+  }
 }
 
 function detectCompat(model: OpenAIModeModel) {
@@ -1312,12 +1316,12 @@ export function buildOpenAICompletionsParams(
   }
   if (context.tools) {
     params.tools = convertTools(context.tools, compat, model);
+    if (options?.toolChoice) {
+      params.tool_choice = options.toolChoice;
+    }
   } else if (hasToolHistory(context.messages)) {
     params.tools = [];
   }
-  if (options?.toolChoice) {
-    params.tool_choice = options.toolChoice;
-  }
   const completionsReasoningEffort = resolveOpenAICompletionsReasoningEffort(options);
   if (compat.thinkingFormat === "openrouter" && model.reasoning && completionsReasoningEffort) {
     params.reasoning = {
@@ -1376,3 +1380,7 @@ function mapStopReason(reason: string | null) {
       };
   }
 }
+
+export const __testing = {
+  processOpenAICompletionsStream,
+};