Fix vLLM reasoning model response parsing (empty tool_calls array) (#61534)

Merged via squash.

Prepared head SHA: dfe6a3581c
Co-authored-by: balajisiva <13068516+balajisiva@users.noreply.github.com>
Co-authored-by: scoootscooob <167050519+scoootscooob@users.noreply.github.com>
Reviewed-by: @scoootscooob
This commit is contained in:
Balaji Siva
2026-04-10 19:14:48 -07:00
committed by GitHub
parent 4360a59c6d
commit efab9763dc
3 changed files with 177 additions and 4 deletions

View File

@@ -121,6 +121,9 @@ Docs: https://docs.openclaw.ai
- Agents/OpenAI replay: preserve malformed function-call arguments in stored assistant history, avoid double-encoding preserved raw strings on replay, and coerce replayed string args back to objects at Anthropic and Google provider boundaries. (#61956) Thanks @100yenadmin.
- Heartbeat/config: accept and honor `agents.defaults.heartbeat.timeoutSeconds` and per-agent heartbeat timeout overrides for heartbeat agent turns. (#64491) Thanks @cedillarack.
- CLI/devices: make implicit `openclaw devices approve` selection preview-only and require approving the exact request ID, preventing latest-request races during device pairing. (#64160) Thanks @coygeek.
- Media/security: honor sender-scoped `toolsBySender` policy for outbound host-media reads so denied senders cannot trigger host file disclosure via attachment hydration. (#64459) Thanks @eleqtrizit.
- Browser/security: reject strict-policy hostname navigation unless the hostname is an explicit allowlist exception or IP literal, and route CDP HTTP discovery through the pinned SSRF fetch path. (#64367) Thanks @eleqtrizit.
- Models/vLLM: ignore empty `tool_calls` arrays from reasoning-model OpenAI-compatible replies, reset false `toolUse` stop reasons when no actual tool calls were parsed, and stop sending `tool_choice` unless tools are present so vLLM reasoning responses no longer hang indefinitely. (#61197, #61534) Thanks @balajisiva.
## 2026.4.9

View File

@@ -6,6 +6,7 @@ import {
parseTransportChunkUsage,
resolveAzureOpenAIApiVersion,
sanitizeTransportPayloadText,
__testing,
} from "./openai-transport-stream.js";
import { attachModelProviderRequestTransport } from "./provider-request-config.js";
import {
@@ -1457,4 +1458,165 @@ describe("openai transport stream", () => {
expect(functionCall).toBeDefined();
expect(functionCall?.arguments).toBe("not valid json");
});
it("does not send tool_choice when tools are provided but toolChoice option is not set", () => {
  // When the caller omits the options argument, the request payload must carry
  // the converted tools but no tool_choice key at all.
  const model = {
    id: "test-model",
    name: "Test Model",
    api: "openai-completions",
    provider: "vllm",
    baseUrl: "http://localhost:8000/v1",
    reasoning: false,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 4096,
    maxTokens: 2048,
  } satisfies Model<"openai-completions">;
  const context = {
    systemPrompt: "You are a helpful assistant",
    messages: [],
    tools: [
      {
        name: "get_weather",
        description: "Get weather information",
        parameters: { type: "object", properties: {} },
      },
    ],
  } as never;
  const params = buildOpenAICompletionsParams(model, context, undefined);
  expect(params).toHaveProperty("tools");
  expect(params).not.toHaveProperty("tool_choice");
});
it("sends tool_choice when explicitly configured", () => {
  // An explicit toolChoice option must still be forwarded verbatim alongside
  // the converted tools.
  const model = {
    id: "test-model",
    name: "Test Model",
    api: "openai-completions",
    provider: "vllm",
    baseUrl: "http://localhost:8000/v1",
    reasoning: false,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 4096,
    maxTokens: 2048,
  } satisfies Model<"openai-completions">;
  const context = {
    systemPrompt: "You are a helpful assistant",
    messages: [],
    tools: [
      {
        name: "get_weather",
        description: "Get weather information",
        parameters: { type: "object", properties: {} },
      },
    ],
  } as never;
  const options = { toolChoice: "required" };
  const params = buildOpenAICompletionsParams(model, context, options);
  expect(params).toHaveProperty("tools");
  expect(params).toHaveProperty("tool_choice", "required");
});
it("resets stopReason to stop when finish_reason is tool_calls but tool_calls array is empty", async () => {
  const model = {
    id: "nemotron-3-super",
    name: "Nemotron 3 Super",
    api: "openai-completions",
    provider: "vllm",
    baseUrl: "http://localhost:8000/v1",
    reasoning: true,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 1000000,
    maxTokens: 8192,
  } satisfies Model<"openai-completions">;
  const output = {
    role: "assistant" as const,
    content: [],
    api: model.api,
    provider: model.provider,
    model: model.id,
    usage: {
      input: 0,
      output: 0,
      cacheRead: 0,
      cacheWrite: 0,
      totalTokens: 0,
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
    },
    stopReason: "stop",
    timestamp: Date.now(),
  };
  const sink = {
    push: () => {},
  };
  // Builds one OpenAI-compatible streaming chunk; only delta and finish_reason vary.
  const chunk = (delta: object, finish_reason: "tool_calls" | null) => ({
    id: "chatcmpl-test",
    object: "chat.completion.chunk" as const,
    created: 1775425651,
    model: "nemotron-3-super",
    choices: [
      {
        index: 0,
        delta,
        logprobs: null,
        finish_reason,
      },
    ],
  });
  // Replays what a vLLM reasoning model emits: text deltas, then a final chunk
  // whose finish_reason claims tool_calls while the array itself is empty.
  async function* replay() {
    yield chunk({ role: "assistant" as const, content: "" }, null) as never;
    yield chunk({ content: "4" }, null) as never;
    yield chunk({ tool_calls: [] as never[] }, "tool_calls") as never;
  }
  await __testing.processOpenAICompletionsStream(replay(), output, model, sink);
  expect(output.stopReason).toBe("stop");
  expect(output.content.some((block) => (block as { type?: string }).type === "toolCall")).toBe(
    false,
  );
});
});

View File

@@ -1093,7 +1093,7 @@ async function processOpenAICompletionsStream(
});
continue;
}
if (choice.delta.tool_calls) {
if (choice.delta.tool_calls && choice.delta.tool_calls.length > 0) {
for (const toolCall of choice.delta.tool_calls) {
if (
!currentBlock ||
@@ -1134,6 +1134,10 @@ async function processOpenAICompletionsStream(
}
}
finishCurrentBlock();
const hasToolCalls = output.content.some((block) => block.type === "toolCall");
if (output.stopReason === "toolUse" && !hasToolCalls) {
output.stopReason = "stop";
}
}
function detectCompat(model: OpenAIModeModel) {
@@ -1312,12 +1316,12 @@ export function buildOpenAICompletionsParams(
}
if (context.tools) {
params.tools = convertTools(context.tools, compat, model);
if (options?.toolChoice) {
params.tool_choice = options.toolChoice;
}
} else if (hasToolHistory(context.messages)) {
params.tools = [];
}
if (options?.toolChoice) {
params.tool_choice = options.toolChoice;
}
const completionsReasoningEffort = resolveOpenAICompletionsReasoningEffort(options);
if (compat.thinkingFormat === "openrouter" && model.reasoning && completionsReasoningEffort) {
params.reasoning = {
@@ -1376,3 +1380,7 @@ function mapStopReason(reason: string | null) {
};
}
}
// Test-only export: exposes the stream processor so unit tests can drive it
// directly with synthetic chunk streams. Not part of the public API surface.
export const __testing = {
  processOpenAICompletionsStream,
};