Fix vLLM reasoning model response parsing (empty tool_calls array) (#61534)

Merged via squash.

Prepared head SHA: dfe6a3581c
Co-authored-by: balajisiva <13068516+balajisiva@users.noreply.github.com>
Co-authored-by: scoootscooob <167050519+scoootscooob@users.noreply.github.com>
Reviewed-by: @scoootscooob
This commit is contained in:
Balaji Siva
2026-04-10 19:14:48 -07:00
committed by GitHub
parent 4360a59c6d
commit efab9763dc
3 changed files with 177 additions and 4 deletions

View File

@@ -121,6 +121,9 @@ Docs: https://docs.openclaw.ai
- Agents/OpenAI replay: preserve malformed function-call arguments in stored assistant history, avoid double-encoding preserved raw strings on replay, and coerce replayed string args back to objects at Anthropic and Google provider boundaries. (#61956) Thanks @100yenadmin.
- Heartbeat/config: accept and honor `agents.defaults.heartbeat.timeoutSeconds` and per-agent heartbeat timeout overrides for heartbeat agent turns. (#64491) Thanks @cedillarack.
- CLI/devices: make implicit `openclaw devices approve` selection preview-only and require approving the exact request ID, preventing latest-request races during device pairing. (#64160) Thanks @coygeek.
- Media/security: honor sender-scoped `toolsBySender` policy for outbound host-media reads so denied senders cannot trigger host file disclosure via attachment hydration. (#64459) Thanks @eleqtrizit.
- Browser/security: reject strict-policy hostname navigation unless the hostname is an explicit allowlist exception or IP literal, and route CDP HTTP discovery through the pinned SSRF fetch path. (#64367) Thanks @eleqtrizit.
- Models/vLLM: ignore empty `tool_calls` arrays from reasoning-model OpenAI-compatible replies, reset false `toolUse` stop reasons when no actual tool calls were parsed, and stop sending `tool_choice` unless tools are present so vLLM reasoning responses no longer hang indefinitely. (#61197, #61534) Thanks @balajisiva.
## 2026.4.9

View File

@@ -6,6 +6,7 @@ import {
parseTransportChunkUsage,
resolveAzureOpenAIApiVersion,
sanitizeTransportPayloadText,
__testing,
} from "./openai-transport-stream.js";
import { attachModelProviderRequestTransport } from "./provider-request-config.js";
import {
@@ -1457,4 +1458,165 @@ describe("openai transport stream", () => {
expect(functionCall).toBeDefined();
expect(functionCall?.arguments).toBe("not valid json");
});
it("does not send tool_choice when tools are provided but toolChoice option is not set", () => {
  // When the caller omits the options argument, the request payload must carry
  // the converted tools but no tool_choice key at all.
  const model = {
    id: "test-model",
    name: "Test Model",
    api: "openai-completions",
    provider: "vllm",
    baseUrl: "http://localhost:8000/v1",
    reasoning: false,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 4096,
    maxTokens: 2048,
  } satisfies Model<"openai-completions">;
  const context = {
    systemPrompt: "You are a helpful assistant",
    messages: [],
    tools: [
      {
        name: "get_weather",
        description: "Get weather information",
        parameters: { type: "object", properties: {} },
      },
    ],
  } as never;
  const params = buildOpenAICompletionsParams(model, context, undefined);
  expect(params).toHaveProperty("tools");
  expect(params).not.toHaveProperty("tool_choice");
});
it("sends tool_choice when explicitly configured", () => {
  // An explicit toolChoice option must still be forwarded verbatim alongside
  // the converted tools.
  const model = {
    id: "test-model",
    name: "Test Model",
    api: "openai-completions",
    provider: "vllm",
    baseUrl: "http://localhost:8000/v1",
    reasoning: false,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 4096,
    maxTokens: 2048,
  } satisfies Model<"openai-completions">;
  const context = {
    systemPrompt: "You are a helpful assistant",
    messages: [],
    tools: [
      {
        name: "get_weather",
        description: "Get weather information",
        parameters: { type: "object", properties: {} },
      },
    ],
  } as never;
  const options = { toolChoice: "required" };
  const params = buildOpenAICompletionsParams(model, context, options);
  expect(params).toHaveProperty("tools");
  expect(params).toHaveProperty("tool_choice", "required");
});
it("resets stopReason to stop when finish_reason is tool_calls but tool_calls array is empty", async () => {
  const model = {
    id: "nemotron-3-super",
    name: "Nemotron 3 Super",
    api: "openai-completions",
    provider: "vllm",
    baseUrl: "http://localhost:8000/v1",
    reasoning: true,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 1000000,
    maxTokens: 8192,
  } satisfies Model<"openai-completions">;
  const output = {
    role: "assistant" as const,
    content: [],
    api: model.api,
    provider: model.provider,
    model: model.id,
    usage: {
      input: 0,
      output: 0,
      cacheRead: 0,
      cacheWrite: 0,
      totalTokens: 0,
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
    },
    stopReason: "stop",
    timestamp: Date.now(),
  };
  const sink = {
    push: () => {},
  };
  // Builds one OpenAI-compatible streaming chunk; only delta and finish_reason vary.
  const chunk = (delta: object, finish_reason: "tool_calls" | null) => ({
    id: "chatcmpl-test",
    object: "chat.completion.chunk" as const,
    created: 1775425651,
    model: "nemotron-3-super",
    choices: [
      {
        index: 0,
        delta,
        logprobs: null,
        finish_reason,
      },
    ],
  });
  // Replays what a vLLM reasoning model emits: text deltas, then a final chunk
  // whose finish_reason claims tool_calls while the array itself is empty.
  async function* replay() {
    yield chunk({ role: "assistant" as const, content: "" }, null) as never;
    yield chunk({ content: "4" }, null) as never;
    yield chunk({ tool_calls: [] as never[] }, "tool_calls") as never;
  }
  await __testing.processOpenAICompletionsStream(replay(), output, model, sink);
  expect(output.stopReason).toBe("stop");
  expect(output.content.some((block) => (block as { type?: string }).type === "toolCall")).toBe(
    false,
  );
});
});

View File

@@ -1093,7 +1093,7 @@ async function processOpenAICompletionsStream(
});
continue;
}
if (choice.delta.tool_calls) {
if (choice.delta.tool_calls && choice.delta.tool_calls.length > 0) {
for (const toolCall of choice.delta.tool_calls) {
if (
!currentBlock ||
@@ -1134,6 +1134,10 @@ async function processOpenAICompletionsStream(
}
}
finishCurrentBlock();
const hasToolCalls = output.content.some((block) => block.type === "toolCall");
if (output.stopReason === "toolUse" && !hasToolCalls) {
output.stopReason = "stop";
}
}
function detectCompat(model: OpenAIModeModel) {
@@ -1312,12 +1316,12 @@ export function buildOpenAICompletionsParams(
}
if (context.tools) {
params.tools = convertTools(context.tools, compat, model);
if (options?.toolChoice) {
params.tool_choice = options.toolChoice;
}
} else if (hasToolHistory(context.messages)) {
params.tools = [];
}
if (options?.toolChoice) {
params.tool_choice = options.toolChoice;
}
const completionsReasoningEffort = resolveOpenAICompletionsReasoningEffort(options);
if (compat.thinkingFormat === "openrouter" && model.reasoning && completionsReasoningEffort) {
params.reasoning = {
@@ -1376,3 +1380,7 @@ function mapStopReason(reason: string | null) {
};
}
}
// Test-only export: exposes the stream processor so unit tests can drive it
// directly with synthetic chunk streams. Not part of the public API surface.
export const __testing = {
  processOpenAICompletionsStream,
};