fix: always send stream_options.include_usage when streaming openai-completions

Backends like llama-cpp and LM Studio require stream_options: { include_usage: true }
in the request payload to report token usage in streaming responses.
buildOpenAICompletionsParams() previously gated this behind the supportsUsageInStreaming
compat detection, which excluded non-standard and custom endpoints. The official OpenAI
SDK sends this field unconditionally, so we now do the same.
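
For context, a minimal sketch of the params every streaming request now carries
(field names follow the OpenAI Chat Completions wire format; the model and
message values are illustrative, not from this codebase):

    // Streaming requests now always carry stream_options, regardless of
    // what compat detection reports for the endpoint.
    const params = {
      model: "llama-3",
      messages: [{ role: "user" as const, content: "hi" }],
      stream: true,
      // llama-cpp and LM Studio only report token usage when this is set.
      stream_options: { include_usage: true },
    };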

Fixes #68707
Author:    kagura-agent
Date:      2026-04-19 08:59:40 +08:00
Committer: Ayaan Zaidi
Parent:    1212412ff1
Commit:    c560793482
2 changed files with 31 additions and 4 deletions


@@ -1251,6 +1251,35 @@ describe("openai transport stream", () => {
     expect(params.stream_options).toMatchObject({ include_usage: true });
   });
+  it("always includes stream_options.include_usage for non-standard backends like llama-cpp", () => {
+    const params = buildOpenAICompletionsParams(
+      {
+        id: "llama-3",
+        name: "Llama 3",
+        api: "openai-completions",
+        provider: "custom-cpa",
+        baseUrl: "http://localhost:8080/v1",
+        reasoning: false,
+        input: ["text"],
+        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+        contextWindow: 8192,
+        maxTokens: 4096,
+      } satisfies Model<"openai-completions">,
+      {
+        systemPrompt: "system",
+        messages: [],
+        tools: [],
+      } as never,
+      undefined,
+    ) as {
+      stream?: boolean;
+      stream_options?: { include_usage?: boolean };
+    };
+    expect(params.stream).toBe(true);
+    expect(params.stream_options).toEqual({ include_usage: true });
+  });
   it("disables developer-role-only compat defaults for configured custom proxy completions providers", () => {
     const params = buildOpenAICompletionsParams(
       {
@@ -1289,7 +1318,7 @@ describe("openai transport stream", () => {
     expect(params.messages?.[0]).toMatchObject({ role: "system" });
     expect(params).not.toHaveProperty("reasoning_effort");
-    expect(params).not.toHaveProperty("stream_options");
+    expect(params.stream_options).toMatchObject({ include_usage: true });
     expect(params).not.toHaveProperty("store");
     expect(params.tools?.[0]?.function).not.toHaveProperty("strict");
   });


@@ -1502,10 +1502,8 @@ export function buildOpenAICompletionsParams(
       ? flattenCompletionMessagesToStringContent(messages)
       : messages,
     stream: true,
+    stream_options: { include_usage: true },
   };
-  if (compat.supportsUsageInStreaming) {
-    params.stream_options = { include_usage: true };
-  }
   if (compat.supportsStore) {
     params.store = false;
   }
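
For reference, a sketch of how a caller would observe the usage this enables,
using the official openai npm SDK against a local llama-cpp server (the base
URL, API key, and model name are assumptions for illustration):

    import OpenAI from "openai";

    const client = new OpenAI({
      baseURL: "http://localhost:8080/v1", // assumed local llama-cpp endpoint
      apiKey: "sk-local", // placeholder; local servers typically don't validate it
    });

    const stream = await client.chat.completions.create({
      model: "llama-3",
      messages: [{ role: "user", content: "hi" }],
      stream: true,
      stream_options: { include_usage: true },
    });

    for await (const chunk of stream) {
      // With include_usage set, the backend emits a final chunk whose
      // choices array is empty and whose usage field carries token counts.
      if (chunk.usage) {
        console.log(chunk.usage.prompt_tokens, chunk.usage.completion_tokens);
      }
    }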