fix(ollama): enable streaming usage for openai-compat (#66439)

* fix(ollama): enable streaming usage for openai-compat

* Update CHANGELOG.md
This commit is contained in:
Vincent Koc
2026-04-14 09:57:42 +01:00
committed by GitHub
parent b90d4ea3d7
commit 4f15d77ecc
4 changed files with 67 additions and 2 deletions

View File

@@ -0,0 +1,34 @@
import { describe, expect, it } from "vitest";
import { resolveOpenAICompletionsCompatDefaults } from "./openai-completions-compat.js";
describe("resolveOpenAICompletionsCompatDefaults", () => {
  // Resolve compat defaults for the given input and return only the
  // streaming-usage flag, which is what every case below asserts on.
  const streamingUsageFor = (
    input: Parameters<typeof resolveOpenAICompletionsCompatDefaults>[0],
  ) => resolveOpenAICompletionsCompatDefaults(input).supportsUsageInStreaming;

  it("enables streaming usage for local ollama OpenAI-compat endpoints", () => {
    expect(
      streamingUsageFor({
        provider: "ollama",
        endpointClass: "local",
        knownProviderFamily: "ollama",
      }),
    ).toBe(true);
  });

  it("keeps streaming usage enabled for custom ollama OpenAI-compat endpoints", () => {
    expect(
      streamingUsageFor({
        provider: "ollama",
        endpointClass: "custom",
        knownProviderFamily: "ollama",
      }),
    ).toBe(true);
  });

  it("does not broaden streaming usage for generic custom providers", () => {
    expect(
      streamingUsageFor({
        provider: "custom-cpa",
        endpointClass: "custom",
        knownProviderFamily: "custom-cpa",
      }),
    ).toBe(false);
  });
});

View File

@@ -33,6 +33,7 @@ export function resolveOpenAICompletionsCompatDefaults(
input: OpenAICompletionsCompatDefaultsInput,
): OpenAICompletionsCompatDefaults {
const {
provider,
endpointClass,
knownProviderFamily,
supportsNativeStreamingUsageCompat = false,
@@ -64,7 +65,8 @@ export function resolveOpenAICompletionsCompatDefaults(
endpointClass === "chutes-native" ||
endpointClass === "mistral-public" ||
knownProviderFamily === "mistral" ||
(isDefaultRoute && isDefaultRouteProvider(input.provider, "chutes"));
(isDefaultRoute && isDefaultRouteProvider(provider, "chutes"));
const isOllamaCompatProvider = provider === "ollama";
return {
supportsStore:
@@ -76,7 +78,8 @@ export function resolveOpenAICompletionsCompatDefaults(
endpointClass !== "xai-native" &&
!usesExplicitProxyLikeEndpoint,
supportsUsageInStreaming:
!isNonStandard && (!usesConfiguredNonOpenAIEndpoint || supportsNativeStreamingUsageCompat),
isOllamaCompatProvider ||
(!isNonStandard && (!usesConfiguredNonOpenAIEndpoint || supportsNativeStreamingUsageCompat)),
maxTokensField: usesMaxTokens ? "max_tokens" : "max_completion_tokens",
thinkingFormat: isZai ? "zai" : isOpenRouterLike ? "openrouter" : "openai",
supportsStrictMode: !isZai && !usesConfiguredNonOpenAIEndpoint,

View File

@@ -1196,6 +1196,33 @@ describe("openai transport stream", () => {
expect(params.stream_options).toMatchObject({ include_usage: true });
});
it("enables streaming usage compat for Ollama OpenAI-compat endpoints", () => {
  // A typical locally-served Ollama model exposed through the
  // OpenAI-completions compatibility API.
  const model = {
    id: "qwen2.5:7b",
    name: "Qwen 2.5 7B",
    api: "openai-completions",
    provider: "ollama",
    baseUrl: "http://127.0.0.1:11434/v1",
    reasoning: true,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 32768,
    maxTokens: 8192,
  } satisfies Model<"openai-completions">;
  const request = {
    systemPrompt: "system",
    messages: [],
    tools: [],
  } as never;

  const params = buildOpenAICompletionsParams(model, request, undefined) as {
    stream_options?: { include_usage?: boolean };
  };

  // Ollama providers must request usage in the stream.
  expect(params.stream_options).toMatchObject({ include_usage: true });
});
it("disables developer-role-only compat defaults for configured custom proxy completions providers", () => {
const params = buildOpenAICompletionsParams(
{