From 4f15d77ecc08e24aa74254483a94c235531e5e6c Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Tue, 14 Apr 2026 09:57:42 +0100 Subject: [PATCH] fix(ollama): enable streaming usage for openai-compat (#66439) * fix(ollama): enable streaming usage for openai-compat * Update CHANGELOG.md --- CHANGELOG.md | 1 + src/agents/openai-completions-compat.test.ts | 34 ++++++++++++++++++++ src/agents/openai-completions-compat.ts | 7 ++-- src/agents/openai-transport-stream.test.ts | 27 ++++++++++++++++ 4 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 src/agents/openai-completions-compat.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 52a174785b2..a93a48e56ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ Docs: https://docs.openclaw.ai - Agents/gateway-tool: reject `config.patch` and `config.apply` calls from the model-facing gateway tool when they would newly enable any flag enumerated by `openclaw security audit` (for example `dangerouslyDisableDeviceAuth`, `allowInsecureAuth`, `dangerouslyAllowHostHeaderOriginFallback`, `hooks.gmail.allowUnsafeExternalContent`, `tools.exec.applyPatch.workspaceOnly: false`); already-enabled flags pass through unchanged so non-dangerous edits in the same patch still apply, and direct authenticated operator RPC behavior is unchanged. (#62006) Thanks @eleqtrizit. - Telegram/forum topics: persist learned topic names to the Telegram session sidecar store so agent context can keep using human topic names after a restart instead of relearning from future service metadata. (#66107) Thanks @obviyus. - Doctor/systemd: keep `openclaw doctor --repair` and service reinstall from re-embedding dotenv-backed secrets in user systemd units, while preserving newer inline overrides over stale state-dir `.env` values. (#66249) Thanks @tmimmanuel. +- Ollama/OpenAI-compat: send `stream_options.include_usage` for Ollama streaming completions so local Ollama runs report real usage instead of falling back to bogus prompt-token counts that trigger premature compaction. (#64568) Thanks @xchunzhao and @vincentkoc. - Doctor/plugins: cache external `preferOver` catalog lookups within each plugin auto-enable pass so large `agents.list` configs no longer peg CPU and repeatedly reread plugin catalogs during doctor/plugins resolution. (#66246) Thanks @yfge. - Agents/local models: clarify low-context preflight hints for self-hosted models, point config-backed caps at the relevant OpenClaw setting, and stop suggesting larger models when `agents.defaults.contextTokens` is the real limit. (#66236) Thanks @ImLukeF. - Browser/SSRF: restore hostname navigation under the default browser SSRF policy while keeping explicit strict mode reachable from config, and keep managed loopback CDP `/json/new` fallback requests on the local CDP control policy so browser follow-up fixes stop regressing normal navigation or self-blocking local CDP control. (#66386) Thanks @obviyus. diff --git a/src/agents/openai-completions-compat.test.ts b/src/agents/openai-completions-compat.test.ts new file mode 100644 index 00000000000..efad45ece06 --- /dev/null +++ b/src/agents/openai-completions-compat.test.ts @@ -0,0 +1,34 @@ +import { describe, expect, it } from "vitest"; +import { resolveOpenAICompletionsCompatDefaults } from "./openai-completions-compat.js"; + +describe("resolveOpenAICompletionsCompatDefaults", () => { + it("enables streaming usage for local ollama OpenAI-compat endpoints", () => { + expect( + resolveOpenAICompletionsCompatDefaults({ + provider: "ollama", + endpointClass: "local", + knownProviderFamily: "ollama", + }).supportsUsageInStreaming, + ).toBe(true); + }); + + it("keeps streaming usage enabled for custom ollama OpenAI-compat endpoints", () => { + expect( + resolveOpenAICompletionsCompatDefaults({ + provider: "ollama", + endpointClass: "custom", + knownProviderFamily: "ollama", + }).supportsUsageInStreaming, + ).toBe(true); + }); + + it("does not broaden streaming usage for generic custom providers", () => { + expect( + resolveOpenAICompletionsCompatDefaults({ + provider: "custom-cpa", + endpointClass: "custom", + knownProviderFamily: "custom-cpa", + }).supportsUsageInStreaming, + ).toBe(false); + }); +}); diff --git a/src/agents/openai-completions-compat.ts b/src/agents/openai-completions-compat.ts index 6492817ee1b..ae7d2744fdf 100644 --- a/src/agents/openai-completions-compat.ts +++ b/src/agents/openai-completions-compat.ts @@ -33,6 +33,7 @@ export function resolveOpenAICompletionsCompatDefaults( input: OpenAICompletionsCompatDefaultsInput, ): OpenAICompletionsCompatDefaults { const { + provider, endpointClass, knownProviderFamily, supportsNativeStreamingUsageCompat = false, @@ -64,7 +65,8 @@ export function resolveOpenAICompletionsCompatDefaults( endpointClass === "chutes-native" || endpointClass === "mistral-public" || knownProviderFamily === "mistral" || - (isDefaultRoute && isDefaultRouteProvider(input.provider, "chutes")); + (isDefaultRoute && isDefaultRouteProvider(provider, "chutes")); + const isOllamaCompatProvider = provider === "ollama"; return { supportsStore: @@ -76,7 +78,8 @@ export function resolveOpenAICompletionsCompatDefaults( endpointClass !== "xai-native" && !usesExplicitProxyLikeEndpoint, supportsUsageInStreaming: - !isNonStandard && (!usesConfiguredNonOpenAIEndpoint || supportsNativeStreamingUsageCompat), + isOllamaCompatProvider || + (!isNonStandard && (!usesConfiguredNonOpenAIEndpoint || supportsNativeStreamingUsageCompat)), maxTokensField: usesMaxTokens ? "max_tokens" : "max_completion_tokens", thinkingFormat: isZai ? "zai" : isOpenRouterLike ? "openrouter" : "openai", supportsStrictMode: !isZai && !usesConfiguredNonOpenAIEndpoint, diff --git a/src/agents/openai-transport-stream.test.ts b/src/agents/openai-transport-stream.test.ts index 1c7e5b4a101..30479b5adee 100644 --- a/src/agents/openai-transport-stream.test.ts +++ b/src/agents/openai-transport-stream.test.ts @@ -1196,6 +1196,33 @@ describe("openai transport stream", () => { expect(params.stream_options).toMatchObject({ include_usage: true }); }); + it("enables streaming usage compat for Ollama OpenAI-compat endpoints", () => { + const params = buildOpenAICompletionsParams( + { + id: "qwen2.5:7b", + name: "Qwen 2.5 7B", + api: "openai-completions", + provider: "ollama", + baseUrl: "http://127.0.0.1:11434/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 32768, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + { + systemPrompt: "system", + messages: [], + tools: [], + } as never, + undefined, + ) as { + stream_options?: { include_usage?: boolean }; + }; + + expect(params.stream_options).toMatchObject({ include_usage: true }); + }); + it("disables developer-role-only compat defaults for configured custom proxy completions providers", () => { const params = buildOpenAICompletionsParams( {