From 930b443c9e2b105ff6a88d37a03bfc61e3b0fe4e Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Mon, 27 Apr 2026 20:30:00 +0100
Subject: [PATCH] fix(ollama): preserve streaming usage compat

---
 CHANGELOG.md                                  | 1 +
 extensions/ollama/src/provider-models.test.ts | 5 ++++-
 extensions/ollama/src/provider-models.ts      | 5 +++--
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fd3a2471655..4301cbaeb7f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai
 - CLI/models: keep route-first `models status --json` stdout reserved for the JSON payload by routing auth-profile and startup diagnostics to stderr. Fixes #72962. Thanks @vishutdhar.
 - Sessions: ignore future-dated session activity timestamps during reset freshness checks and cap future `updatedAt` values at the merge boundary so clock-skewed messages cannot keep stale sessions alive forever. Fixes #72989. Thanks @martingarramon.
 - Plugins/CLI: allow managed plugin installs when the active extensions root is a symlink to a real state directory, while keeping nested target symlinks blocked and suppressing misleading hook-pack fallback errors for install-boundary failures. Fixes #72946. Thanks @mayank6136.
+- Providers/Ollama: mark discovered Ollama catalog models as supporting streaming usage metadata so token accounting stays enabled for local models. (#72976) Thanks @sdeyang.
 - Gateway/startup: keep hot Gateway boot paths on leaf config imports and add max-RSS reporting to the gateway startup bench so low-memory startup regressions are visible before release. Thanks @vincentkoc.
 - WebChat: read `chat.history` from active transcript branches, drop stale streamed assistant tails once final history catches up, and coalesce duplicate in-flight Control UI submits, so rewritten prompts, completed replies, and rapid send events no longer render or process twice. Fixes #72975, #72963, and #72974. Thanks @dmagdici, @lhtpluto, and @Benjamin5281999.
 - WebChat/TTS: persist automatic final-mode TTS audio as a supplemental audio-only transcript update instead of adding a second assistant message with the same visible text. Fixes #72830. Thanks @lhtpluto.
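The new CHANGELOG entry captures the user-visible effect: token accounting keys off a per-model compat flag, and models discovered from the local Ollama catalog previously omitted it. A minimal sketch of how a consumer might gate streaming usage tracking on that flag — the `ModelCompat` shape and the `shouldTrackStreamingUsage` helper are illustrative assumptions, not the project's actual API:

interface ModelCompat {
  supportsTools?: boolean;
  supportsUsageInStreaming?: boolean;
}

interface ModelDefinition {
  id: string;
  compat?: ModelCompat;
}

// Hypothetical consumer-side check. Before this patch, Ollama models
// discovered without a capability list had `compat === undefined`, so a
// strict check like this one silently disabled usage tracking for them;
// the diffs below make the flag always present.
function shouldTrackStreamingUsage(model: ModelDefinition): boolean {
  return model.compat?.supportsUsageInStreaming === true;
}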
diff --git a/extensions/ollama/src/provider-models.test.ts b/extensions/ollama/src/provider-models.test.ts
index ea4dbf8933d..5510e516519 100644
--- a/extensions/ollama/src/provider-models.test.ts
+++ b/extensions/ollama/src/provider-models.test.ts
@@ -261,15 +261,17 @@ describe("ollama provider models", () => {
     expect(visionModel.input).toEqual(["text", "image"]);
     expect(visionModel.reasoning).toBe(true);
     expect(visionModel.compat?.supportsTools).toBe(true);
+    expect(visionModel.compat?.supportsUsageInStreaming).toBe(true);
 
     const textModel = buildOllamaModelDefinition("glm-5.1:cloud", 202752, ["completion", "tools"]);
     expect(textModel.input).toEqual(["text"]);
     expect(textModel.reasoning).toBe(false);
     expect(textModel.compat?.supportsTools).toBe(true);
+    expect(textModel.compat?.supportsUsageInStreaming).toBe(true);
 
     const noCapabilities = buildOllamaModelDefinition("unknown-model", 65536);
     expect(noCapabilities.input).toEqual(["text"]);
-    expect(noCapabilities.compat).toBeUndefined();
+    expect(noCapabilities.compat?.supportsUsageInStreaming).toBe(true);
   });
 
   it("disables tool support when Ollama capabilities omit tools", () => {
@@ -277,6 +279,7 @@
 
     expect(model.reasoning).toBe(false);
     expect(model.compat?.supportsTools).toBe(false);
+    expect(model.compat?.supportsUsageInStreaming).toBe(true);
   });
 
   it("parses the last positive Modelfile num_ctx value", () => {
diff --git a/extensions/ollama/src/provider-models.ts b/extensions/ollama/src/provider-models.ts
index 401c88ab83b..bd92dc2a3a4 100644
--- a/extensions/ollama/src/provider-models.ts
+++ b/extensions/ollama/src/provider-models.ts
@@ -249,9 +249,10 @@ export function buildOllamaModelDefinition(
       : capabilities.includes("thinking");
   const compat =
     capabilities === undefined
-      ? undefined
+      ? { supportsUsageInStreaming: true }
       : {
           supportsTools: capabilities.includes("tools"),
+          supportsUsageInStreaming: true,
         };
   return {
     id: modelId,
@@ -261,7 +262,7 @@
     cost: OLLAMA_DEFAULT_COST,
     contextWindow: contextWindow ?? OLLAMA_DEFAULT_CONTEXT_WINDOW,
     maxTokens: OLLAMA_DEFAULT_MAX_TOKENS,
-    ...(compat ? { compat } : {}),
+    compat,
   };
 }
 
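Taken together, the two hunks change `buildOllamaModelDefinition` so that `compat` is always an object, which is why the `...(compat ? { compat } : {})` spread in the return value collapses to a plain `compat` property. A quick usage sketch of the post-patch behavior the updated tests pin down, assuming the call signature visible in the hunks (model id, optional context window, optional capability list):

// Capability list present: both compat flags are set explicitly.
const withTools = buildOllamaModelDefinition("glm-5.1:cloud", 202752, ["completion", "tools"]);
console.assert(withTools.compat?.supportsTools === true);
console.assert(withTools.compat?.supportsUsageInStreaming === true);

// No capability list: `compat` used to be `undefined`; it now carries just
// the streaming-usage flag, so token accounting stays enabled while tool
// support remains unasserted.
const bare = buildOllamaModelDefinition("unknown-model", 65536);
console.assert(bare.compat?.supportsTools === undefined);
console.assert(bare.compat?.supportsUsageInStreaming === true);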