From 930b443c9e2b105ff6a88d37a03bfc61e3b0fe4e Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Mon, 27 Apr 2026 20:30:00 +0100
Subject: [PATCH] fix(ollama): preserve streaming usage compat

---
 CHANGELOG.md                                  | 1 +
 extensions/ollama/src/provider-models.test.ts | 5 ++++-
 extensions/ollama/src/provider-models.ts      | 5 +++--
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fd3a2471655..4301cbaeb7f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai
 - CLI/models: keep route-first `models status --json` stdout reserved for the JSON payload by routing auth-profile and startup diagnostics to stderr. Fixes #72962. Thanks @vishutdhar.
 - Sessions: ignore future-dated session activity timestamps during reset freshness checks and cap future `updatedAt` values at the merge boundary so clock-skewed messages cannot keep stale sessions alive forever. Fixes #72989. Thanks @martingarramon.
 - Plugins/CLI: allow managed plugin installs when the active extensions root is a symlink to a real state directory, while keeping nested target symlinks blocked and suppressing misleading hook-pack fallback errors for install-boundary failures. Fixes #72946. Thanks @mayank6136.
+- Providers/Ollama: mark discovered Ollama catalog models as supporting streaming usage metadata so token accounting stays enabled for local models. (#72976) Thanks @sdeyang.
 - Gateway/startup: keep hot Gateway boot paths on leaf config imports and add max-RSS reporting to the gateway startup bench so low-memory startup regressions are visible before release. Thanks @vincentkoc.
 - WebChat: read `chat.history` from active transcript branches, drop stale streamed assistant tails once final history catches up, and coalesce duplicate in-flight Control UI submits, so rewritten prompts, completed replies, and rapid send events no longer render or process twice. Fixes #72975, #72963, and #72974. Thanks @dmagdici, @lhtpluto, and @Benjamin5281999.
 - WebChat/TTS: persist automatic final-mode TTS audio as a supplemental audio-only transcript update instead of adding a second assistant message with the same visible text. Fixes #72830. Thanks @lhtpluto.
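The new CHANGELOG entry captures the user-visible effect: token accounting keys off a per-model compat flag, and models discovered from the local Ollama catalog previously omitted it. A minimal sketch of how a consumer might gate streaming usage tracking on that flag — the `ModelCompat` shape and the `shouldTrackStreamingUsage` helper are illustrative assumptions, not the project's actual API:

interface ModelCompat {
  supportsTools?: boolean;
  supportsUsageInStreaming?: boolean;
}

interface ModelDefinition {
  id: string;
  compat?: ModelCompat;
}

// Hypothetical consumer-side check. Before this patch, Ollama models
// discovered without a capability list had `compat === undefined`, so a
// strict check like this one silently disabled usage tracking for them;
// the diffs below make the flag always present.
function shouldTrackStreamingUsage(model: ModelDefinition): boolean {
  return model.compat?.supportsUsageInStreaming === true;
}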
diff --git a/extensions/ollama/src/provider-models.test.ts b/extensions/ollama/src/provider-models.test.ts
index ea4dbf8933d..5510e516519 100644
--- a/extensions/ollama/src/provider-models.test.ts
+++ b/extensions/ollama/src/provider-models.test.ts
@@ -261,15 +261,17 @@ describe("ollama provider models", () => {
     expect(visionModel.input).toEqual(["text", "image"]);
     expect(visionModel.reasoning).toBe(true);
     expect(visionModel.compat?.supportsTools).toBe(true);
+    expect(visionModel.compat?.supportsUsageInStreaming).toBe(true);
 
     const textModel = buildOllamaModelDefinition("glm-5.1:cloud", 202752, ["completion", "tools"]);
     expect(textModel.input).toEqual(["text"]);
     expect(textModel.reasoning).toBe(false);
     expect(textModel.compat?.supportsTools).toBe(true);
+    expect(textModel.compat?.supportsUsageInStreaming).toBe(true);
 
     const noCapabilities = buildOllamaModelDefinition("unknown-model", 65536);
     expect(noCapabilities.input).toEqual(["text"]);
-    expect(noCapabilities.compat).toBeUndefined();
+    expect(noCapabilities.compat?.supportsUsageInStreaming).toBe(true);
   });
 
   it("disables tool support when Ollama capabilities omit tools", () => {
@@ -277,6 +279,7 @@
 
     expect(model.reasoning).toBe(false);
     expect(model.compat?.supportsTools).toBe(false);
+    expect(model.compat?.supportsUsageInStreaming).toBe(true);
   });
 
   it("parses the last positive Modelfile num_ctx value", () => {
diff --git a/extensions/ollama/src/provider-models.ts b/extensions/ollama/src/provider-models.ts
index 401c88ab83b..bd92dc2a3a4 100644
--- a/extensions/ollama/src/provider-models.ts
+++ b/extensions/ollama/src/provider-models.ts
@@ -249,9 +249,10 @@ export function buildOllamaModelDefinition(
       : capabilities.includes("thinking");
   const compat =
     capabilities === undefined
-      ? undefined
+      ? { supportsUsageInStreaming: true }
       : {
           supportsTools: capabilities.includes("tools"),
+          supportsUsageInStreaming: true,
         };
   return {
     id: modelId,
@@ -261,7 +262,7 @@
     cost: OLLAMA_DEFAULT_COST,
     contextWindow: contextWindow ?? OLLAMA_DEFAULT_CONTEXT_WINDOW,
     maxTokens: OLLAMA_DEFAULT_MAX_TOKENS,
-    ...(compat ? { compat } : {}),
+    compat,
   };
 }
 
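Taken together, the two hunks change `buildOllamaModelDefinition` so that `compat` is always an object, which is why the `...(compat ? { compat } : {})` spread in the return value collapses to a plain `compat` property. A quick usage sketch of the post-patch behavior the updated tests pin down, assuming the call signature visible in the hunks (model id, optional context window, optional capability list):

// Capability list present: both compat flags are set explicitly.
const withTools = buildOllamaModelDefinition("glm-5.1:cloud", 202752, ["completion", "tools"]);
console.assert(withTools.compat?.supportsTools === true);
console.assert(withTools.compat?.supportsUsageInStreaming === true);

// No capability list: `compat` used to be `undefined`; it now carries just
// the streaming-usage flag, so token accounting stays enabled while tool
// support remains unasserted.
const bare = buildOllamaModelDefinition("unknown-model", 65536);
console.assert(bare.compat?.supportsTools === undefined);
console.assert(bare.compat?.supportsUsageInStreaming === true);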