fix(ollama): preserve streaming usage compat

Author: Peter Steinberger
Date: 2026-04-27 20:30:00 +01:00
parent cff991c88d
commit 930b443c9e
3 changed files with 8 additions and 3 deletions


@@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai
- CLI/models: keep route-first `models status --json` stdout reserved for the JSON payload by routing auth-profile and startup diagnostics to stderr. Fixes #72962. Thanks @vishutdhar.
- Sessions: ignore future-dated session activity timestamps during reset freshness checks and cap future `updatedAt` values at the merge boundary so clock-skewed messages cannot keep stale sessions alive forever. Fixes #72989. Thanks @martingarramon.
- Plugins/CLI: allow managed plugin installs when the active extensions root is a symlink to a real state directory, while keeping nested target symlinks blocked and suppressing misleading hook-pack fallback errors for install-boundary failures. Fixes #72946. Thanks @mayank6136.
- Providers/Ollama: mark discovered Ollama catalog models as supporting streaming usage metadata so token accounting stays enabled for local models (see the sketch after this list). (#72976) Thanks @sdeyang.
- Gateway/startup: keep hot Gateway boot paths on leaf config imports and add max-RSS reporting to the gateway startup bench so low-memory startup regressions are visible before release. Thanks @vincentkoc.
- WebChat: read `chat.history` from active transcript branches, drop stale streamed assistant tails once final history catches up, and coalesce duplicate in-flight Control UI submits, so rewritten prompts, completed replies, and rapid send events no longer render or process twice. Fixes #72975, #72963, and #72974. Thanks @dmagdici, @lhtpluto, and @Benjamin5281999.
- WebChat/TTS: persist automatic final-mode TTS audio as a supplemental audio-only transcript update instead of adding a second assistant message with the same visible text. Fixes #72830. Thanks @lhtpluto.
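
The Providers/Ollama entry above is what this commit implements: every discovered Ollama model now advertises `supportsUsageInStreaming`, so per-request token accounting is not skipped for local models. As a rough illustration of how such a compat flag is typically consumed, here is a minimal sketch; the `finalizeStreamUsage` helper and the `StreamChunk`/`UsageTotals`/`ModelCompat` shapes are assumptions for this example, not OpenClaw's actual API:

```ts
// Illustrative only: names and shapes below are assumptions, not OpenClaw APIs.
interface UsageTotals {
  inputTokens: number;
  outputTokens: number;
}

interface StreamChunk {
  text?: string;
  usage?: UsageTotals;
}

interface ModelCompat {
  supportsTools?: boolean;
  supportsUsageInStreaming?: boolean;
}

function finalizeStreamUsage(
  chunks: StreamChunk[],
  compat?: ModelCompat,
): UsageTotals | undefined {
  // If the model is not marked as emitting usage while streaming, there is
  // nothing reliable to read, so token accounting stays disabled.
  if (!compat?.supportsUsageInStreaming) {
    return undefined;
  }
  // Otherwise use the last usage payload observed on the stream.
  for (let i = chunks.length - 1; i >= 0; i--) {
    const usage = chunks[i]?.usage;
    if (usage) {
      return usage;
    }
  }
  return undefined;
}
```

With `compat` previously left undefined for capability-less models, a guard like this would bail out and report no usage at all, which is the behavior the changelog entry says this commit avoids.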


@@ -261,15 +261,17 @@ describe("ollama provider models", () => {
expect(visionModel.input).toEqual(["text", "image"]);
expect(visionModel.reasoning).toBe(true);
expect(visionModel.compat?.supportsTools).toBe(true);
expect(visionModel.compat?.supportsUsageInStreaming).toBe(true);
const textModel = buildOllamaModelDefinition("glm-5.1:cloud", 202752, ["completion", "tools"]);
expect(textModel.input).toEqual(["text"]);
expect(textModel.reasoning).toBe(false);
expect(textModel.compat?.supportsTools).toBe(true);
expect(textModel.compat?.supportsUsageInStreaming).toBe(true);
const noCapabilities = buildOllamaModelDefinition("unknown-model", 65536);
expect(noCapabilities.input).toEqual(["text"]);
expect(noCapabilities.compat).toBeUndefined();
expect(noCapabilities.compat?.supportsUsageInStreaming).toBe(true);
});
it("disables tool support when Ollama capabilities omit tools", () => {
@@ -277,6 +279,7 @@ describe("ollama provider models", () => {
expect(model.reasoning).toBe(false);
expect(model.compat?.supportsTools).toBe(false);
expect(model.compat?.supportsUsageInStreaming).toBe(true);
});
it("parses the last positive Modelfile num_ctx value", () => {


@@ -249,9 +249,10 @@ export function buildOllamaModelDefinition(
: capabilities.includes("thinking");
const compat =
capabilities === undefined
? undefined
? { supportsUsageInStreaming: true }
: {
supportsTools: capabilities.includes("tools"),
supportsUsageInStreaming: true,
};
return {
id: modelId,
@@ -261,7 +262,7 @@ export function buildOllamaModelDefinition(
cost: OLLAMA_DEFAULT_COST,
contextWindow: contextWindow ?? OLLAMA_DEFAULT_CONTEXT_WINDOW,
maxTokens: OLLAMA_DEFAULT_MAX_TOKENS,
...(compat ? { compat } : {}),
compat,
};
}
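
Taken together, the two hunks mean `compat` is now always an object: it always carries `supportsUsageInStreaming: true`, and adds `supportsTools` only when Ollama reports a capability list. Because `compat` can no longer be `undefined`, the conditional spread `...(compat ? { compat } : {})` in the return value collapses to a plain `compat` property. A condensed, self-contained sketch of the resulting helper follows; the default constants, the `reasoning` condition above the visible hunk, and the `input` derivation are assumptions for illustration, while the compat construction is taken from the diff:

```ts
// Sketch only: constant values and the input/reasoning derivations are assumed.
const OLLAMA_DEFAULT_COST = { input: 0, output: 0 }; // assumed shape and values
const OLLAMA_DEFAULT_CONTEXT_WINDOW = 8192; // assumed default
const OLLAMA_DEFAULT_MAX_TOKENS = 4096; // assumed default

export function buildOllamaModelDefinition(
  modelId: string,
  contextWindow?: number,
  capabilities?: string[],
) {
  // Assumed condition; the visible hunk only shows the "thinking" branch.
  const reasoning =
    capabilities === undefined ? false : capabilities.includes("thinking");

  // The fix: compat is always defined, so streaming usage metadata stays
  // enabled even when Ollama reports no capability list at all.
  const compat =
    capabilities === undefined
      ? { supportsUsageInStreaming: true }
      : {
          supportsTools: capabilities.includes("tools"),
          supportsUsageInStreaming: true,
        };

  return {
    id: modelId,
    // Assumed derivation, matching the test's ["text", "image"] expectation.
    input: capabilities?.includes("vision") ? ["text", "image"] : ["text"],
    reasoning,
    cost: OLLAMA_DEFAULT_COST,
    contextWindow: contextWindow ?? OLLAMA_DEFAULT_CONTEXT_WINDOW,
    maxTokens: OLLAMA_DEFAULT_MAX_TOKENS,
    compat, // previously: ...(compat ? { compat } : {})
  };
}
```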