fix(ollama): preserve streaming usage compat

Author: Peter Steinberger
Date: 2026-04-27 20:30:00 +01:00
parent cff991c88d
commit 930b443c9e
3 changed files with 8 additions and 3 deletions


@@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai
- CLI/models: keep route-first `models status --json` stdout reserved for the JSON payload by routing auth-profile and startup diagnostics to stderr. Fixes #72962. Thanks @vishutdhar.
- Sessions: ignore future-dated session activity timestamps during reset freshness checks and cap future `updatedAt` values at the merge boundary so clock-skewed messages cannot keep stale sessions alive forever. Fixes #72989. Thanks @martingarramon.
- Plugins/CLI: allow managed plugin installs when the active extensions root is a symlink to a real state directory, while keeping nested target symlinks blocked and suppressing misleading hook-pack fallback errors for install-boundary failures. Fixes #72946. Thanks @mayank6136.
- Providers/Ollama: mark discovered Ollama catalog models as supporting streaming usage metadata so token accounting stays enabled for local models (see the sketch after this list). (#72976) Thanks @sdeyang.
- Gateway/startup: keep hot Gateway boot paths on leaf config imports and add max-RSS reporting to the gateway startup bench so low-memory startup regressions are visible before release. Thanks @vincentkoc.
- WebChat: read `chat.history` from active transcript branches, drop stale streamed assistant tails once final history catches up, and coalesce duplicate in-flight Control UI submits, so rewritten prompts, completed replies, and rapid send events no longer render or process twice. Fixes #72975, #72963, and #72974. Thanks @dmagdici, @lhtpluto, and @Benjamin5281999.
- WebChat/TTS: persist automatic final-mode TTS audio as a supplemental audio-only transcript update instead of adding a second assistant message with the same visible text. Fixes #72830. Thanks @lhtpluto.
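
The Providers/Ollama entry above is what this commit implements: every discovered Ollama model now advertises `supportsUsageInStreaming`, so per-request token accounting is not skipped for local models. As a rough illustration of how such a compat flag is typically consumed, here is a minimal sketch; the `finalizeStreamUsage` helper and the `StreamChunk`/`UsageTotals`/`ModelCompat` shapes are assumptions for this example, not OpenClaw's actual API:

```ts
// Illustrative only: names and shapes below are assumptions, not OpenClaw APIs.
interface UsageTotals {
  inputTokens: number;
  outputTokens: number;
}

interface StreamChunk {
  text?: string;
  usage?: UsageTotals;
}

interface ModelCompat {
  supportsTools?: boolean;
  supportsUsageInStreaming?: boolean;
}

function finalizeStreamUsage(
  chunks: StreamChunk[],
  compat?: ModelCompat,
): UsageTotals | undefined {
  // If the model is not marked as emitting usage while streaming, there is
  // nothing reliable to read, so token accounting stays disabled.
  if (!compat?.supportsUsageInStreaming) {
    return undefined;
  }
  // Otherwise use the last usage payload observed on the stream.
  for (let i = chunks.length - 1; i >= 0; i--) {
    const usage = chunks[i]?.usage;
    if (usage) {
      return usage;
    }
  }
  return undefined;
}
```

With `compat` previously left undefined for capability-less models, a guard like this would bail out and report no usage at all, which is the behavior the changelog entry says this commit avoids.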


@@ -261,15 +261,17 @@ describe("ollama provider models", () => {
expect(visionModel.input).toEqual(["text", "image"]);
expect(visionModel.reasoning).toBe(true);
expect(visionModel.compat?.supportsTools).toBe(true);
expect(visionModel.compat?.supportsUsageInStreaming).toBe(true);
const textModel = buildOllamaModelDefinition("glm-5.1:cloud", 202752, ["completion", "tools"]);
expect(textModel.input).toEqual(["text"]);
expect(textModel.reasoning).toBe(false);
expect(textModel.compat?.supportsTools).toBe(true);
expect(textModel.compat?.supportsUsageInStreaming).toBe(true);
const noCapabilities = buildOllamaModelDefinition("unknown-model", 65536);
expect(noCapabilities.input).toEqual(["text"]);
expect(noCapabilities.compat).toBeUndefined();
expect(noCapabilities.compat?.supportsUsageInStreaming).toBe(true);
});
it("disables tool support when Ollama capabilities omit tools", () => {
@@ -277,6 +279,7 @@ describe("ollama provider models", () => {
expect(model.reasoning).toBe(false);
expect(model.compat?.supportsTools).toBe(false);
expect(model.compat?.supportsUsageInStreaming).toBe(true);
});
it("parses the last positive Modelfile num_ctx value", () => {


@@ -249,9 +249,10 @@ export function buildOllamaModelDefinition(
: capabilities.includes("thinking");
const compat =
capabilities === undefined
? undefined
? { supportsUsageInStreaming: true }
: {
supportsTools: capabilities.includes("tools"),
supportsUsageInStreaming: true,
};
return {
id: modelId,
@@ -261,7 +262,7 @@ export function buildOllamaModelDefinition(
cost: OLLAMA_DEFAULT_COST,
contextWindow: contextWindow ?? OLLAMA_DEFAULT_CONTEXT_WINDOW,
maxTokens: OLLAMA_DEFAULT_MAX_TOKENS,
...(compat ? { compat } : {}),
compat,
};
}
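
Taken together, the two hunks mean `compat` is now always an object: it always carries `supportsUsageInStreaming: true`, and adds `supportsTools` only when Ollama reports a capability list. Because `compat` can no longer be `undefined`, the conditional spread `...(compat ? { compat } : {})` in the return value collapses to a plain `compat` property. A condensed, self-contained sketch of the resulting helper follows; the default constants, the `reasoning` condition above the visible hunk, and the `input` derivation are assumptions for illustration, while the compat construction is taken from the diff:

```ts
// Sketch only: constant values and the input/reasoning derivations are assumed.
const OLLAMA_DEFAULT_COST = { input: 0, output: 0 }; // assumed shape and values
const OLLAMA_DEFAULT_CONTEXT_WINDOW = 8192; // assumed default
const OLLAMA_DEFAULT_MAX_TOKENS = 4096; // assumed default

export function buildOllamaModelDefinition(
  modelId: string,
  contextWindow?: number,
  capabilities?: string[],
) {
  // Assumed condition; the visible hunk only shows the "thinking" branch.
  const reasoning =
    capabilities === undefined ? false : capabilities.includes("thinking");

  // The fix: compat is always defined, so streaming usage metadata stays
  // enabled even when Ollama reports no capability list at all.
  const compat =
    capabilities === undefined
      ? { supportsUsageInStreaming: true }
      : {
          supportsTools: capabilities.includes("tools"),
          supportsUsageInStreaming: true,
        };

  return {
    id: modelId,
    // Assumed derivation, matching the test's ["text", "image"] expectation.
    input: capabilities?.includes("vision") ? ["text", "image"] : ["text"],
    reasoning,
    cost: OLLAMA_DEFAULT_COST,
    contextWindow: contextWindow ?? OLLAMA_DEFAULT_CONTEXT_WINDOW,
    maxTokens: OLLAMA_DEFAULT_MAX_TOKENS,
    compat, // previously: ...(compat ? { compat } : {})
  };
}
```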