fix(ui): include cached tokens in context usage (#70532)
Fixes #70491. The Control UI context percentage now includes cached prompt tokens (cache reads and writes) and continues to exclude output tokens. Thanks @chen-zhang-cs-code.
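For reference, a minimal self-contained sketch of the corrected math (the `usage` field names and `contextWindow` option match this commit's diff below; the standalone `contextPercentOf` helper is illustrative, not the actual function name in the codebase):

// Sketch: context usage as a share of the model's context window.
// All prompt-side tokens (fresh input plus cache reads and writes)
// occupy the window; output tokens do not, so they are excluded.
type Usage = { input: number; output: number; cacheRead: number; cacheWrite: number };

function contextPercentOf(usage: Usage, contextWindow: number | null): number | null {
  const promptTokens = usage.input + usage.cacheRead + usage.cacheWrite;
  if (!contextWindow || promptTokens <= 0) return null;
  return Math.min(Math.round((promptTokens / contextWindow) * 100), 100);
}

// 1 + 438_400 + 307 = 438_708 prompt tokens in a 1M window -> 44 (43.87 rounded).
contextPercentOf({ input: 1, output: 1200, cacheRead: 438_400, cacheWrite: 307 }, 1_000_000);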
@@ -65,6 +65,7 @@ Docs: https://docs.openclaw.ai
 - Plugins/install: add newly installed plugin ids to an existing `plugins.allow` list before enabling them, so allowlisted configs load installed plugins after restart.
 - Status: show `Fast` in `/status` when fast mode is enabled, including config/default-derived fast mode, and omit it when disabled.
 - OpenAI/image generation: detect Azure OpenAI-style image endpoints, use Azure `api-key` auth plus deployment-scoped image URLs, and honor `AZURE_OPENAI_API_VERSION` so image generation and edits work against Azure-hosted OpenAI resources. (#70570) Thanks @zhanggpcsu.
+- Control UI/chat: include cache-read and cache-write tokens when computing the message footer context percentage, so cached Claude/OpenAI sessions no longer show `0% ctx` while `/status` reports substantial context use. Fixes #70491. (#70532) Thanks @chen-zhang-cs-code.
 - Models/auth: merge provider-owned default-model additions from `openclaw models auth login` instead of replacing `agents.defaults.models`, so re-authenticating an OAuth provider such as OpenAI Codex no longer wipes other providers' aliases and per-model params. Migrations that must rename keys (Anthropic -> Claude CLI) opt in with `replaceDefaultModels`. Fixes #69414. (#70435) Thanks @neeravmakwana.
 - Media understanding/audio: prefer configured or key-backed STT providers before auto-detected local Whisper CLIs, so installed local transcription tools no longer shadow API providers such as Groq/OpenAI in `tools.media.audio` auto mode. Fixes #68727.
 - Providers/OpenAI: lock the auth picker wording for OpenAI API key, Codex browser login, and Codex device pairing so the setup choices no longer imply a mixed Codex/API-key auth path. (#67848) Thanks @tmlxrd.
@@ -241,6 +241,53 @@ describe("grouped chat rendering", () => {
   expect(avatar?.getAttribute("src")).toBe("/openclaw-logo.svg");
 });

+it("includes cache tokens when rendering assistant context usage", () => {
+  const container = document.createElement("div");
+
+  renderAssistantMessage(
+    container,
+    {
+      role: "assistant",
+      content: "Done",
+      usage: {
+        input: 1,
+        output: 1200,
+        cacheRead: 438_400,
+        cacheWrite: 307,
+      },
+      model: "anthropic/claude-opus-4-7",
+      timestamp: 1000,
+    },
+    { contextWindow: 1_000_000 },
+  );
+
+  expect(container.querySelector(".msg-meta__ctx")?.textContent).toBe("44% ctx");
+  expect(container.textContent).toContain("R438.4k");
+  expect(container.textContent).toContain("W307");
+});
+
+it("excludes output tokens when rendering assistant context usage", () => {
+  const container = document.createElement("div");
+
+  renderAssistantMessage(
+    container,
+    {
+      role: "assistant",
+      content: "Long response",
+      usage: {
+        input: 1_000,
+        output: 9_000,
+        cacheRead: 0,
+        cacheWrite: 0,
+      },
+      timestamp: 1000,
+    },
+    { contextWindow: 10_000 },
+  );
+
+  expect(container.querySelector(".msg-meta__ctx")?.textContent).toBe("10% ctx");
+});
+
 it("renders the configured local user name in user message footers", () => {
   const container = document.createElement("div");
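As a sanity check on the expected strings above: the first test's prompt tokens are 1 + 438,400 + 307 = 438,708, and 438,708 / 1,000,000 ≈ 43.87%, which rounds to `44% ctx` (the `R438.4k` and `W307` assertions appear to cover the cache-read/cache-write footer badges); in the second test the 9,000 output tokens are ignored, leaving 1,000 / 10,000 = `10% ctx`.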
@@ -402,8 +402,11 @@ function extractGroupMeta(group: MessageGroup, contextWindow: number | null): Gr
     return null;
   }

+  const promptTokens = input + cacheRead + cacheWrite;
   const contextPercent =
-    contextWindow && input > 0 ? Math.min(Math.round((input / contextWindow) * 100), 100) : null;
+    contextWindow && promptTokens > 0
+      ? Math.min(Math.round((promptTokens / contextWindow) * 100), 100)
+      : null;

   return { input, output, cacheRead, cacheWrite, cost, model, contextPercent };
 }
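Two details of the changed guard are worth noting: a message whose usage has no prompt-side tokens (for example, output only) yields `null` rather than a misleading `0% ctx`, and the `Math.min(..., 100)` clamp keeps a prompt that overruns the reported window from displaying more than 100%.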