From 455bc1ebbab5bb55676875b07b5666de911b1c4e Mon Sep 17 00:00:00 2001
From: Akari <musubiakari@gmail.com>
Date: Thu, 12 Feb 2026 23:01:36 +0900
Subject: [PATCH] fix: use last API call's cache tokens for context-size
 display (#13698) (#13805)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The UsageAccumulator sums cacheRead/cacheWrite across all API calls
within a single turn. With Anthropic prompt caching, each call reports
cacheRead ≈ current_context_size, so after N tool-call round-trips the
accumulated total becomes N × actual_context, which gets clamped to
contextWindow (200k) by deriveSessionTotalTokens().

Fix: track the most recent API call's cache fields separately and use
them in toNormalizedUsage() for context-size reporting. This makes
/status Context display accurate while preserving accumulated output
token counts.

Fixes #13698
Fixes #13782

Co-authored-by: akari-musubi <259925157+akari-musubi@users.noreply.github.com>
---
 src/agents/pi-embedded-runner/run.ts | 31 +++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts
index 7fa46ced3b1..f4bdda6d652 100644
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -80,6 +80,10 @@ type UsageAccumulator = {
   cacheRead: number;
   cacheWrite: number;
   total: number;
+  /** Cache fields from the most recent API call (not accumulated). */
+  lastCacheRead: number;
+  lastCacheWrite: number;
+  lastInput: number;
 };
 
 const createUsageAccumulator = (): UsageAccumulator => ({
@@ -88,6 +92,9 @@ const createUsageAccumulator = (): UsageAccumulator => ({
   cacheRead: 0,
   cacheWrite: 0,
   total: 0,
+  lastCacheRead: 0,
+  lastCacheWrite: 0,
+  lastInput: 0,
 });
 
 const hasUsageValues = (
@@ -112,6 +119,12 @@ const mergeUsageIntoAccumulator = (
   target.total +=
     usage.total ??
     (usage.input ?? 0) + (usage.output ?? 0) + (usage.cacheRead ?? 0) + (usage.cacheWrite ?? 0);
+  // Track the most recent API call's cache fields for accurate context-size reporting.
+  // Accumulated cache totals inflate context size when there are multiple tool-call round-trips,
+  // since each call reports cacheRead ≈ current_context_size.
+  target.lastCacheRead = usage.cacheRead ?? 0;
+  target.lastCacheWrite = usage.cacheWrite ?? 0;
+  target.lastInput = usage.input ?? 0;
 };
 
 const toNormalizedUsage = (usage: UsageAccumulator) => {
@@ -124,13 +137,21 @@ const toNormalizedUsage = (usage: UsageAccumulator) => {
   if (!hasUsage) {
     return undefined;
   }
-  const derivedTotal = usage.input + usage.output + usage.cacheRead + usage.cacheWrite;
+  // Use the LAST API call's cache fields for context-size calculation.
+  // The accumulated cacheRead/cacheWrite inflate context size because each tool-call
+  // round-trip reports cacheRead ≈ current_context_size, and summing N calls gives
+  // N × context_size which gets clamped to contextWindow (e.g. 200k).
+  // See: https://github.com/openclaw/openclaw/issues/13698
+  //
+  // We use lastInput/lastCacheRead/lastCacheWrite (from the most recent API call) for
+  // cache-related fields, but keep accumulated output (total generated text this turn).
+  const lastPromptTokens = usage.lastInput + usage.lastCacheRead + usage.lastCacheWrite;
   return {
-    input: usage.input || undefined,
+    input: usage.lastInput || undefined,
     output: usage.output || undefined,
-    cacheRead: usage.cacheRead || undefined,
-    cacheWrite: usage.cacheWrite || undefined,
-    total: usage.total || derivedTotal || undefined,
+    cacheRead: usage.lastCacheRead || undefined,
+    cacheWrite: usage.lastCacheWrite || undefined,
+    total: lastPromptTokens + usage.output || undefined,
   };
 };