From 455bc1ebbab5bb55676875b07b5666de911b1c4e Mon Sep 17 00:00:00 2001 From: Akari Date: Thu, 12 Feb 2026 23:01:36 +0900 Subject: [PATCH] fix: use last API call's cache tokens for context-size display (#13698) (#13805) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The UsageAccumulator sums cacheRead/cacheWrite across all API calls within a single turn. With Anthropic prompt caching, each call reports cacheRead ≈ current_context_size, so after N tool-call round-trips the accumulated total becomes N × actual_context, which gets clamped to contextWindow (200k) by deriveSessionTotalTokens(). Fix: track the most recent API call's cache fields separately and use them in toNormalizedUsage() for context-size reporting. This makes /status Context display accurate while preserving accumulated output token counts. Fixes #13698 Fixes #13782 Co-authored-by: akari-musubi <259925157+akari-musubi@users.noreply.github.com> --- src/agents/pi-embedded-runner/run.ts | 31 +++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 7fa46ced3b1..f4bdda6d652 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -80,6 +80,10 @@ type UsageAccumulator = { cacheRead: number; cacheWrite: number; total: number; + /** Cache fields from the most recent API call (not accumulated). */ + lastCacheRead: number; + lastCacheWrite: number; + lastInput: number; }; const createUsageAccumulator = (): UsageAccumulator => ({ @@ -88,6 +92,9 @@ const createUsageAccumulator = (): UsageAccumulator => ({ cacheRead: 0, cacheWrite: 0, total: 0, + lastCacheRead: 0, + lastCacheWrite: 0, + lastInput: 0, }); const hasUsageValues = ( @@ -112,6 +119,12 @@ const mergeUsageIntoAccumulator = ( target.total += usage.total ?? (usage.input ?? 0) + (usage.output ?? 0) + (usage.cacheRead ?? 0) + (usage.cacheWrite ?? 0); + // Track the most recent API call's cache fields for accurate context-size reporting. + // Accumulated cache totals inflate context size when there are multiple tool-call round-trips, + // since each call reports cacheRead ≈ current_context_size. + target.lastCacheRead = usage.cacheRead ?? 0; + target.lastCacheWrite = usage.cacheWrite ?? 0; + target.lastInput = usage.input ?? 0; }; const toNormalizedUsage = (usage: UsageAccumulator) => { @@ -124,13 +137,21 @@ const toNormalizedUsage = (usage: UsageAccumulator) => { if (!hasUsage) { return undefined; } - const derivedTotal = usage.input + usage.output + usage.cacheRead + usage.cacheWrite; + // Use the LAST API call's cache fields for context-size calculation. + // The accumulated cacheRead/cacheWrite inflate context size because each tool-call + // round-trip reports cacheRead ≈ current_context_size, and summing N calls gives + // N × context_size which gets clamped to contextWindow (e.g. 200k). + // See: https://github.com/openclaw/openclaw/issues/13698 + // + // We use lastInput/lastCacheRead/lastCacheWrite (from the most recent API call) for + // cache-related fields, but keep accumulated output (total generated text this turn). + const lastPromptTokens = usage.lastInput + usage.lastCacheRead + usage.lastCacheWrite; return { - input: usage.input || undefined, + input: usage.lastInput || undefined, output: usage.output || undefined, - cacheRead: usage.cacheRead || undefined, - cacheWrite: usage.cacheWrite || undefined, - total: usage.total || derivedTotal || undefined, + cacheRead: usage.lastCacheRead || undefined, + cacheWrite: usage.lastCacheWrite || undefined, + total: lastPromptTokens + usage.output || undefined, }; };