fix: avoid treating cumulative codex usage as context usage (#64669) (thanks @cyrusaf)

2026-05-06 15:20:44 +00:00 · 2026-04-10 22:49:17 -07:00
parent 4e2541e5fb
commit 9a94194329
3 changed files with 156 additions and 10 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ Docs: https://docs.openclaw.ai
 - Agents/channels: route cross-agent subagent spawns through the target agent's bound channel account while preserving peer and workspace/role-scoped bindings, so child sessions no longer inherit the caller's account in shared rooms, workspaces, or multi-account setups. (#67508) Thanks @lukeboyett and @gumadeiras.
 - Telegram/callbacks: treat permanent callback edit errors as completed updates so stale command pagination buttons no longer wedge the update watermark and block newer Telegram updates. (#68588) Thanks @Lucenx9.
 - Browser/CDP: allow the selected remote CDP profile host for CDP health and control checks without widening browser navigation SSRF policy, so WSL-to-Windows Chrome endpoints no longer appear offline under strict defaults. Fixes #68108. (#68207) Thanks @Mlightsnow.
+- Codex: stop cumulative app-server token totals from being treated as fresh context usage, so session status no longer reports inflated context percentages after long Codex threads. (#64669) Thanks @cyrusaf.

 ## 2026.4.18

--- a/extensions/codex/src/app-server/event-projector.test.ts
+++ b/extensions/codex/src/app-server/event-projector.test.ts
@@ -50,7 +50,13 @@ describe("CodexAppServerEventProjector", () => {
        turnId: "turn-1",
        tokenUsage: {
          total: {
-            totalTokens: 12,
+            totalTokens: 900_000,
+            inputTokens: 700_000,
+            cachedInputTokens: 100_000,
+            outputTokens: 100_000,
+          },
+          last: {
+            totalTokens: 14,
            inputTokens: 5,
            cachedInputTokens: 2,
            outputTokens: 7,
@@ -83,10 +89,98 @@ describe("CodexAppServerEventProjector", () => {
    expect(result.assistantTexts).toEqual(["hello"]);
    expect(result.messagesSnapshot.map((message) => message.role)).toEqual(["user", "assistant"]);
    expect(result.lastAssistant?.content).toEqual([{ type: "text", text: "hello" }]);
-    expect(result.attemptUsage).toMatchObject({ input: 5, output: 7, cacheRead: 2, total: 12 });
+    expect(result.attemptUsage).toMatchObject({ input: 5, output: 7, cacheRead: 2, total: 14 });
+    expect(result.lastAssistant?.usage).toMatchObject({
+      input: 5,
+      output: 7,
+      cacheRead: 2,
+      totalTokens: 14,
+    });
    expect(result.replayMetadata.replaySafe).toBe(true);
  });

+  it("does not treat cumulative-only token usage as fresh context usage", async () => {
+    const params = createParams();
+    const projector = new CodexAppServerEventProjector(params, "thread-1", "turn-1");
+
+    await projector.handleNotification({
+      method: "item/agentMessage/delta",
+      params: { threadId: "thread-1", turnId: "turn-1", itemId: "msg-1", delta: "done" },
+    });
+    await projector.handleNotification({
+      method: "thread/tokenUsage/updated",
+      params: {
+        threadId: "thread-1",
+        turnId: "turn-1",
+        tokenUsage: {
+          total: {
+            totalTokens: 1_000_000,
+            inputTokens: 999_000,
+            cachedInputTokens: 500,
+            outputTokens: 500,
+          },
+        },
+      },
+    });
+
+    const result = projector.buildResult({
+      didSendViaMessagingTool: false,
+      messagingToolSentTexts: [],
+      messagingToolSentMediaUrls: [],
+      messagingToolSentTargets: [],
+    });
+
+    expect(result.assistantTexts).toEqual(["done"]);
+    expect(result.attemptUsage).toBeUndefined();
+    expect(result.lastAssistant?.usage).toMatchObject({
+      input: 0,
+      output: 0,
+      cacheRead: 0,
+      totalTokens: 0,
+    });
+  });
+
+  it("normalizes snake_case current token usage fields", async () => {
+    const params = createParams();
+    const projector = new CodexAppServerEventProjector(params, "thread-1", "turn-1");
+
+    await projector.handleNotification({
+      method: "item/agentMessage/delta",
+      params: { threadId: "thread-1", turnId: "turn-1", itemId: "msg-1", delta: "done" },
+    });
+    await projector.handleNotification({
+      method: "thread/tokenUsage/updated",
+      params: {
+        threadId: "thread-1",
+        turnId: "turn-1",
+        tokenUsage: {
+          total: { total_tokens: 1_000_000 },
+          last_token_usage: {
+            total_tokens: 20,
+            input_tokens: 8,
+            cached_input_tokens: 3,
+            output_tokens: 9,
+          },
+        },
+      },
+    });
+
+    const result = projector.buildResult({
+      didSendViaMessagingTool: false,
+      messagingToolSentTexts: [],
+      messagingToolSentMediaUrls: [],
+      messagingToolSentTargets: [],
+    });
+
+    expect(result.attemptUsage).toMatchObject({ input: 8, output: 9, cacheRead: 3, total: 20 });
+    expect(result.lastAssistant?.usage).toMatchObject({
+      input: 8,
+      output: 9,
+      cacheRead: 3,
+      totalTokens: 20,
+    });
+  });
+
  it("keeps intermediate agentMessage items out of the final visible reply", async () => {
    const onAssistantMessageStart = vi.fn();
    const onPartialReply = vi.fn();
--- a/extensions/codex/src/app-server/event-projector.ts
+++ b/extensions/codex/src/app-server/event-projector.ts
@@ -42,6 +42,15 @@ const ZERO_USAGE: Usage = {
  },
 };

+const CURRENT_TOKEN_USAGE_KEYS = [
+  "last",
+  "current",
+  "lastCall",
+  "lastCallUsage",
+  "lastTokenUsage",
+  "last_token_usage",
+] as const;
+
 export class CodexAppServerEventProjector {
  private readonly assistantTextByItem = new Map<string, string>();
  private readonly assistantItemOrder: string[] = [];
@@ -327,16 +336,16 @@ export class CodexAppServerEventProjector {

  private handleTokenUsage(params: JsonObject): void {
    const tokenUsage = isJsonObject(params.tokenUsage) ? params.tokenUsage : undefined;
-    const total = tokenUsage && isJsonObject(tokenUsage.total) ? tokenUsage.total : undefined;
-    if (!total) {
+    const current =
+      (tokenUsage ? readFirstJsonObject(tokenUsage, CURRENT_TOKEN_USAGE_KEYS) : undefined) ??
+      readFirstJsonObject(params, CURRENT_TOKEN_USAGE_KEYS);
+    if (!current) {
      return;
    }
-    this.tokenUsage = normalizeUsage({
-      input: readNumber(total, "inputTokens"),
-      output: readNumber(total, "outputTokens"),
-      cacheRead: readNumber(total, "cachedInputTokens"),
-      total: readNumber(total, "totalTokens"),
-    });
+    const usage = normalizeCodexTokenUsage(current);
+    if (usage) {
+      this.tokenUsage = usage;
+    }
  }

  private async handleTurnCompleted(params: JsonObject): Promise<void> {
@@ -524,6 +533,48 @@ function readNumber(record: JsonObject, key: string): number | undefined {
  return typeof value === "number" && Number.isFinite(value) ? value : undefined;
 }

+function readFirstJsonObject(record: JsonObject, keys: readonly string[]): JsonObject | undefined {
+  for (const key of keys) {
+    const value = record[key];
+    if (isJsonObject(value)) {
+      return value;
+    }
+  }
+  return undefined;
+}
+
+function readNumberAlias(record: JsonObject, keys: readonly string[]): number | undefined {
+  for (const key of keys) {
+    const value = readNumber(record, key);
+    if (value !== undefined) {
+      return value;
+    }
+  }
+  return undefined;
+}
+
+function normalizeCodexTokenUsage(record: JsonObject): NormalizedUsage | undefined {
+  return normalizeUsage({
+    input: readNumberAlias(record, ["inputTokens", "input_tokens", "input", "promptTokens"]),
+    output: readNumberAlias(record, ["outputTokens", "output_tokens", "output"]),
+    cacheRead: readNumberAlias(record, [
+      "cachedInputTokens",
+      "cached_input_tokens",
+      "cacheRead",
+      "cache_read",
+      "cache_read_input_tokens",
+      "cached_tokens",
+    ]),
+    cacheWrite: readNumberAlias(record, [
+      "cacheWrite",
+      "cache_write",
+      "cacheCreationInputTokens",
+      "cache_creation_input_tokens",
+    ]),
+    total: readNumberAlias(record, ["totalTokens", "total_tokens", "total"]),
+  });
+}
+
 function splitPlanText(text: string): string[] {
  return text
    .split(/\r?\n/)