fix: bound gateway usage cost cache (#68842)

2026-05-06 08:10:44 +00:00 · 2026-04-21 05:09:41 +01:00
parent 8bf57e8bde
commit fb2c405dbc
3 changed files with 72 additions and 21 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
 - Webchat/images: treat inline image attachments as media for empty-turn gating while still ignoring metadata-only blank turns. (#69474) Thanks @Jaswir.
 - Discord/think: only show `adaptive` in `/think` autocomplete for provider/model pairs that actually support provider-managed adaptive thinking, so GPT/OpenAI models no longer advertise an Anthropic-only option.
 - Thinking: only expose `max` for models that explicitly support provider max reasoning, and remap stored `max` settings to the largest supported thinking mode when users switch to another model.
+- Gateway/usage: bound the cost usage cache with FIFO eviction so varying date/range lookups cannot grow it without bound. (#68842) Thanks @Feelw00.
 - OpenAI/Responses: resolve `/think` levels against each GPT model's supported reasoning efforts so `/think off` no longer becomes high reasoning or sends unsupported `reasoning.effort: "none"` payloads.
 - Lobster/TaskFlow: allow managed approval resumes to use `approvalId` without a resume token, and persist that id in approval wait state. (#69559) Thanks @kirkluokun.
 - Plugins/startup: install bundled runtime dependencies into each plugin's own runtime directory, reuse source-checkout repair caches after rebuilds, and log only packages that were actually installed so repeated Gateway starts stay quiet once deps are present.
--- a/src/gateway/server-methods/usage.cost-usage-cache.test.ts
+++ b/src/gateway/server-methods/usage.cost-usage-cache.test.ts
@@ -21,26 +21,34 @@
 import { beforeEach, describe, expect, it, vi } from "vitest";
 import type { OpenClawConfig } from "../../config/config.js";

+const mocks = vi.hoisted(() => ({
+  loadCostUsageSummary: vi.fn(),
+}));
+
+function createSummary() { // Builds a fresh, minimal cost-usage summary object for use as a test fixture.
+  return {
+    updatedAt: Date.now(), // stamped at call time, so each call carries its own timestamp
+    startDate: "2026-02-01",
+    endDate: "2026-02-02",
+    daily: [], // no per-day rows in this fixture
+    totals: {
+      totalTokens: 1,
+      input: 0,
+      output: 0,
+      cacheRead: 0,
+      cacheWrite: 0,
+      totalCost: 0,
+    },
+  };
+}
+
 vi.mock("../../infra/session-cost-usage.js", async () => {
  const actual = await vi.importActual<typeof import("../../infra/session-cost-usage.js")>(
    "../../infra/session-cost-usage.js",
  );
  return {
    ...actual,
-    loadCostUsageSummary: vi.fn(async () => ({
-      updatedAt: Date.now(),
-      startDate: "2026-02-01",
-      endDate: "2026-02-02",
-      daily: [],
-      totals: {
-        totalTokens: 1,
-        input: 0,
-        output: 0,
-        cacheRead: 0,
-        cacheWrite: 0,
-        totalCost: 0,
-      },
-    })),
+    loadCostUsageSummary: mocks.loadCostUsageSummary,
  };
 });

@@ -53,6 +61,7 @@ describe("costUsageCache bounded growth", () => {
    __test.costUsageCache.clear();
    vi.useRealTimers();
    vi.clearAllMocks();
+    mocks.loadCostUsageSummary.mockResolvedValue(createSummary());
  });

  it("does not grow without bound when (startMs, endMs) varies across day rollover and range switches", async () => {
@@ -87,4 +96,38 @@ describe("costUsageCache bounded growth", () => {
    const firstCacheKey = `${firstStartMs}-${firstEndMs}`;
    expect(__test.costUsageCache.has(firstCacheKey)).toBe(false);
  });
+
+  it("evicts settled entries before in-flight entries when possible", async () => {
+    const config = {} as OpenClawConfig;
+    const pending = new Promise<ReturnType<typeof createSummary>>(() => {}); // executor never resolves or rejects, so this load stays pending
+    mocks.loadCostUsageSummary.mockReturnValueOnce(pending); // only the first loader call gets the never-settling promise
+
+    const inFlight = __test.loadCostUsageSummaryCached({
+      startMs: 1,
+      endMs: 2,
+      config,
+    });
+    await Promise.resolve(); // yield one microtask turn; presumably lets the cached loader record its entry — confirm
+
+    for (let i = 0; i < 256; i++) { // 256 distinct one-day ranges; presumably matches COST_USAGE_CACHE_MAX — confirm
+      const startMs = Date.UTC(2026, 0, 1) + i * DAY_MS;
+      await __test.loadCostUsageSummaryCached({
+        startMs,
+        endMs: startMs + DAY_MS - 1,
+        config,
+      });
+    }
+
+    const repeated = __test.loadCostUsageSummaryCached({ // same (startMs, endMs) as the still-pending first request
+      startMs: 1,
+      endMs: 2,
+      config,
+    });
+    await Promise.resolve();
+
+    expect(__test.costUsageCache.has("1-2")).toBe(true); // the pending entry must still be cached after the 256 other loads
+    expect(mocks.loadCostUsageSummary).toHaveBeenCalledTimes(257); // 1 pending load + 256 distinct ranges; the repeat must not add a call
+    void inFlight.catch(() => {}); // attach no-op rejection handlers to the two promises that are never awaited
+    void repeated.catch(() => {});
+  });
 });
--- a/src/gateway/server-methods/usage.ts
+++ b/src/gateway/server-methods/usage.ts
@@ -65,15 +65,22 @@ type CostUsageCacheEntry = {

 const costUsageCache = new Map<string, CostUsageCacheEntry>();

-// Store an entry with FIFO eviction when adding a new key would exceed the
-// cap. Mirrors the pattern in session-transcript-key.ts and server-session-key.ts
-// so the cache stays bounded under sliding-window usage queries (each
-// day/range combination produces a distinct key).
+function findCostUsageCacheEvictionKey(): string | undefined { // Chooses which cache key to drop; returns undefined only when the cache is empty.
+  for (const [key, entry] of costUsageCache) { // Map iteration follows insertion order, so the oldest-inserted entries are checked first
+    if (!entry.inFlight) { // prefer the first entry with no `inFlight` value set
+      return key;
+    }
+  }
+  return costUsageCache.keys().next().value; // every entry has `inFlight` set: fall back to the first key in iteration order
+}
+
+// Keep the cache bounded while preserving in-flight request coalescing when a
+// settled entry is available to evict.
 function setCostUsageCache(cacheKey: string, entry: CostUsageCacheEntry): void {
  if (!costUsageCache.has(cacheKey) && costUsageCache.size >= COST_USAGE_CACHE_MAX) {
-    const oldest = costUsageCache.keys().next().value;
-    if (oldest !== undefined) {
-      costUsageCache.delete(oldest);
+    const evictKey = findCostUsageCacheEvictionKey();
+    if (evictKey !== undefined) {
+      costUsageCache.delete(evictKey);
    }
  }
  costUsageCache.set(cacheKey, entry);