fix(memory): harden context window cache against collisions

2026-05-06 04:10:46 +00:00 · 2026-02-15 19:31:41 -08:00
parent 559c8d9930
commit cbf58d2e1c
3 changed files with 52 additions and 11 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -52,6 +52,7 @@ Docs: https://docs.openclaw.ai
 - Subagents/Models: preserve `agents.defaults.model.fallbacks` when subagent sessions carry a model override, so subagent runs fail over to configured fallback models instead of retrying only the overridden primary model.
 - Agents/Security: sanitize workspace paths before embedding into LLM prompts (strip Unicode control/format chars) to prevent instruction injection via malicious directory names. Thanks @aether-ai-agent.
 - Agents/Context: apply configured model `contextWindow` overrides after provider discovery so `lookupContextTokens()` honors operator config values (including discovery-failure paths). (#17404) Thanks @michaelbship and @vignesh07.
+- Agents/Context: derive `lookupContextTokens()` from auth-available model metadata and keep the smallest discovered context window for duplicate model ids, preventing cross-provider cache collisions from overestimating session context limits. (#17586) Thanks @githabideri and @vignesh07.
 - Memory/FTS: make `buildFtsQuery` Unicode-aware so non-ASCII queries (including CJK) produce keyword tokens instead of falling back to vector-only search. (#17672) Thanks @KinGP5471.
 - Agents/OpenAI: force `store=true` for direct OpenAI Responses/Codex runs to preserve multi-turn server-side conversation state, while leaving proxy/non-OpenAI endpoints unchanged. (#16803) Thanks @mark9232 and @vignesh07.
 - Auto-reply/Compaction: resolve `memory/YYYY-MM-DD.md` placeholders with timezone-aware runtime dates and append a `Current time:` line to memory-flush turns, preventing wrong-year memory filenames without making the system prompt time-variant. (#17603, #17633) Thanks @nicholaspapadam-wq and @vignesh07.
--- a/src/agents/context.test.ts
+++ b/src/agents/context.test.ts
@@ -1,7 +1,22 @@
 import { describe, expect, it } from "vitest";
-import { applyConfiguredContextWindows } from "./context.js";
+import { applyConfiguredContextWindows, applyDiscoveredContextWindows } from "./context.js";
 import { createSessionManagerRuntimeRegistry } from "./pi-extensions/session-manager-runtime-registry.js";

+describe("applyDiscoveredContextWindows", () => {
+  it("keeps the smallest context window when duplicate model ids are discovered", () => {
+    // Same id reported twice: the larger window first, the smaller one second.
+    const duplicatedId = "claude-sonnet-4-5";
+    const discovered = [
+      { id: duplicatedId, contextWindow: 1_000_000 },
+      { id: duplicatedId, contextWindow: 200_000 },
+    ];
+    const cache = new Map<string, number>();
+    applyDiscoveredContextWindows({ cache, models: discovered });
+    // The smaller of the two discovered windows is the value left in the cache.
+    expect(cache.get(duplicatedId)).toBe(200_000);
+  });
+});
+
 describe("applyConfiguredContextWindows", () => {
  it("overrides discovered cache values with explicit models.providers contextWindow", () => {
    const cache = new Map<string, number>([["anthropic/claude-opus-4-6", 1_000_000]]);
--- a/src/agents/context.ts
+++ b/src/agents/context.ts
@@ -6,10 +6,36 @@ import { resolveOpenClawAgentDir } from "./agent-paths.js";
 import { ensureOpenClawModelsJson } from "./models-config.js";

 type ModelEntry = { id: string; contextWindow?: number };
+type ModelRegistryLike = {
+  getAvailable?: () => ModelEntry[]; // optional; discovery calls it when it is a function
+  getAll: () => ModelEntry[]; // discovery falls back to this when getAvailable is absent
+};
 type ConfigModelEntry = { id?: string; contextWindow?: number };
 type ProviderConfigEntry = { models?: ConfigModelEntry[] };
 type ModelsConfig = { providers?: Record<string, ProviderConfigEntry | undefined> };

+/**
+ * Merges discovered context windows into `params.cache`, keyed by model id.
+ * Skips entries with no id or no finite window that truncates to at least 1.
+ * Duplicate ids keep the smallest window so budgets are never overestimated.
+ */
+export function applyDiscoveredContextWindows(params: {
+  cache: Map<string, number>;
+  models: ModelEntry[];
+}) {
+  for (const model of params.models) {
+    const raw = model?.contextWindow;
+    if (!model?.id || typeof raw !== "number" || !Number.isFinite(raw)) {
+      continue;
+    }
+    const contextWindow = Math.trunc(raw);
+    const existing = params.cache.get(model.id);
+    if (contextWindow > 0 && (existing === undefined || contextWindow < existing)) {
+      params.cache.set(model.id, contextWindow);
+    }
+  }
+}
+
 export function applyConfiguredContextWindows(params: {
  cache: Map<string, number>;
  modelsConfig: ModelsConfig | undefined;
@@ -54,16 +80,15 @@ const loadPromise = (async () => {
    const { discoverAuthStorage, discoverModels } = await import("./pi-model-discovery.js");
    const agentDir = resolveOpenClawAgentDir();
    const authStorage = discoverAuthStorage(agentDir);
-    const modelRegistry = discoverModels(authStorage, agentDir);
-    const models = modelRegistry.getAll() as ModelEntry[];
-    for (const m of models) {
-      if (!m?.id) {
-        continue;
-      }
-      if (typeof m.contextWindow === "number" && m.contextWindow > 0) {
-        MODEL_CACHE.set(m.id, m.contextWindow);
-      }
-    }
+    const modelRegistry = discoverModels(authStorage, agentDir) as unknown as ModelRegistryLike;
+    const models =
+      typeof modelRegistry.getAvailable === "function"
+        ? modelRegistry.getAvailable()
+        : modelRegistry.getAll();
+    applyDiscoveredContextWindows({
+      cache: MODEL_CACHE,
+      models,
+    });
  } catch {
    // If model discovery fails, continue with config overrides only.
  }