fix(openrouter): use endpoint context limits (#86041)

Summary: - The branch updates OpenRouter dynamic model capability parsing to prefer `top_provider.context_length`, bumps the disk cache version, adds regression coverage and a changelog entry, and adds script helper declaration files. - Reproducibility: yes, from source and live catalog evidence rather than an authenticated inference turn. Cur ... catalog currently reports a smaller endpoint-specific `top_provider.context_length` for the reported model. Automerge notes: - PR branch already contained follow-up commit before automerge: fix(openrouter): use endpoint context limits - PR branch already contained follow-up commit before automerge: fix(clawsweeper): address review for automerge-openclaw-openclaw-8594… Validation: - ClawSweeper review passed for head 76fcc362d2. - Required merge gates passed before the squash merge. Prepared head SHA: 76fcc362d2 Review: https://github.com/openclaw/openclaw/pull/86041#issuecomment-4528646655 Co-authored-by: Andy Ye <35905412+TurboTheTurtle@users.noreply.github.com> Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com> Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com> Approved-by: takhoffman Co-authored-by: takhoffman <781889+takhoffman@users.noreply.github.com>
2026-05-28 03:53:54 +00:00 · 2026-05-24 13:32:44 +00:00
parent 8473e8933a
commit dd01a2e789
6 changed files with 161 additions and 4 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,8 +7,10 @@ Docs: https://docs.openclaw.ai
 ### Changes

 ### Fixes
+
 - Tests: fail the kitchen-sink RPC Docker walk when gateway RSS sampling is unavailable instead of silently disabling the per-process memory guard.
 - Tests: suppress the current Rolldown plugin timing warning format in the Vitest wrapper so tiny focused runs do not drown useful stderr in repeated build-timing noise.
+- Models/OpenRouter: use endpoint-specific OpenRouter context limits from `top_provider` metadata so provider-routed models no longer overstate available context. (#85949) Thanks @TurboTheTurtle.
 - Crabbox: sync clean sparse-checkout remote changed gates from a temporary full checkout with local-only commits overlaid as worktree changes so git-backed script checks can seed the runner repository.
 - Tests: make startup memory and startup bench smoke scripts build CLI startup artifacts when run from a fresh source checkout.
 - iMessage: mark authorized slash-command turns as text-sourced commands so `/status`, `/new`, and `/restart` acknowledgements return to the source conversation. (#82642) thanks @homer-byte.
@@ -16,7 +18,6 @@ Docs: https://docs.openclaw.ai
 - Live tests: fail Gateway live model sweeps when selected coverage is lost to timeouts or stale high-signal filters instead of reporting false missing-profile coverage, and pin Docker OpenAI gateway coverage to the current `gpt-5.5` lane.
 - Tests: fail Docker resource-ceiling checks when stats samples or configured limits are invalid instead of silently reporting zero peaks.

-
 ## 2026.5.24

 ### Changes
--- a/scripts/npm-runner.d.mts
+++ b/scripts/npm-runner.d.mts
@@ -0,0 +1,16 @@
+export type NpmRunnerParams = {
+  comSpec?: string;
+  env?: NodeJS.ProcessEnv;
+  execPath?: string;
+  existsSync?: (path: string) => boolean;
+  npmArgs?: string[];
+  platform?: NodeJS.Platform;
+};
+
+export function resolveNpmRunner(params?: NpmRunnerParams): {
+  args: string[];
+  command: string;
+  env?: NodeJS.ProcessEnv;
+  shell: boolean;
+  windowsVerbatimArguments?: boolean;
+};
--- a/scripts/pnpm-runner.d.mts
+++ b/scripts/pnpm-runner.d.mts
@@ -0,0 +1,30 @@
+import type { ChildProcess, SpawnOptions } from "node:child_process";
+
+export type PnpmRunnerParams = {
+  comSpec?: string;
+  cwd?: string;
+  detached?: boolean;
+  env?: NodeJS.ProcessEnv;
+  nodeArgs?: string[];
+  nodeExecPath?: string;
+  npmExecPath?: string;
+  platform?: NodeJS.Platform;
+  pnpmArgs?: string[];
+  stdio?: SpawnOptions["stdio"];
+};
+
+export function resolvePnpmRunner(params?: PnpmRunnerParams): {
+  args: string[];
+  command: string;
+  env?: NodeJS.ProcessEnv;
+  shell: boolean;
+  windowsVerbatimArguments?: boolean;
+};
+
+export function createPnpmRunnerSpawnSpec(params?: PnpmRunnerParams): {
+  args: string[];
+  command: string;
+  options: SpawnOptions;
+};
+
+export function spawnPnpmRunner(params?: PnpmRunnerParams): ChildProcess;
--- a/scripts/windows-cmd-helpers.d.mts
+++ b/scripts/windows-cmd-helpers.d.mts
@@ -0,0 +1,3 @@
+export function resolvePathEnvKey(env: NodeJS.ProcessEnv): string;
+
+export function buildCmdExeCommandLine(command: string, args: string[]): string;
--- a/src/agents/pi-embedded-runner/openrouter-model-capabilities.test.ts
+++ b/src/agents/pi-embedded-runner/openrouter-model-capabilities.test.ts
@@ -1,4 +1,4 @@
-import { mkdtempSync, rmSync } from "node:fs";
+import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import { importFreshModule } from "openclaw/plugin-sdk/test-fixtures";
@@ -92,6 +92,112 @@ describe("openrouter-model-capabilities", () => {
    });
  });

+  it("uses endpoint-specific OpenRouter context length when top_provider reports one", async () => {
+    await withOpenRouterStateDir(async () => {
+      vi.stubGlobal(
+        "fetch",
+        vi.fn(
+          async () =>
+            new Response(
+              JSON.stringify({
+                data: [
+                  {
+                    id: "nvidia/nemotron-3-super-120b-a12b:free",
+                    name: "Nemotron 3 Super 120B Free",
+                    architecture: { modality: "text->text" },
+                    context_length: 1_000_000,
+                    top_provider: {
+                      context_length: 262_144,
+                      max_completion_tokens: 262_144,
+                    },
+                    pricing: { prompt: "0", completion: "0" },
+                  },
+                ],
+              }),
+              {
+                status: 200,
+                headers: { "content-type": "application/json" },
+              },
+            ),
+        ),
+      );
+
+      const module = await importOpenRouterModelCapabilities("top-provider-context-length");
+      await module.loadOpenRouterModelCapabilities("nvidia/nemotron-3-super-120b-a12b:free");
+
+      expect(
+        module.getOpenRouterModelCapabilities("nvidia/nemotron-3-super-120b-a12b:free"),
+      ).toMatchObject({
+        contextWindow: 262_144,
+        maxTokens: 262_144,
+      });
+    });
+  });
+
+  it("does not reuse older disk caches with precomputed OpenRouter context windows", async () => {
+    await withOpenRouterStateDir(async (stateDir) => {
+      const modelId = "nvidia/nemotron-3-super-120b-a12b:free";
+      const cacheDir = join(stateDir, "cache");
+      mkdirSync(cacheDir, { recursive: true });
+      writeFileSync(
+        join(cacheDir, "openrouter-models.json"),
+        JSON.stringify({
+          version: 2,
+          models: {
+            [modelId]: {
+              name: "Nemotron 3 Super 120B Free",
+              input: ["text"],
+              reasoning: false,
+              contextWindow: 1_000_000,
+              maxTokens: 262_144,
+              cost: {
+                input: 0,
+                output: 0,
+                cacheRead: 0,
+                cacheWrite: 0,
+              },
+            },
+          },
+        }),
+      );
+
+      const fetchSpy = vi.fn(
+        async () =>
+          new Response(
+            JSON.stringify({
+              data: [
+                {
+                  id: modelId,
+                  name: "Nemotron 3 Super 120B Free",
+                  architecture: { modality: "text->text" },
+                  context_length: 1_000_000,
+                  top_provider: {
+                    context_length: 262_144,
+                    max_completion_tokens: 262_144,
+                  },
+                  pricing: { prompt: "0", completion: "0" },
+                },
+              ],
+            }),
+            {
+              status: 200,
+              headers: { "content-type": "application/json" },
+            },
+          ),
+      );
+      vi.stubGlobal("fetch", fetchSpy);
+
+      const module = await importOpenRouterModelCapabilities("old-context-window-cache");
+      await module.loadOpenRouterModelCapabilities(modelId);
+
+      expect(fetchSpy).toHaveBeenCalledTimes(1);
+      expect(module.getOpenRouterModelCapabilities(modelId)).toMatchObject({
+        contextWindow: 262_144,
+        maxTokens: 262_144,
+      });
+    });
+  });
+
  it("preserves explicit OpenRouter tool support metadata", async () => {
    await withOpenRouterStateDir(async () => {
      vi.stubGlobal(
--- a/src/agents/pi-embedded-runner/openrouter-model-capabilities.ts
+++ b/src/agents/pi-embedded-runner/openrouter-model-capabilities.ts
@@ -31,7 +31,7 @@ const log = createSubsystemLogger("openrouter-model-capabilities");
 const OPENROUTER_MODELS_URL = "https://openrouter.ai/api/v1/models";
 const FETCH_TIMEOUT_MS = 10_000;
 const DISK_CACHE_FILENAME = "openrouter-models.json";
-const DISK_CACHE_VERSION = 2;
+const DISK_CACHE_VERSION = 3;

 // ---------------------------------------------------------------------------
 // Types
@@ -49,6 +49,7 @@ interface OpenRouterApiModel {
  max_completion_tokens?: number;
  max_output_tokens?: number;
  top_provider?: {
+    context_length?: number;
    max_completion_tokens?: number;
  };
  pricing?: {
@@ -174,7 +175,7 @@ function parseModel(model: OpenRouterApiModel): OpenRouterModelCapabilities {
    input,
    reasoning: supportedParameters?.includes("reasoning") ?? false,
    ...(supportedParameters ? { supportsTools: supportedParameters.includes("tools") } : {}),
-    contextWindow: model.context_length || 128_000,
+    contextWindow: model.top_provider?.context_length ?? model.context_length ?? 128_000,
    maxTokens:
      model.top_provider?.max_completion_tokens ??
      model.max_completion_tokens ??