fix(openrouter): use endpoint context limits (#86041)

Summary:
- The branch updates OpenRouter dynamic model capability parsing to prefer `top_provider.context_length`, bump ... sk cache version, adds regression coverage and a changelog entry, and adds script helper declaration files.
- Reproducibility: yes, from source and live catalog evidence rather than an authenticated inference turn. Cur ... catalog currently reports a smaller endpoint-specific `top_provider.context_length` for the reported model.

Automerge notes:
- PR branch already contained follow-up commit before automerge: fix(openrouter): use endpoint context limits
- PR branch already contained follow-up commit before automerge: fix(clawsweeper): address review for automerge-openclaw-openclaw-8594…

Validation:
- ClawSweeper review passed for head 76fcc362d2.
- Required merge gates passed before the squash merge.

Prepared head SHA: 76fcc362d2
Review: https://github.com/openclaw/openclaw/pull/86041#issuecomment-4528646655

Co-authored-by: Andy Ye <35905412+TurboTheTurtle@users.noreply.github.com>
Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com>
Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com>
Approved-by: takhoffman
Co-authored-by: takhoffman <781889+takhoffman@users.noreply.github.com>
This commit is contained in:
clawsweeper[bot]
2026-05-24 13:32:44 +00:00
committed by GitHub
parent 8473e8933a
commit dd01a2e789
6 changed files with 161 additions and 4 deletions

View File

@@ -7,8 +7,10 @@ Docs: https://docs.openclaw.ai
### Changes
### Fixes
- Tests: fail the kitchen-sink RPC Docker walk when gateway RSS sampling is unavailable instead of silently disabling the per-process memory guard.
- Tests: suppress the current Rolldown plugin timing warning format in the Vitest wrapper so tiny focused runs do not drown useful stderr in repeated build-timing noise.
- Models/OpenRouter: use endpoint-specific OpenRouter context limits from `top_provider` metadata so provider-routed models no longer overstate available context. (#85949) Thanks @TurboTheTurtle.
- Crabbox: sync clean sparse-checkout remote changed gates from a temporary full checkout with local-only commits overlaid as worktree changes so git-backed script checks can seed the runner repository.
- Tests: make startup memory and startup bench smoke scripts build CLI startup artifacts when run from a fresh source checkout.
- iMessage: mark authorized slash-command turns as text-sourced commands so `/status`, `/new`, and `/restart` acknowledgements return to the source conversation. (#82642) Thanks @homer-byte.
@@ -16,7 +18,6 @@ Docs: https://docs.openclaw.ai
- Live tests: fail Gateway live model sweeps when selected coverage is lost to timeouts or stale high-signal filters instead of reporting false missing-profile coverage, and pin Docker OpenAI gateway coverage to the current `gpt-5.5` lane.
- Tests: fail Docker resource-ceiling checks when stats samples or configured limits are invalid instead of silently reporting zero peaks.
## 2026.5.24
### Changes

16
scripts/npm-runner.d.mts Normal file
View File

@@ -0,0 +1,16 @@
/**
 * Optional inputs accepted by `resolveNpmRunner`; every field may be omitted.
 *
 * NOTE(review): this declaration only carries the shape. The per-field notes
 * below are inferred from names and types — confirm against the runtime script.
 */
export type NpmRunnerParams = {
// Presumably the Windows command interpreter path (cf. `ComSpec`) — TODO confirm.
comSpec?: string;
// Environment map typed as Node's `ProcessEnv`.
env?: NodeJS.ProcessEnv;
// Presumably the Node executable path (cf. `process.execPath`) — TODO confirm.
execPath?: string;
// Predicate from a path string to a boolean; presumably a stand-in for
// `node:fs` `existsSync` — TODO confirm.
existsSync?: (path: string) => boolean;
// Presumably the arguments forwarded to npm — TODO confirm.
npmArgs?: string[];
// Platform identifier typed as `NodeJS.Platform`.
platform?: NodeJS.Platform;
};
/**
 * Type declaration for `resolveNpmRunner`.
 *
 * NOTE(review): presumably works out the command line used to launch npm —
 * confirm against the runtime script; only the signature is visible here.
 *
 * @param params Optional `NpmRunnerParams`; the argument itself may be omitted.
 * @returns An object that always carries `command`, `args` and `shell`, and may
 *   additionally carry `env` and `windowsVerbatimArguments`.
 */
export function resolveNpmRunner(params?: NpmRunnerParams): {
args: string[];
command: string;
env?: NodeJS.ProcessEnv;
shell: boolean;
windowsVerbatimArguments?: boolean;
};

30
scripts/pnpm-runner.d.mts Normal file
View File

@@ -0,0 +1,30 @@
import type { ChildProcess, SpawnOptions } from "node:child_process";
/**
 * Optional inputs shared by `resolvePnpmRunner`, `createPnpmRunnerSpawnSpec`
 * and `spawnPnpmRunner`; every field may be omitted.
 *
 * NOTE(review): this declaration only carries the shape. The per-field notes
 * below are inferred from names and types — confirm against the runtime script.
 */
export type PnpmRunnerParams = {
// Presumably the Windows command interpreter path (cf. `ComSpec`) — TODO confirm.
comSpec?: string;
// Presumably the working directory for the spawned process — TODO confirm.
cwd?: string;
// Presumably forwarded to the spawn options' `detached` flag — TODO confirm.
detached?: boolean;
// Environment map typed as Node's `ProcessEnv`.
env?: NodeJS.ProcessEnv;
// Presumably extra arguments for the Node executable itself — TODO confirm.
nodeArgs?: string[];
// Presumably the Node executable path — TODO confirm.
nodeExecPath?: string;
// Presumably mirrors the `npm_execpath` environment value — TODO confirm.
npmExecPath?: string;
// Platform identifier typed as `NodeJS.Platform`.
platform?: NodeJS.Platform;
// Presumably the arguments forwarded to pnpm — TODO confirm.
pnpmArgs?: string[];
// Same type as the `stdio` field of `node:child_process` `SpawnOptions`.
stdio?: SpawnOptions["stdio"];
};
/**
 * Type declaration for `resolvePnpmRunner`.
 *
 * NOTE(review): presumably works out the command line used to launch pnpm —
 * confirm against the runtime script; only the signature is visible here.
 *
 * @param params Optional `PnpmRunnerParams`; the argument itself may be omitted.
 * @returns An object that always carries `command`, `args` and `shell`, and may
 *   additionally carry `env` and `windowsVerbatimArguments`.
 */
export function resolvePnpmRunner(params?: PnpmRunnerParams): {
args: string[];
command: string;
env?: NodeJS.ProcessEnv;
shell: boolean;
windowsVerbatimArguments?: boolean;
};
/**
 * Type declaration for `createPnpmRunnerSpawnSpec`.
 *
 * NOTE(review): presumably packages the resolved pnpm invocation into the
 * pieces a `spawn` call needs — confirm against the runtime script.
 *
 * @param params Optional `PnpmRunnerParams`; the argument itself may be omitted.
 * @returns An object with `command`, `args`, and `options` typed as
 *   `node:child_process` `SpawnOptions`.
 */
export function createPnpmRunnerSpawnSpec(params?: PnpmRunnerParams): {
args: string[];
command: string;
options: SpawnOptions;
};
/**
 * Type declaration for `spawnPnpmRunner`.
 *
 * NOTE(review): presumably starts pnpm as a child process — confirm against the
 * runtime script; only the signature is visible here.
 *
 * @param params Optional `PnpmRunnerParams`; the argument itself may be omitted.
 * @returns A `node:child_process` `ChildProcess`.
 */
export function spawnPnpmRunner(params?: PnpmRunnerParams): ChildProcess;

View File

@@ -0,0 +1,3 @@
/**
 * Type declaration for `resolvePathEnvKey`.
 *
 * NOTE(review): presumably returns the key under which the PATH variable is
 * stored in `env` (its casing can differ on Windows) — confirm against the
 * runtime script.
 *
 * @param env Environment map typed as Node's `ProcessEnv`.
 * @returns A string.
 */
export function resolvePathEnvKey(env: NodeJS.ProcessEnv): string;
/**
 * Type declaration for `buildCmdExeCommandLine`.
 *
 * NOTE(review): presumably joins `command` and `args` into a single, suitably
 * quoted command line for `cmd.exe` — confirm against the runtime script.
 *
 * @param command Command string.
 * @param args Argument strings.
 * @returns A single string.
 */
export function buildCmdExeCommandLine(command: string, args: string[]): string;

View File

@@ -1,4 +1,4 @@
import { mkdtempSync, rmSync } from "node:fs";
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { importFreshModule } from "openclaw/plugin-sdk/test-fixtures";
@@ -92,6 +92,112 @@ describe("openrouter-model-capabilities", () => {
});
});
// Regression: when the catalog entry carries both a model-wide `context_length`
// and a smaller `top_provider.context_length`, the endpoint-specific value is
// the one expected in the parsed capabilities.
it("uses endpoint-specific OpenRouter context length when top_provider reports one", async () => {
  await withOpenRouterStateDir(async () => {
    const modelId = "nvidia/nemotron-3-super-120b-a12b:free";

    // Catalog entry whose model-wide limit (1M) exceeds the routed endpoint's limit.
    const catalogEntry = {
      id: modelId,
      name: "Nemotron 3 Super 120B Free",
      architecture: { modality: "text->text" },
      context_length: 1_000_000,
      top_provider: {
        context_length: 262_144,
        max_completion_tokens: 262_144,
      },
      pricing: { prompt: "0", completion: "0" },
    };

    // Every fetch call answers with the single-entry catalog above.
    const respondWithCatalog = async () =>
      new Response(JSON.stringify({ data: [catalogEntry] }), {
        status: 200,
        headers: { "content-type": "application/json" },
      });
    vi.stubGlobal("fetch", vi.fn(respondWithCatalog));

    const capabilitiesModule = await importOpenRouterModelCapabilities(
      "top-provider-context-length",
    );
    await capabilitiesModule.loadOpenRouterModelCapabilities(modelId);

    const resolved = capabilitiesModule.getOpenRouterModelCapabilities(modelId);
    expect(resolved).toMatchObject({
      contextWindow: 262_144,
      maxTokens: 262_144,
    });
  });
});
// Regression: a version-2 disk cache that already stores the model-wide
// context window must not be served; the test expects exactly one catalog
// fetch and the endpoint-specific limit afterwards.
it("does not reuse older disk caches with precomputed OpenRouter context windows", async () => {
  await withOpenRouterStateDir(async (stateDir) => {
    const modelId = "nvidia/nemotron-3-super-120b-a12b:free";

    // Seed the state dir with a cache file written under the previous version.
    const staleCache = {
      version: 2,
      models: {
        [modelId]: {
          name: "Nemotron 3 Super 120B Free",
          input: ["text"],
          reasoning: false,
          contextWindow: 1_000_000,
          maxTokens: 262_144,
          cost: {
            input: 0,
            output: 0,
            cacheRead: 0,
            cacheWrite: 0,
          },
        },
      },
    };
    const cacheDir = join(stateDir, "cache");
    mkdirSync(cacheDir, { recursive: true });
    writeFileSync(join(cacheDir, "openrouter-models.json"), JSON.stringify(staleCache));

    // Live catalog reports the smaller endpoint-specific limit.
    const liveCatalog = {
      data: [
        {
          id: modelId,
          name: "Nemotron 3 Super 120B Free",
          architecture: { modality: "text->text" },
          context_length: 1_000_000,
          top_provider: {
            context_length: 262_144,
            max_completion_tokens: 262_144,
          },
          pricing: { prompt: "0", completion: "0" },
        },
      ],
    };
    const fetchSpy = vi.fn(
      async () =>
        new Response(JSON.stringify(liveCatalog), {
          status: 200,
          headers: { "content-type": "application/json" },
        }),
    );
    vi.stubGlobal("fetch", fetchSpy);

    const capabilitiesModule = await importOpenRouterModelCapabilities("old-context-window-cache");
    await capabilitiesModule.loadOpenRouterModelCapabilities(modelId);

    // One fetch proves the stale cache was bypassed rather than reused.
    expect(fetchSpy).toHaveBeenCalledTimes(1);
    const resolved = capabilitiesModule.getOpenRouterModelCapabilities(modelId);
    expect(resolved).toMatchObject({
      contextWindow: 262_144,
      maxTokens: 262_144,
    });
  });
});
it("preserves explicit OpenRouter tool support metadata", async () => {
await withOpenRouterStateDir(async () => {
vi.stubGlobal(

View File

@@ -31,7 +31,7 @@ const log = createSubsystemLogger("openrouter-model-capabilities");
const OPENROUTER_MODELS_URL = "https://openrouter.ai/api/v1/models";
const FETCH_TIMEOUT_MS = 10_000;
const DISK_CACHE_FILENAME = "openrouter-models.json";
const DISK_CACHE_VERSION = 2;
const DISK_CACHE_VERSION = 3;
// ---------------------------------------------------------------------------
// Types
@@ -49,6 +49,7 @@ interface OpenRouterApiModel {
max_completion_tokens?: number;
max_output_tokens?: number;
top_provider?: {
context_length?: number;
max_completion_tokens?: number;
};
pricing?: {
@@ -174,7 +175,7 @@ function parseModel(model: OpenRouterApiModel): OpenRouterModelCapabilities {
input,
reasoning: supportedParameters?.includes("reasoning") ?? false,
...(supportedParameters ? { supportsTools: supportedParameters.includes("tools") } : {}),
contextWindow: model.context_length || 128_000,
contextWindow: model.top_provider?.context_length ?? model.context_length ?? 128_000,
maxTokens:
model.top_provider?.max_completion_tokens ??
model.max_completion_tokens ??