diff --git a/CHANGELOG.md b/CHANGELOG.md index aa5389f771c..57a16aee935 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,6 +52,7 @@ Docs: https://docs.openclaw.ai - OpenAI/Responses WebSocket tool-call id hygiene: normalize blank/whitespace streamed tool-call ids before persistence, and block empty `function_call_output.call_id` payloads in the WS conversion path to avoid OpenAI 400 errors (`Invalid 'input[n].call_id': empty string`), with regression coverage for both inbound stream normalization and outbound payload guards. - Gateway/Control UI basePath webhook passthrough: let non-read methods under configured `controlUiBasePath` fall through to plugin routes (instead of returning Control UI 405), restoring webhook handlers behind basePath mounts. (#32311) Thanks @ademczuk. - CLI/Config validation and routing hardening: dedupe `openclaw config validate` failures to a single authoritative report, expose allowed-values metadata/hints across core Zod and plugin AJV validation (including `--json` fields), sanitize terminal-rendered validation text, and make command-path parsing root-option-aware across preaction/route/lazy registration (including routed `config get/unset` with split root options). Thanks @gumadeiras. +- Context-window metadata warmup: add exponential config-load retry backoff (1s -> 2s -> 4s, capped at 60s) so transient startup failures recover automatically without hot-loop retries. - Models/config env propagation: apply `config.env.vars` before implicit provider discovery in models bootstrap so config-scoped credentials are visible to implicit provider resolution paths. (#32295) Thanks @hsiaoa. - Hooks/runtime stability: keep the internal hook handler registry on a `globalThis` singleton so hook registration/dispatch remains consistent when bundling emits duplicate module copies. (#32292) Thanks @Drickon. - Hooks/plugin context parity: ensure `llm_input` hooks in embedded attempts receive the same `trigger` and `channelId`-aware `hookCtx` used by the other hook phases, preserving channel/trigger-scoped plugin behavior. (#28623) Thanks @davidrudduck and @vincentkoc. diff --git a/src/agents/context.lookup.test.ts b/src/agents/context.lookup.test.ts index 5870be401a4..81263481c34 100644 --- a/src/agents/context.lookup.test.ts +++ b/src/agents/context.lookup.test.ts @@ -61,4 +61,54 @@ describe("lookupContextTokens", () => { process.argv = argvSnapshot; } }); + + it("retries config loading after backoff when an initial load fails", async () => { + vi.useFakeTimers(); + const loadConfigMock = vi + .fn() + .mockImplementationOnce(() => { + throw new Error("transient"); + }) + .mockImplementation(() => ({ + models: { + providers: { + openrouter: { + models: [{ id: "openrouter/claude-sonnet", contextWindow: 654_321 }], + }, + }, + }, + })); + + vi.doMock("../config/config.js", () => ({ + loadConfig: loadConfigMock, + })); + vi.doMock("./models-config.js", () => ({ + ensureOpenClawModelsJson: vi.fn(async () => {}), + })); + vi.doMock("./agent-paths.js", () => ({ + resolveOpenClawAgentDir: () => "/tmp/openclaw-agent", + })); + vi.doMock("./pi-model-discovery.js", () => ({ + discoverAuthStorage: vi.fn(() => ({})), + discoverModels: vi.fn(() => ({ + getAll: () => [], + })), + })); + + const argvSnapshot = process.argv; + process.argv = ["node", "openclaw", "config", "validate"]; + try { + const { lookupContextTokens } = await import("./context.js"); + expect(lookupContextTokens("openrouter/claude-sonnet")).toBeUndefined(); + expect(loadConfigMock).toHaveBeenCalledTimes(1); + expect(lookupContextTokens("openrouter/claude-sonnet")).toBeUndefined(); + expect(loadConfigMock).toHaveBeenCalledTimes(1); + await vi.advanceTimersByTimeAsync(1_000); + expect(lookupContextTokens("openrouter/claude-sonnet")).toBe(654_321); + expect(loadConfigMock).toHaveBeenCalledTimes(2); + } finally { + process.argv = argvSnapshot; + vi.useRealTimers(); + } + }); }); diff --git a/src/agents/context.ts b/src/agents/context.ts index 50e549877ea..bd3aeaf6fc2 100644 --- a/src/agents/context.ts +++ b/src/agents/context.ts @@ -3,6 +3,7 @@ import { loadConfig } from "../config/config.js"; import type { OpenClawConfig } from "../config/config.js"; +import { computeBackoff, type BackoffPolicy } from "../infra/backoff.js"; import { consumeRootOptionToken, FLAG_TERMINATOR } from "../infra/cli-root-options.js"; import { resolveOpenClawAgentDir } from "./agent-paths.js"; import { ensureOpenClawModelsJson } from "./models-config.js"; @@ -19,6 +20,12 @@ type AgentModelEntry = { params?: Record }; const ANTHROPIC_1M_MODEL_PREFIXES = ["claude-opus-4", "claude-sonnet-4"] as const; export const ANTHROPIC_CONTEXT_1M_TOKENS = 1_048_576; +const CONFIG_LOAD_RETRY_POLICY: BackoffPolicy = { + initialMs: 1_000, + maxMs: 60_000, + factor: 2, + jitter: 0, +}; export function applyDiscoveredContextWindows(params: { cache: Map; @@ -68,7 +75,9 @@ export function applyConfiguredContextWindows(params: { const MODEL_CACHE = new Map(); let loadPromise: Promise | null = null; -let configuredWindowsPrimed = false; +let configuredConfig: OpenClawConfig | undefined; +let configLoadFailures = 0; +let nextConfigLoadAttemptAtMs = 0; function getCommandPathFromArgv(argv: string[]): string[] { const args = argv.slice(2); @@ -100,33 +109,42 @@ function shouldSkipEagerContextWindowWarmup(argv: string[] = process.argv): bool } function primeConfiguredContextWindows(): OpenClawConfig | undefined { - if (configuredWindowsPrimed) { + if (configuredConfig) { + return configuredConfig; + } + if (Date.now() < nextConfigLoadAttemptAtMs) { return undefined; } - configuredWindowsPrimed = true; try { const cfg = loadConfig(); applyConfiguredContextWindows({ cache: MODEL_CACHE, modelsConfig: cfg.models as ModelsConfig | undefined, }); + configuredConfig = cfg; + configLoadFailures = 0; + nextConfigLoadAttemptAtMs = 0; return cfg; } catch { - // If config can't be loaded, leave cache empty. + configLoadFailures += 1; + const backoffMs = computeBackoff(CONFIG_LOAD_RETRY_POLICY, configLoadFailures); + nextConfigLoadAttemptAtMs = Date.now() + backoffMs; + // If config can't be loaded, leave cache empty and retry after backoff. return undefined; } } function ensureContextWindowCacheLoaded(): Promise { - const cfg = primeConfiguredContextWindows(); if (loadPromise) { return loadPromise; } - loadPromise = (async () => { - if (!cfg) { - return; - } + const cfg = primeConfiguredContextWindows(); + if (!cfg) { + return Promise.resolve(); + } + + loadPromise = (async () => { try { await ensureOpenClawModelsJson(cfg); } catch {