diff --git a/CHANGELOG.md b/CHANGELOG.md
index 91f8037aba9..20ad6f26d04 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -35,6 +35,7 @@ Docs: https://docs.openclaw.ai
 - OAuth/secrets: ignore root-level Google OAuth `client_secret_*.json` downloads so local client-secret files do not appear as commit candidates. (#74689) Thanks @jeongdulee.
 - Memory: mirror `sqlite-vec` into packaged bundled-plugin runtime deps for the default memory plugin, so builtin vector search does not lose its SQLite extension after upgrading to 2026.4.27. Fixes #74692. Thanks @mozi1924.
 - Gateway/startup: bound local discovery advertisement during startup, so a stuck discovery plugin can no longer keep the Gateway from reaching ready. Fixes #73865; refs #74630 and #74633. Thanks @lpendeavors, @moltar-bot, and @Saboor711.
+- Gateway/models: keep serving the last successful model catalog while a catalog marked stale by a reload refreshes in the background, so Gateway control-plane and OpenAI-compatible requests no longer block behind model-provider rediscovery after model config changes. Refs #74135, #74630, and #74633. Thanks @DerFlash, @moltar-bot, and @Saboor711.
 - CLI/status: resolve read-only channel setup runtime fallback from the packaged OpenClaw dist root, so `status --all`, `status --deep`, channel, and doctor paths do not crash when an external channel plugin needs setup metadata. Fixes #74693. Thanks @giangthb.
 - Google Meet: block managed Chrome intro/test speech until browser health proves the participant is in-call, and expose `speechReady` diagnostics so login, admission, permission, and audio-bridge blockers no longer look like successful speech. Refs #72478. Thanks @DougButdorf.
 - Slack/commands: keep native command argument menus on select controls for encoded choice values up to Slack's option limit and truncate fallback button labels to Slack's button-text limit, so long valid choices no longer render invalid Slack blocks. Thanks @slackapi.
diff --git a/src/gateway/server-model-catalog.test.ts b/src/gateway/server-model-catalog.test.ts
new file mode 100644
index 00000000000..00867a7773f
--- /dev/null
+++ b/src/gateway/server-model-catalog.test.ts
@@ -0,0 +1,148 @@
+import { beforeEach, describe, expect, it, vi } from "vitest";
+import type { OpenClawConfig } from "../config/types.openclaw.js";
+import type { GatewayModelChoice } from "./server-model-catalog.js";
+import {
+  __resetModelCatalogCacheForTest,
+  loadGatewayModelCatalog,
+  markGatewayModelCatalogStaleForReload,
+} from "./server-model-catalog.js";
+
+type Deferred<T> = {
+  promise: Promise<T>;
+  resolve: (value: T) => void;
+  reject: (error: unknown) => void;
+};
+type LoadModelCatalogForTest = NonNullable<
+  NonNullable<Parameters<typeof loadGatewayModelCatalog>[0]>["loadModelCatalog"]
+>;
+
+// Promise whose settlement is driven by the test, used to hold a load "in flight".
+function createDeferred<T>(): Deferred<T> {
+  let resolve!: (value: T) => void;
+  let reject!: (error: unknown) => void;
+  const promise = new Promise<T>((resolvePromise, rejectPromise) => {
+    resolve = resolvePromise;
+    reject = rejectPromise;
+  });
+  return { promise, resolve, reject };
+}
+
+function model(id: string): GatewayModelChoice {
+  return { id, name: id, provider: "openai" } as GatewayModelChoice;
+}
+
+const getConfig = () => ({}) as OpenClawConfig;
+
+describe("loadGatewayModelCatalog", () => {
+  beforeEach(async () => {
+    await __resetModelCatalogCacheForTest();
+  });
+
+  it("caches the first successful catalog until reload marks it stale", async () => {
+    const catalog = [model("gpt-5.4")];
+    const loadModelCatalog = vi.fn(async () => catalog);
+
+    await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog })).resolves.toBe(catalog);
+    await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog })).resolves.toBe(catalog);
+
+    expect(loadModelCatalog).toHaveBeenCalledTimes(1);
+  });
+
+  it("returns the last catalog while a stale reload refresh is still pending", async () => {
+    const staleCatalog = [model("gpt-5.4")];
+    const freshCatalog = [model("gpt-5.5")];
+    const refresh = createDeferred<GatewayModelChoice[]>();
+    const loadModelCatalog = vi
+      .fn<LoadModelCatalogForTest>()
+      .mockResolvedValueOnce(staleCatalog)
+      .mockReturnValueOnce(refresh.promise);
+
+    await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog })).resolves.toBe(
+      staleCatalog,
+    );
+
+    markGatewayModelCatalogStaleForReload();
+    await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog })).resolves.toBe(
+      staleCatalog,
+    );
+    await vi.waitFor(() => expect(loadModelCatalog).toHaveBeenCalledTimes(2));
+
+    refresh.resolve(freshCatalog);
+    await vi.waitFor(async () => {
+      await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog })).resolves.toBe(
+        freshCatalog,
+      );
+    });
+  });
+
+  it("keeps serving the last catalog when a stale background refresh fails", async () => {
+    const staleCatalog = [model("gpt-5.4")];
+    const freshCatalog = [model("gpt-5.5")];
+    const loadModelCatalog = vi
+      .fn<LoadModelCatalogForTest>()
+      .mockResolvedValueOnce(staleCatalog)
+      .mockRejectedValueOnce(new Error("provider offline"))
+      .mockResolvedValueOnce(freshCatalog);
+
+    await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog })).resolves.toBe(
+      staleCatalog,
+    );
+
+    markGatewayModelCatalogStaleForReload();
+    await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog })).resolves.toBe(
+      staleCatalog,
+    );
+    await vi.waitFor(() => expect(loadModelCatalog).toHaveBeenCalledTimes(2));
+
+    await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog })).resolves.toBe(
+      staleCatalog,
+    );
+    await vi.waitFor(() => expect(loadModelCatalog).toHaveBeenCalledTimes(3));
+
+    await vi.waitFor(async () => {
+      await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog })).resolves.toBe(
+        freshCatalog,
+      );
+    });
+  });
+
+  it("keeps serving the last catalog when reading config for a stale refresh throws", async () => {
+    const catalog = [model("gpt-5.4")];
+    const loadModelCatalog = vi.fn(async () => catalog);
+    const brokenGetConfig = (): OpenClawConfig => {
+      throw new Error("config unreadable");
+    };
+
+    await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog })).resolves.toBe(catalog);
+
+    markGatewayModelCatalogStaleForReload();
+    await expect(
+      loadGatewayModelCatalog({ getConfig: brokenGetConfig, loadModelCatalog }),
+    ).resolves.toBe(catalog);
+    expect(loadModelCatalog).toHaveBeenCalledTimes(1);
+  });
+
+  it("caches a load that overlaps a reload mark and refreshes it in the background", async () => {
+    const firstCatalog = [model("gpt-5.4")];
+    const freshCatalog = [model("gpt-5.5")];
+    const firstLoad = createDeferred<GatewayModelChoice[]>();
+    const loadModelCatalog = vi
+      .fn<LoadModelCatalogForTest>()
+      .mockReturnValueOnce(firstLoad.promise)
+      .mockResolvedValueOnce(freshCatalog);
+
+    const pending = loadGatewayModelCatalog({ getConfig, loadModelCatalog });
+    markGatewayModelCatalogStaleForReload();
+    firstLoad.resolve(firstCatalog);
+    await expect(pending).resolves.toBe(firstCatalog);
+
+    await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog })).resolves.toBe(
+      firstCatalog,
+    );
+    await vi.waitFor(async () => {
+      await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog })).resolves.toBe(
+        freshCatalog,
+      );
+    });
+  });
+});
diff --git a/src/gateway/server-model-catalog.ts b/src/gateway/server-model-catalog.ts
index 3f4780f14df..fee2b78ae60 100644
--- a/src/gateway/server-model-catalog.ts
+++ b/src/gateway/server-model-catalog.ts
@@ -2,17 +2,111 @@ import { getRuntimeConfig } from "../config/io.js";
 
 export type GatewayModelChoice = import("../agents/model-catalog.js").ModelCatalogEntry;
 
+type GatewayModelCatalogConfig = ReturnType<typeof getRuntimeConfig>;
+type LoadModelCatalog = (params: {
+  config: GatewayModelCatalogConfig;
+}) => Promise<GatewayModelChoice[]>;
+type LoadGatewayModelCatalogParams = {
+  getConfig?: () => GatewayModelCatalogConfig;
+  loadModelCatalog?: LoadModelCatalog;
+};
+
+// Last catalog that loaded successfully; served while a refresh is pending or has failed.
+let lastSuccessfulCatalog: GatewayModelChoice[] | null = null;
+// The single refresh currently running, if any, so concurrent callers share it.
+let inFlightRefresh: Promise<GatewayModelChoice[]> | null = null;
+// Bumped on every reload that invalidates the catalog.
+let staleGeneration = 0;
+// Generation that `lastSuccessfulCatalog` was loaded for.
+let appliedGeneration = 0;
+
+function resetGatewayModelCatalogState(): void {
+  lastSuccessfulCatalog = null;
+  inFlightRefresh = null;
+  staleGeneration = 0;
+  appliedGeneration = 0;
+}
+
+function isGatewayModelCatalogStale(): boolean {
+  return appliedGeneration < staleGeneration;
+}
+
+async function resolveLoadModelCatalog(
+  params?: LoadGatewayModelCatalogParams,
+): Promise<LoadModelCatalog> {
+  if (params?.loadModelCatalog) {
+    return params.loadModelCatalog;
+  }
+  const { loadModelCatalog } = await import("../agents/model-catalog.js");
+  return loadModelCatalog;
+}
+
+/**
+ * Starts a catalog load and registers it as the in-flight refresh.
+ *
+ * The returned promise rejects when reading the config or loading the catalog
+ * fails; it never throws synchronously, so background callers can attach a
+ * `.catch` and keep serving the previous catalog.
+ */
+function startGatewayModelCatalogRefresh(
+  params?: LoadGatewayModelCatalogParams,
+): Promise<GatewayModelChoice[]> {
+  const refreshGeneration = staleGeneration;
+  const refresh = (async () => {
+    // Runs synchronously up to the first await, so the config snapshot is still
+    // taken at call time, but a throwing getConfig becomes a rejection.
+    const config = (params?.getConfig ?? getRuntimeConfig)();
+    const loadModelCatalog = await resolveLoadModelCatalog(params);
+    const catalog = await loadModelCatalog({ config });
+    // A reload may have bumped staleGeneration while this load was running. Keep the
+    // result anyway, but record the generation it was started for, so the catalog
+    // stays stale and the next call refreshes again. The upper bound can only fail
+    // for a refresh that started before resetGatewayModelCatalogState() rewound it.
+    if (appliedGeneration <= refreshGeneration && refreshGeneration <= staleGeneration) {
+      lastSuccessfulCatalog = catalog;
+      appliedGeneration = refreshGeneration;
+    }
+    return catalog;
+  })().finally(() => {
+    if (inFlightRefresh === refresh) {
+      inFlightRefresh = null;
+    }
+  });
+  inFlightRefresh = refresh;
+  return refresh;
+}
+
+/** Marks the cached catalog stale so the next load triggers a refresh. */
+export function markGatewayModelCatalogStaleForReload(): void {
+  staleGeneration += 1;
+}
+
 // Test-only escape hatch: model catalog is cached at module scope for the
 // process lifetime, which is fine for the real gateway daemon, but makes
 // isolated unit tests harder. Keep this intentionally obscure.
 export async function __resetModelCatalogCacheForTest(): Promise<void> {
+  resetGatewayModelCatalogState();
   const { resetModelCatalogCacheForTest } = await import("../agents/model-catalog.js");
   resetModelCatalogCacheForTest();
 }
 
-export async function loadGatewayModelCatalog(params?: {
-  getConfig?: () => ReturnType<typeof getRuntimeConfig>;
-}): Promise<GatewayModelChoice[]> {
-  const { loadModelCatalog } = await import("../agents/model-catalog.js");
-  return await loadModelCatalog({ config: (params?.getConfig ?? getRuntimeConfig)() });
+/**
+ * Returns the gateway model catalog, serving the cached copy whenever one exists.
+ *
+ * A cached catalog that a reload has marked stale is still returned immediately
+ * while a single background refresh replaces it. Only when nothing has been cached
+ * yet does the caller wait for the load, and only then can this function reject.
+ */
+export async function loadGatewayModelCatalog(
+  params?: LoadGatewayModelCatalogParams,
+): Promise<GatewayModelChoice[]> {
+  if (lastSuccessfulCatalog) {
+    if (isGatewayModelCatalogStale() && !inFlightRefresh) {
+      // A failed refresh leaves the catalog stale, so the next call retries.
+      void startGatewayModelCatalogRefresh(params).catch(() => undefined);
+    }
+    return lastSuccessfulCatalog;
+  }
+  // Nothing cached yet: wait for the load, sharing one between concurrent callers.
+  return await (inFlightRefresh ?? startGatewayModelCatalogRefresh(params));
 }
diff --git a/src/gateway/server-reload-handlers.ts b/src/gateway/server-reload-handlers.ts
index 2864fa76b9a..47e067eb626 100644
--- a/src/gateway/server-reload-handlers.ts
+++ b/src/gateway/server-reload-handlers.ts
@@ -29,6 +29,7 @@ import { startGatewayConfigReloader, type GatewayReloadPlan } from "./config-rel
 import { resolveHooksConfig } from "./hooks.js";
 import { buildGatewayCronService, type GatewayCronState } from "./server-cron.js";
 import { applyGatewayLaneConcurrency } from "./server-lanes.js";
+import { markGatewayModelCatalogStaleForReload } from "./server-model-catalog.js";
 import {
   type GatewayChannelManager,
   startGatewayChannelHealthMonitor,
@@ -241,6 +242,7 @@ export function createGatewayReloadHandlers(params: GatewayReloadHandlerParams)
       )
     ) {
       resetModelCatalogCache();
+      markGatewayModelCatalogStaleForReload();
     }
 
     if (plan.reloadHooks) {