From 73a95d3af4ef8238130e83e747d8e9388675d4bd Mon Sep 17 00:00:00 2001 From: Marvinthebored Date: Sun, 3 May 2026 13:48:55 +0800 Subject: [PATCH] fix(gateway): read-only persisted fast path for models.list catalog (#76406) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(gateway): read-only fast path for models.list catalog loading The gateway model catalog refresh calls loadModelCatalog without readOnly, triggering ensureOpenClawModelsJson (60-70s), full PI SDK registry instantiation, auth storage discovery, and live provider plugin augmentation on every Control UI list/refresh. None of this is needed for a read-only UI listing. Three changes: 1. Gateway catalog refresh now passes readOnly: true to loadModelCatalog. 2. In readOnly mode, skip augmentModelCatalogWithProviderPlugins — live provider discovery is explicit admin/background work, not a UI list operation. 3. Add a persisted models.json fast path: when readOnly is true, first try reading the existing models.json directly and converting providers.<provider>.models[] to catalog rows. Falls back to the full PI registry path if the file is missing or unreadable. 
Observed improvement on a production install: loadGatewayModelCatalog: 967 entries / 4651ms → 89 entries / 8ms Live models.list during startup: ~18s → ~2s Co-Authored-By: Claude Opus 4.6 * fix(gateway): preserve full model catalog view * fix(agents): preserve read-only catalog defaults * fix(agents): preserve provider catalog defaults --------- Co-authored-by: Marvinthebored Co-authored-by: Claude Opus 4.6 Co-authored-by: Vincent Koc --- CHANGELOG.md | 1 + src/agents/model-catalog.test.ts | 168 +++++++++++++++++++++ src/agents/model-catalog.ts | 127 +++++++++++++--- src/gateway/server-methods/models.test.ts | 8 +- src/gateway/server-methods/models.ts | 4 +- src/gateway/server-methods/shared-types.ts | 2 +- src/gateway/server-model-catalog.test.ts | 29 ++++ src/gateway/server-model-catalog.ts | 84 +++++++---- 8 files changed, 372 insertions(+), 51 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 00074dfa075..4e75cc15f4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai - CLI/plugins: reject missing plugin ids before config writes in `plugins enable` and `plugins disable` so a typo no longer persists a stale config entry. (#73554) Thanks @ai-hpc. - Agents/sessions: preserve delivered trailing assistant replies during session-file repair so Telegram/WebChat history is not rewritten to drop already-delivered responses. Fixes #76329. Thanks @obviyus. - Gateway/chat history: preserve oversized transcript turns as explicit omitted-message placeholders while avoiding large JSONL parse stalls. Thanks @Marvinthebored and @vincentkoc. +- Gateway/models: keep read-only model-list responses on registry-compatible fallbacks and metadata defaults, so empty or minimal persisted model files do not hide built-ins or custom model capabilities. Thanks @Marvinthebored. - Gateway: preserve stack diagnostics when `chat.send` or agent attachment parsing/staging fails, improving image-send failure triage. Refs #63432. 
(#75135) Thanks @keen0206. - Heartbeats/Codex: stop sending the legacy `HEARTBEAT_OK` prompt instruction when heartbeat turns have the structured `heartbeat_respond` tool, while keeping the text sentinel for legacy automatic heartbeat replies. Thanks @pashpashpash. - Agent runtimes: fail explicit plugin runtime selections honestly when the requested harness is unavailable instead of silently falling back to the embedded PI runtime. Thanks @pashpashpash. diff --git a/src/agents/model-catalog.test.ts b/src/agents/model-catalog.test.ts index 64b1e0252e9..af955d6cfaf 100644 --- a/src/agents/model-catalog.test.ts +++ b/src/agents/model-catalog.test.ts @@ -15,6 +15,7 @@ let augmentCatalogMock: ReturnType; let ensureOpenClawModelsJsonMock: ReturnType; let currentPluginMetadataSnapshotMock: ReturnType; let loadPluginMetadataSnapshotMock: ReturnType; +let readFileMock: ReturnType; vi.mock("./model-suppression.runtime.js", () => ({ shouldSuppressBuiltInModel: (params: { provider?: string; id?: string }) => @@ -70,6 +71,11 @@ function mockSingleOpenAiCatalogModel() { describe("loadModelCatalog", () => { beforeAll(async () => { + readFileMock = vi.fn(); + vi.doMock("node:fs/promises", async (importOriginal) => ({ + ...(await importOriginal()), + readFile: readFileMock, + })); ensureOpenClawModelsJsonMock = vi.fn().mockResolvedValue({ agentDir: "/tmp", wrote: false }); vi.doMock("./models-config.js", () => ({ ensureOpenClawModelsJson: ensureOpenClawModelsJsonMock, @@ -104,6 +110,10 @@ describe("loadModelCatalog", () => { beforeEach(() => { resetModelCatalogCacheForTest(); + readFileMock.mockReset(); + readFileMock.mockRejectedValue( + Object.assign(new Error("models.json missing"), { code: "ENOENT" }), + ); ensureOpenClawModelsJsonMock.mockClear(); augmentCatalogMock.mockClear(); currentPluginMetadataSnapshotMock.mockReset(); @@ -117,6 +127,7 @@ describe("loadModelCatalog", () => { }); afterAll(() => { + vi.doUnmock("node:fs/promises"); vi.doUnmock("./models-config.js"); 
vi.doUnmock("./agent-paths.js"); vi.doUnmock("../plugins/provider-runtime.runtime.js"); @@ -217,6 +228,163 @@ describe("loadModelCatalog", () => { expect(discoverAuthStorage).toHaveBeenCalledWith("/tmp/openclaw", { readOnly: true }); }); + it("filters suppressed built-ins from persisted read-only catalog rows", async () => { + readFileMock.mockResolvedValueOnce( + JSON.stringify({ + providers: { + "openai-codex": { + models: [ + { + id: "gpt-5.3-codex-spark", + name: "GPT-5.3 Codex Spark", + reasoning: true, + contextWindow: 128000, + input: ["text"], + }, + { + id: "gpt-5.4", + name: "GPT-5.4", + reasoning: true, + contextWindow: 272000, + input: ["text", "image"], + }, + ], + }, + openai: { + models: [ + { + id: "gpt-5.3-codex-spark", + name: "GPT-5.3 Codex Spark", + }, + ], + }, + }, + }), + ); + + const result = await loadModelCatalog({ config: {} as OpenClawConfig, readOnly: true }); + + expect(result).toEqual([ + { + provider: "openai-codex", + id: "gpt-5.4", + name: "GPT-5.4", + reasoning: true, + contextWindow: 272000, + input: ["text", "image"], + compat: undefined, + }, + ]); + expect(ensureOpenClawModelsJsonMock).not.toHaveBeenCalled(); + expect(augmentCatalogMock).not.toHaveBeenCalled(); + }); + + it("falls back to the registry when persisted read-only catalog has no model rows", async () => { + readFileMock.mockResolvedValueOnce( + JSON.stringify({ + providers: { + openai: { + modelOverrides: { + "gpt-4.1": { + contextWindow: 128000, + }, + }, + }, + }, + }), + ); + const discoverAuthStorage = vi.fn(() => ({ + getOAuthProviders: () => [], + })); + __setModelCatalogImportForTest( + async () => + ({ + discoverAuthStorage, + AuthStorage: function AuthStorage() {}, + ModelRegistry: class { + getAll() { + return [{ id: "gpt-4.1", name: "GPT-4.1", provider: "openai" }]; + } + }, + }) as unknown as PiSdkModule, + ); + + const result = await loadModelCatalog({ config: {} as OpenClawConfig, readOnly: true }); + + expect(result).toEqual([{ id: "gpt-4.1", name: 
"GPT-4.1", provider: "openai" }]); + expect(ensureOpenClawModelsJsonMock).not.toHaveBeenCalled(); + expect(discoverAuthStorage).toHaveBeenCalledWith("/tmp/openclaw", { readOnly: true }); + }); + + it("preserves registry defaults for minimal persisted read-only catalog rows", async () => { + readFileMock.mockResolvedValueOnce( + JSON.stringify({ + providers: { + custom: { + models: [{ id: "local-tiny" }], + }, + }, + }), + ); + + const result = await loadModelCatalog({ config: {} as OpenClawConfig, readOnly: true }); + + expect(result).toEqual([ + { + provider: "custom", + id: "local-tiny", + name: "local-tiny", + reasoning: false, + contextWindow: 128000, + input: ["text"], + compat: undefined, + }, + ]); + expect(ensureOpenClawModelsJsonMock).not.toHaveBeenCalled(); + expect(augmentCatalogMock).not.toHaveBeenCalled(); + }); + + it("preserves provider context defaults for persisted read-only catalog rows", async () => { + readFileMock.mockResolvedValueOnce( + JSON.stringify({ + providers: { + custom: { + contextWindow: 262144, + models: [ + { id: "inherits-provider-context" }, + { id: "overrides-context", contextWindow: 65536 }, + ], + }, + }, + }), + ); + + const result = await loadModelCatalog({ config: {} as OpenClawConfig, readOnly: true }); + + expect(result).toEqual([ + { + provider: "custom", + id: "inherits-provider-context", + name: "inherits-provider-context", + reasoning: false, + contextWindow: 262144, + input: ["text"], + compat: undefined, + }, + { + provider: "custom", + id: "overrides-context", + name: "overrides-context", + reasoning: false, + contextWindow: 65536, + input: ["text"], + compat: undefined, + }, + ]); + expect(ensureOpenClawModelsJsonMock).not.toHaveBeenCalled(); + expect(augmentCatalogMock).not.toHaveBeenCalled(); + }); + it("does not synthesize stale openai-codex/gpt-5.3-codex-spark entries from gpt-5.4", async () => { mockPiDiscoveryModels([ { diff --git a/src/agents/model-catalog.ts b/src/agents/model-catalog.ts index 
827f34a37f8..449a5212a0a 100644 --- a/src/agents/model-catalog.ts +++ b/src/agents/model-catalog.ts @@ -1,3 +1,4 @@ +import { readFile } from "node:fs/promises"; import { join } from "node:path"; import { getRuntimeConfig } from "../config/config.js"; import type { OpenClawConfig } from "../config/types.openclaw.js"; @@ -20,6 +21,7 @@ import { ensureOpenClawModelsJson } from "./models-config.js"; import { normalizeProviderId } from "./provider-id.js"; const log = createSubsystemLogger("model-catalog"); +const PI_CUSTOM_MODEL_DEFAULT_CONTEXT_WINDOW = 128_000; export type { ModelCatalogEntry, ModelInputType } from "./model-catalog.types.js"; export { @@ -161,12 +163,106 @@ export function loadManifestModelCatalog(params: { }); } +function sortModelCatalogEntries(entries: ModelCatalogEntry[]): ModelCatalogEntry[] { + return entries.toSorted((a, b) => { + const p = a.provider.localeCompare(b.provider); + if (p !== 0) { + return p; + } + return a.name.localeCompare(b.name); + }); +} + +function normalizePersistedModelCatalogEntry( + providerRaw: string, + entry: Record, + defaults?: { + contextWindow?: number; + }, +): ModelCatalogEntry | undefined { + const id = normalizeOptionalString(entry.id) ?? ""; + if (!id) { + return undefined; + } + const provider = normalizeProviderId(providerRaw); + if (!provider) { + return undefined; + } + const name = normalizeOptionalString(entry.name ?? id) || id; + const contextWindow = + typeof entry?.contextWindow === "number" && entry.contextWindow > 0 + ? entry.contextWindow + : defaults?.contextWindow !== undefined + ? defaults.contextWindow + : PI_CUSTOM_MODEL_DEFAULT_CONTEXT_WINDOW; + const reasoning = typeof entry?.reasoning === "boolean" ? entry.reasoning : false; + const parsedInput = Array.isArray(entry?.input) + ? entry.input.filter((value): value is ModelInputType => + ["text", "image", "audio", "video", "document"].includes(String(value)), + ) + : undefined; + const input: ModelInputType[] = parsedInput?.length ? 
parsedInput : ["text"]; + const compat = + entry?.compat && typeof entry.compat === "object" + ? (entry.compat as ModelCatalogEntry["compat"]) + : undefined; + return { id, name, provider, contextWindow, reasoning, input, compat }; +} + +async function loadReadOnlyPersistedModelCatalog(params?: { + config?: OpenClawConfig; +}): Promise { + const cfg = params?.config ?? getRuntimeConfig(); + const agentDir = resolveOpenClawAgentDir(); + const raw = await readFile(join(agentDir, "models.json"), "utf8"); + const parsed = JSON.parse(raw) as Record; + const models: ModelCatalogEntry[] = []; + const { buildShouldSuppressBuiltInModel } = await loadModelSuppression(); + const shouldSuppressBuiltInModel = buildShouldSuppressBuiltInModel({ config: cfg }); + const providers = + parsed?.providers && typeof parsed.providers === "object" + ? (parsed.providers as Record>) + : {}; + for (const [providerRaw, providerConfig] of Object.entries(providers)) { + if (!Array.isArray(providerConfig?.models)) { + continue; + } + const providerContextWindow = + typeof providerConfig?.contextWindow === "number" && providerConfig.contextWindow > 0 + ? 
providerConfig.contextWindow + : undefined; + for (const entry of providerConfig.models as Record[]) { + const normalized = normalizePersistedModelCatalogEntry(providerRaw, entry, { + contextWindow: providerContextWindow, + }); + if (normalized && !shouldSuppressBuiltInModel(normalized)) { + models.push(normalized); + } + } + } + if (models.length === 0) { + throw new Error("persisted model catalog has no usable model rows"); + } + const configuredModels = buildConfiguredModelCatalog({ cfg }); + if (configuredModels.length > 0) { + appendCatalogEntriesIfAbsent(models, configuredModels); + } + return sortModelCatalogEntries(models); +} + export async function loadModelCatalog(params?: { config?: OpenClawConfig; useCache?: boolean; readOnly?: boolean; }): Promise { const readOnly = params?.readOnly === true; + if (readOnly) { + try { + return await loadReadOnlyPersistedModelCatalog(params); + } catch { + // fall through to full catalog path + } + } if (!readOnly && params?.useCache === false) { modelCatalogPromise = null; } @@ -185,14 +281,7 @@ export async function loadModelCatalog(params?: { const suffix = extra ? ` ${extra}` : ""; log.info(`model-catalog stage=${stage} elapsedMs=${Date.now() - startMs}${suffix}`); }; - const sortModels = (entries: ModelCatalogEntry[]) => - entries.sort((a, b) => { - const p = a.provider.localeCompare(b.provider); - if (p !== 0) { - return p; - } - return a.name.localeCompare(b.name); - }); + const sortModels = sortModelCatalogEntries; try { const cfg = params?.config ?? getRuntimeConfig(); if (!readOnly) { @@ -247,18 +336,20 @@ export async function loadModelCatalog(params?: { const compat = entry?.compat && typeof entry.compat === "object" ? 
entry.compat : undefined; models.push({ id, name, provider, contextWindow, reasoning, input, compat }); } - const supplemental = await augmentModelCatalogWithProviderPlugins({ - config: cfg, - env: process.env, - context: { + if (!readOnly) { + const supplemental = await augmentModelCatalogWithProviderPlugins({ config: cfg, - agentDir, env: process.env, - entries: [...models], - }, - }); - if (supplemental.length > 0) { - appendCatalogEntriesIfAbsent(models, supplemental); + context: { + config: cfg, + agentDir, + env: process.env, + entries: [...models], + }, + }); + if (supplemental.length > 0) { + appendCatalogEntriesIfAbsent(models, supplemental); + } } logStage("plugin-models-merged", `entries=${models.length}`); diff --git a/src/gateway/server-methods/models.test.ts b/src/gateway/server-methods/models.test.ts index 4999d396b2b..b47b027c5a2 100644 --- a/src/gateway/server-methods/models.test.ts +++ b/src/gateway/server-methods/models.test.ts @@ -23,6 +23,7 @@ describe("models.list", () => { it("does not block the configured view on slow model catalog discovery", async () => { const catalog = createDeferred(); const respond = vi.fn(); + const loadGatewayModelCatalog = vi.fn(() => catalog.promise); vi.useFakeTimers(); try { @@ -51,7 +52,7 @@ describe("models.list", () => { }; return config as unknown as OpenClawConfig; }, - loadGatewayModelCatalog: vi.fn(() => catalog.promise), + loadGatewayModelCatalog, logGateway: { debug: vi.fn(), }, @@ -74,6 +75,7 @@ describe("models.list", () => { }, undefined, ); + expect(loadGatewayModelCatalog).toHaveBeenCalledWith({ readOnly: true }); } finally { vi.useRealTimers(); } @@ -82,6 +84,7 @@ describe("models.list", () => { it("keeps the all view exact instead of timing out to a partial catalog", async () => { const catalog = createDeferred<[{ id: string; name: string; provider: string }]>(); const respond = vi.fn(); + const loadGatewayModelCatalog = vi.fn(() => catalog.promise); vi.useFakeTimers(); try { @@ -98,7 +101,7 @@ 
describe("models.list", () => { isWebchatConnect: () => false, context: { getRuntimeConfig: () => ({}) as OpenClawConfig, - loadGatewayModelCatalog: vi.fn(() => catalog.promise), + loadGatewayModelCatalog, logGateway: { debug: vi.fn(), }, @@ -116,6 +119,7 @@ describe("models.list", () => { { models: [{ id: "gpt-test", name: "GPT Test", provider: "openai" }] }, undefined, ); + expect(loadGatewayModelCatalog).toHaveBeenCalledWith({ readOnly: false }); } finally { vi.useRealTimers(); } diff --git a/src/gateway/server-methods/models.ts b/src/gateway/server-methods/models.ts index c2fc194c246..8cf6236d4e5 100644 --- a/src/gateway/server-methods/models.ts +++ b/src/gateway/server-methods/models.ts @@ -26,11 +26,11 @@ async function loadModelsListCatalog( view: ModelsListView, ): Promise { if (view === "all") { - return await context.loadGatewayModelCatalog(); + return await context.loadGatewayModelCatalog({ readOnly: false }); } let timeout: NodeJS.Timeout | undefined; const timedOut = Symbol("models-list-catalog-timeout"); - const catalogPromise = context.loadGatewayModelCatalog(); + const catalogPromise = context.loadGatewayModelCatalog({ readOnly: true }); const timeoutPromise = new Promise((resolve) => { timeout = setTimeout(() => resolve(timedOut), MODELS_LIST_CATALOG_TIMEOUT_MS); timeout.unref?.(); diff --git a/src/gateway/server-methods/shared-types.ts b/src/gateway/server-methods/shared-types.ts index 1961d200f85..1bf326f3ca7 100644 --- a/src/gateway/server-methods/shared-types.ts +++ b/src/gateway/server-methods/shared-types.ts @@ -45,7 +45,7 @@ export type GatewayRequestContext = { getRuntimeConfig: () => OpenClawConfig; execApprovalManager?: ExecApprovalManager; pluginApprovalManager?: ExecApprovalManager; - loadGatewayModelCatalog: () => Promise; + loadGatewayModelCatalog: (params?: { readOnly?: boolean }) => Promise; getHealthCache: () => HealthSummary | null; refreshHealthSnapshot: (opts?: { probe?: boolean; diff --git 
a/src/gateway/server-model-catalog.test.ts b/src/gateway/server-model-catalog.test.ts index d8f85fa8f85..6a4965b234e 100644 --- a/src/gateway/server-model-catalog.test.ts +++ b/src/gateway/server-model-catalog.test.ts @@ -45,6 +45,35 @@ describe("loadGatewayModelCatalog", () => { await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog })).resolves.toBe(catalog); expect(loadModelCatalog).toHaveBeenCalledTimes(1); + expect(loadModelCatalog).toHaveBeenCalledWith({ config: getConfig(), readOnly: true }); + }); + + it("keeps read-only and full catalog caches separate", async () => { + const readOnlyCatalog = [model("configured-only")]; + const fullCatalog = [model("configured-only"), model("browse-only")]; + const loadModelCatalog = vi.fn(async (params) => + params.readOnly === false ? fullCatalog : readOnlyCatalog, + ); + + await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog })).resolves.toBe( + readOnlyCatalog, + ); + await expect( + loadGatewayModelCatalog({ getConfig, loadModelCatalog, readOnly: false }), + ).resolves.toBe(fullCatalog); + await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog })).resolves.toBe( + readOnlyCatalog, + ); + + expect(loadModelCatalog).toHaveBeenCalledTimes(2); + expect(loadModelCatalog).toHaveBeenNthCalledWith(1, { + config: getConfig(), + readOnly: true, + }); + expect(loadModelCatalog).toHaveBeenNthCalledWith(2, { + config: getConfig(), + readOnly: false, + }); }); it("does not cache an empty catalog so the next request retries", async () => { diff --git a/src/gateway/server-model-catalog.ts b/src/gateway/server-model-catalog.ts index 586a0bd5c34..372abd95776 100644 --- a/src/gateway/server-model-catalog.ts +++ b/src/gateway/server-model-catalog.ts @@ -5,26 +5,50 @@ export type GatewayModelChoice = import("../agents/model-catalog.js").ModelCatal type GatewayModelCatalogConfig = ReturnType; type LoadModelCatalog = (params: { config: GatewayModelCatalogConfig; + readOnly?: boolean; }) => Promise; 
type LoadGatewayModelCatalogParams = { getConfig?: () => GatewayModelCatalogConfig; loadModelCatalog?: LoadModelCatalog; + readOnly?: boolean; }; -let lastSuccessfulCatalog: GatewayModelChoice[] | null = null; -let inFlightRefresh: Promise | null = null; -let staleGeneration = 0; -let appliedGeneration = 0; +type GatewayModelCatalogCache = { + lastSuccessfulCatalog: GatewayModelChoice[] | null; + inFlightRefresh: Promise | null; + staleGeneration: number; + appliedGeneration: number; +}; -function resetGatewayModelCatalogState(): void { - lastSuccessfulCatalog = null; - inFlightRefresh = null; - staleGeneration = 0; - appliedGeneration = 0; +function createGatewayModelCatalogCache(): GatewayModelCatalogCache { + return { + lastSuccessfulCatalog: null, + inFlightRefresh: null, + staleGeneration: 0, + appliedGeneration: 0, + }; } -function isGatewayModelCatalogStale(): boolean { - return appliedGeneration < staleGeneration; +const readOnlyModelCatalogCache = createGatewayModelCatalogCache(); +const fullModelCatalogCache = createGatewayModelCatalogCache(); + +function resolveGatewayModelCatalogCache( + params?: LoadGatewayModelCatalogParams, +): GatewayModelCatalogCache { + return params?.readOnly === false ? fullModelCatalogCache : readOnlyModelCatalogCache; +} + +function resetGatewayModelCatalogState(): void { + for (const cache of [readOnlyModelCatalogCache, fullModelCatalogCache]) { + cache.lastSuccessfulCatalog = null; + cache.inFlightRefresh = null; + cache.staleGeneration = 0; + cache.appliedGeneration = 0; + } +} + +function isGatewayModelCatalogStale(cache: GatewayModelCatalogCache): boolean { + return cache.appliedGeneration < cache.staleGeneration; } async function resolveLoadModelCatalog( @@ -40,28 +64,31 @@ async function resolveLoadModelCatalog( function startGatewayModelCatalogRefresh( params?: LoadGatewayModelCatalogParams, ): Promise { + const cache = resolveGatewayModelCatalogCache(params); const config = (params?.getConfig ?? 
getRuntimeConfig)(); - const refreshGeneration = staleGeneration; + const readOnly = params?.readOnly !== false; + const refreshGeneration = cache.staleGeneration; const refresh = resolveLoadModelCatalog(params) - .then((loadModelCatalog) => loadModelCatalog({ config })) + .then((loadModelCatalog) => loadModelCatalog({ config, readOnly })) .then((catalog) => { - if (catalog.length > 0 && refreshGeneration === staleGeneration) { - lastSuccessfulCatalog = catalog; - appliedGeneration = staleGeneration; + if (catalog.length > 0 && refreshGeneration === cache.staleGeneration) { + cache.lastSuccessfulCatalog = catalog; + cache.appliedGeneration = cache.staleGeneration; } return catalog; }) .finally(() => { - if (inFlightRefresh === refresh) { - inFlightRefresh = null; + if (cache.inFlightRefresh === refresh) { + cache.inFlightRefresh = null; } }); - inFlightRefresh = refresh; + cache.inFlightRefresh = refresh; return refresh; } export function markGatewayModelCatalogStaleForReload(): void { - staleGeneration += 1; + readOnlyModelCatalogCache.staleGeneration += 1; + fullModelCatalogCache.staleGeneration += 1; } // Test-only escape hatch: model catalog is cached at module scope for the @@ -76,18 +103,19 @@ export async function __resetModelCatalogCacheForTest(): Promise { export async function loadGatewayModelCatalog( params?: LoadGatewayModelCatalogParams, ): Promise { - const isStale = isGatewayModelCatalogStale(); - if (!isStale && lastSuccessfulCatalog) { - return lastSuccessfulCatalog; + const cache = resolveGatewayModelCatalogCache(params); + const isStale = isGatewayModelCatalogStale(cache); + if (!isStale && cache.lastSuccessfulCatalog) { + return cache.lastSuccessfulCatalog; } - if (isStale && lastSuccessfulCatalog) { - if (!inFlightRefresh) { + if (isStale && cache.lastSuccessfulCatalog) { + if (!cache.inFlightRefresh) { void startGatewayModelCatalogRefresh(params).catch(() => undefined); } - return lastSuccessfulCatalog; + return cache.lastSuccessfulCatalog; } 
- if (inFlightRefresh) { - return await inFlightRefresh; + if (cache.inFlightRefresh) { + return await cache.inFlightRefresh; } return await startGatewayModelCatalogRefresh(params); }