mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 09:10:45 +00:00
fix(gateway): read-only persisted fast path for models.list catalog (#76406)
* fix(gateway): read-only fast path for models.list catalog loading The gateway model catalog refresh calls loadModelCatalog without readOnly, triggering ensureOpenClawModelsJson (60-70s), full PI SDK registry instantiation, auth storage discovery, and live provider plugin augmentation on every Control UI list/refresh. None of this is needed for a read-only UI listing. Three changes: 1. Gateway catalog refresh now passes readOnly: true to loadModelCatalog. 2. In readOnly mode, skip augmentModelCatalogWithProviderPlugins — live provider discovery is explicit admin/background work, not a UI list operation. 3. Add a persisted models.json fast path: when readOnly is true, first try reading the existing models.json directly and converting providers.<provider>.models[] to catalog rows. Falls back to the full PI registry path if the file is missing or unreadable. Observed improvement on a production install: loadGatewayModelCatalog: 967 entries / 4651ms → 89 entries / 8ms Live models.list during startup: ~18s → ~2s Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix(gateway): preserve full model catalog view * fix(agents): preserve read-only catalog defaults * fix(agents): preserve provider catalog defaults --------- Co-authored-by: Marvinthebored <peter@lindsey.jp> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
This commit is contained in:
@@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai
|
||||
- CLI/plugins: reject missing plugin ids before config writes in `plugins enable` and `plugins disable` so a typo no longer persists a stale config entry. (#73554) Thanks @ai-hpc.
|
||||
- Agents/sessions: preserve delivered trailing assistant replies during session-file repair so Telegram/WebChat history is not rewritten to drop already-delivered responses. Fixes #76329. Thanks @obviyus.
|
||||
- Gateway/chat history: preserve oversized transcript turns as explicit omitted-message placeholders while avoiding large JSONL parse stalls. Thanks @Marvinthebored and @vincentkoc.
|
||||
- Gateway/models: keep read-only model-list responses on registry-compatible fallbacks and metadata defaults, so empty or minimal persisted model files do not hide built-ins or custom model capabilities. Thanks @Marvinthebored.
|
||||
- Gateway: preserve stack diagnostics when `chat.send` or agent attachment parsing/staging fails, improving image-send failure triage. Refs #63432. (#75135) Thanks @keen0206.
|
||||
- Heartbeats/Codex: stop sending the legacy `HEARTBEAT_OK` prompt instruction when heartbeat turns have the structured `heartbeat_respond` tool, while keeping the text sentinel for legacy automatic heartbeat replies. Thanks @pashpashpash.
|
||||
- Agent runtimes: fail explicit plugin runtime selections honestly when the requested harness is unavailable instead of silently falling back to the embedded PI runtime. Thanks @pashpashpash.
|
||||
|
||||
@@ -15,6 +15,7 @@ let augmentCatalogMock: ReturnType<typeof vi.fn>;
|
||||
let ensureOpenClawModelsJsonMock: ReturnType<typeof vi.fn>;
|
||||
let currentPluginMetadataSnapshotMock: ReturnType<typeof vi.fn>;
|
||||
let loadPluginMetadataSnapshotMock: ReturnType<typeof vi.fn>;
|
||||
let readFileMock: ReturnType<typeof vi.fn>;
|
||||
|
||||
vi.mock("./model-suppression.runtime.js", () => ({
|
||||
shouldSuppressBuiltInModel: (params: { provider?: string; id?: string }) =>
|
||||
@@ -70,6 +71,11 @@ function mockSingleOpenAiCatalogModel() {
|
||||
|
||||
describe("loadModelCatalog", () => {
|
||||
beforeAll(async () => {
|
||||
readFileMock = vi.fn();
|
||||
vi.doMock("node:fs/promises", async (importOriginal) => ({
|
||||
...(await importOriginal<typeof import("node:fs/promises")>()),
|
||||
readFile: readFileMock,
|
||||
}));
|
||||
ensureOpenClawModelsJsonMock = vi.fn().mockResolvedValue({ agentDir: "/tmp", wrote: false });
|
||||
vi.doMock("./models-config.js", () => ({
|
||||
ensureOpenClawModelsJson: ensureOpenClawModelsJsonMock,
|
||||
@@ -104,6 +110,10 @@ describe("loadModelCatalog", () => {
|
||||
|
||||
beforeEach(() => {
|
||||
resetModelCatalogCacheForTest();
|
||||
readFileMock.mockReset();
|
||||
readFileMock.mockRejectedValue(
|
||||
Object.assign(new Error("models.json missing"), { code: "ENOENT" }),
|
||||
);
|
||||
ensureOpenClawModelsJsonMock.mockClear();
|
||||
augmentCatalogMock.mockClear();
|
||||
currentPluginMetadataSnapshotMock.mockReset();
|
||||
@@ -117,6 +127,7 @@ describe("loadModelCatalog", () => {
|
||||
});
|
||||
|
||||
afterAll(() => {
|
||||
vi.doUnmock("node:fs/promises");
|
||||
vi.doUnmock("./models-config.js");
|
||||
vi.doUnmock("./agent-paths.js");
|
||||
vi.doUnmock("../plugins/provider-runtime.runtime.js");
|
||||
@@ -217,6 +228,163 @@ describe("loadModelCatalog", () => {
|
||||
expect(discoverAuthStorage).toHaveBeenCalledWith("/tmp/openclaw", { readOnly: true });
|
||||
});
|
||||
|
||||
it("filters suppressed built-ins from persisted read-only catalog rows", async () => {
|
||||
readFileMock.mockResolvedValueOnce(
|
||||
JSON.stringify({
|
||||
providers: {
|
||||
"openai-codex": {
|
||||
models: [
|
||||
{
|
||||
id: "gpt-5.3-codex-spark",
|
||||
name: "GPT-5.3 Codex Spark",
|
||||
reasoning: true,
|
||||
contextWindow: 128000,
|
||||
input: ["text"],
|
||||
},
|
||||
{
|
||||
id: "gpt-5.4",
|
||||
name: "GPT-5.4",
|
||||
reasoning: true,
|
||||
contextWindow: 272000,
|
||||
input: ["text", "image"],
|
||||
},
|
||||
],
|
||||
},
|
||||
openai: {
|
||||
models: [
|
||||
{
|
||||
id: "gpt-5.3-codex-spark",
|
||||
name: "GPT-5.3 Codex Spark",
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await loadModelCatalog({ config: {} as OpenClawConfig, readOnly: true });
|
||||
|
||||
expect(result).toEqual([
|
||||
{
|
||||
provider: "openai-codex",
|
||||
id: "gpt-5.4",
|
||||
name: "GPT-5.4",
|
||||
reasoning: true,
|
||||
contextWindow: 272000,
|
||||
input: ["text", "image"],
|
||||
compat: undefined,
|
||||
},
|
||||
]);
|
||||
expect(ensureOpenClawModelsJsonMock).not.toHaveBeenCalled();
|
||||
expect(augmentCatalogMock).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("falls back to the registry when persisted read-only catalog has no model rows", async () => {
|
||||
readFileMock.mockResolvedValueOnce(
|
||||
JSON.stringify({
|
||||
providers: {
|
||||
openai: {
|
||||
modelOverrides: {
|
||||
"gpt-4.1": {
|
||||
contextWindow: 128000,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}),
|
||||
);
|
||||
const discoverAuthStorage = vi.fn(() => ({
|
||||
getOAuthProviders: () => [],
|
||||
}));
|
||||
__setModelCatalogImportForTest(
|
||||
async () =>
|
||||
({
|
||||
discoverAuthStorage,
|
||||
AuthStorage: function AuthStorage() {},
|
||||
ModelRegistry: class {
|
||||
getAll() {
|
||||
return [{ id: "gpt-4.1", name: "GPT-4.1", provider: "openai" }];
|
||||
}
|
||||
},
|
||||
}) as unknown as PiSdkModule,
|
||||
);
|
||||
|
||||
const result = await loadModelCatalog({ config: {} as OpenClawConfig, readOnly: true });
|
||||
|
||||
expect(result).toEqual([{ id: "gpt-4.1", name: "GPT-4.1", provider: "openai" }]);
|
||||
expect(ensureOpenClawModelsJsonMock).not.toHaveBeenCalled();
|
||||
expect(discoverAuthStorage).toHaveBeenCalledWith("/tmp/openclaw", { readOnly: true });
|
||||
});
|
||||
|
||||
it("preserves registry defaults for minimal persisted read-only catalog rows", async () => {
|
||||
readFileMock.mockResolvedValueOnce(
|
||||
JSON.stringify({
|
||||
providers: {
|
||||
custom: {
|
||||
models: [{ id: "local-tiny" }],
|
||||
},
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await loadModelCatalog({ config: {} as OpenClawConfig, readOnly: true });
|
||||
|
||||
expect(result).toEqual([
|
||||
{
|
||||
provider: "custom",
|
||||
id: "local-tiny",
|
||||
name: "local-tiny",
|
||||
reasoning: false,
|
||||
contextWindow: 128000,
|
||||
input: ["text"],
|
||||
compat: undefined,
|
||||
},
|
||||
]);
|
||||
expect(ensureOpenClawModelsJsonMock).not.toHaveBeenCalled();
|
||||
expect(augmentCatalogMock).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("preserves provider context defaults for persisted read-only catalog rows", async () => {
|
||||
readFileMock.mockResolvedValueOnce(
|
||||
JSON.stringify({
|
||||
providers: {
|
||||
custom: {
|
||||
contextWindow: 262144,
|
||||
models: [
|
||||
{ id: "inherits-provider-context" },
|
||||
{ id: "overrides-context", contextWindow: 65536 },
|
||||
],
|
||||
},
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await loadModelCatalog({ config: {} as OpenClawConfig, readOnly: true });
|
||||
|
||||
expect(result).toEqual([
|
||||
{
|
||||
provider: "custom",
|
||||
id: "inherits-provider-context",
|
||||
name: "inherits-provider-context",
|
||||
reasoning: false,
|
||||
contextWindow: 262144,
|
||||
input: ["text"],
|
||||
compat: undefined,
|
||||
},
|
||||
{
|
||||
provider: "custom",
|
||||
id: "overrides-context",
|
||||
name: "overrides-context",
|
||||
reasoning: false,
|
||||
contextWindow: 65536,
|
||||
input: ["text"],
|
||||
compat: undefined,
|
||||
},
|
||||
]);
|
||||
expect(ensureOpenClawModelsJsonMock).not.toHaveBeenCalled();
|
||||
expect(augmentCatalogMock).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("does not synthesize stale openai-codex/gpt-5.3-codex-spark entries from gpt-5.4", async () => {
|
||||
mockPiDiscoveryModels([
|
||||
{
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { readFile } from "node:fs/promises";
|
||||
import { join } from "node:path";
|
||||
import { getRuntimeConfig } from "../config/config.js";
|
||||
import type { OpenClawConfig } from "../config/types.openclaw.js";
|
||||
@@ -20,6 +21,7 @@ import { ensureOpenClawModelsJson } from "./models-config.js";
|
||||
import { normalizeProviderId } from "./provider-id.js";
|
||||
|
||||
const log = createSubsystemLogger("model-catalog");
|
||||
const PI_CUSTOM_MODEL_DEFAULT_CONTEXT_WINDOW = 128_000;
|
||||
|
||||
export type { ModelCatalogEntry, ModelInputType } from "./model-catalog.types.js";
|
||||
export {
|
||||
@@ -161,12 +163,106 @@ export function loadManifestModelCatalog(params: {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Returns a sorted copy of the catalog rows, ordered by provider first and by
 * display name within the same provider. The input array is left untouched.
 */
function sortModelCatalogEntries(entries: ModelCatalogEntry[]): ModelCatalogEntry[] {
  // A zero provider comparison is falsy, so `||` falls through to the name tiebreak.
  const byProviderThenName = (left: ModelCatalogEntry, right: ModelCatalogEntry): number =>
    left.provider.localeCompare(right.provider) || left.name.localeCompare(right.name);
  return entries.toSorted(byProviderThenName);
}
|
||||
|
||||
/** Input modalities accepted from persisted rows; any other value is dropped. */
const PERSISTED_MODEL_INPUT_TYPES: ReadonlySet<string> = new Set([
  "text",
  "image",
  "audio",
  "video",
  "document",
]);

/**
 * Converts one raw `providers.<provider>.models[]` row from the persisted
 * models file into a catalog entry, filling in defaults for missing metadata.
 *
 * @param providerRaw Provider key as written in the file; normalized via `normalizeProviderId`.
 * @param entry Raw model row. Only `id`, `name`, `contextWindow`, `reasoning`,
 *   `input` and `compat` are read.
 * @param defaults Optional provider-level fallbacks; `contextWindow` is used when
 *   the row does not carry a positive numeric one of its own.
 * @returns The normalized entry, or `undefined` when the row has no usable id or
 *   the provider key normalizes to an empty value.
 */
function normalizePersistedModelCatalogEntry(
  providerRaw: string,
  entry: Record<string, unknown>,
  defaults?: {
    contextWindow?: number;
  },
): ModelCatalogEntry | undefined {
  const id = normalizeOptionalString(entry.id) ?? "";
  if (!id) {
    return undefined;
  }
  const provider = normalizeProviderId(providerRaw);
  if (!provider) {
    return undefined;
  }
  // The display name falls back to the id when absent or empty after normalization.
  const name = normalizeOptionalString(entry.name ?? id) || id;
  // Precedence: row value, then provider default, then the built-in default.
  const contextWindow =
    typeof entry.contextWindow === "number" && entry.contextWindow > 0
      ? entry.contextWindow
      : (defaults?.contextWindow ?? PI_CUSTOM_MODEL_DEFAULT_CONTEXT_WINDOW);
  // Anything other than a literal boolean `true` counts as "no reasoning".
  const reasoning = entry.reasoning === true;
  // Require real strings: coercing with String() would let values such as
  // ["text"] (which stringifies to "text") pass the type predicate.
  const parsedInput: ModelInputType[] = Array.isArray(entry.input)
    ? entry.input.filter(
        (value): value is ModelInputType =>
          typeof value === "string" && PERSISTED_MODEL_INPUT_TYPES.has(value),
      )
    : [];
  // Rows with no recognized modality default to text-only.
  const input: ModelInputType[] = parsedInput.length > 0 ? parsedInput : ["text"];
  const compat =
    entry.compat && typeof entry.compat === "object"
      ? (entry.compat as ModelCatalogEntry["compat"])
      : undefined;
  return { id, name, provider, contextWindow, reasoning, input, compat };
}
|
||||
|
||||
/**
 * Read-only fast path: builds the model catalog directly from the persisted
 * `models.json` in the agent directory, without touching the model registry.
 *
 * Each `providers.<provider>.models[]` row is normalized, rows rejected by the
 * built-in suppression check are dropped, configured models are appended when
 * not already present, and the result is returned sorted.
 *
 * @param params.config Config used for suppression and configured models;
 *   defaults to `getRuntimeConfig()`.
 * @returns The sorted catalog entries.
 * @throws When `models.json` cannot be read or parsed, or when no row survives
 *   normalization and suppression. `loadModelCatalog` catches any error thrown
 *   here and falls through to the full catalog path.
 */
async function loadReadOnlyPersistedModelCatalog(params?: {
  config?: OpenClawConfig;
}): Promise<ModelCatalogEntry[]> {
  const cfg = params?.config ?? getRuntimeConfig();
  const agentDir = resolveOpenClawAgentDir();
  const raw = await readFile(join(agentDir, "models.json"), "utf8");
  const parsed = JSON.parse(raw) as Record<string, unknown>;
  const models: ModelCatalogEntry[] = [];
  const { buildShouldSuppressBuiltInModel } = await loadModelSuppression();
  const shouldSuppressBuiltInModel = buildShouldSuppressBuiltInModel({ config: cfg });
  const providers =
    parsed?.providers && typeof parsed.providers === "object"
      ? (parsed.providers as Record<string, Record<string, unknown>>)
      : {};
  for (const [providerRaw, providerConfig] of Object.entries(providers)) {
    if (!Array.isArray(providerConfig?.models)) {
      continue;
    }
    // A provider-level context window is the default for rows that lack their own.
    const providerContextWindow =
      typeof providerConfig?.contextWindow === "number" && providerConfig.contextWindow > 0
        ? providerConfig.contextWindow
        : undefined;
    for (const row of providerConfig.models as unknown[]) {
      // Skip malformed rows (e.g. `null`) instead of letting a property access
      // throw and discard every valid row in the file.
      if (!row || typeof row !== "object") {
        continue;
      }
      const normalized = normalizePersistedModelCatalogEntry(
        providerRaw,
        row as Record<string, unknown>,
        { contextWindow: providerContextWindow },
      );
      if (normalized && !shouldSuppressBuiltInModel(normalized)) {
        models.push(normalized);
      }
    }
  }
  // An empty result is reported as an error rather than returned as a catalog.
  if (models.length === 0) {
    throw new Error("persisted model catalog has no usable model rows");
  }
  const configuredModels = buildConfiguredModelCatalog({ cfg });
  if (configuredModels.length > 0) {
    appendCatalogEntriesIfAbsent(models, configuredModels);
  }
  return sortModelCatalogEntries(models);
}
|
||||
|
||||
export async function loadModelCatalog(params?: {
|
||||
config?: OpenClawConfig;
|
||||
useCache?: boolean;
|
||||
readOnly?: boolean;
|
||||
}): Promise<ModelCatalogEntry[]> {
|
||||
const readOnly = params?.readOnly === true;
|
||||
if (readOnly) {
|
||||
try {
|
||||
return await loadReadOnlyPersistedModelCatalog(params);
|
||||
} catch {
|
||||
// fall through to full catalog path
|
||||
}
|
||||
}
|
||||
if (!readOnly && params?.useCache === false) {
|
||||
modelCatalogPromise = null;
|
||||
}
|
||||
@@ -185,14 +281,7 @@ export async function loadModelCatalog(params?: {
|
||||
const suffix = extra ? ` ${extra}` : "";
|
||||
log.info(`model-catalog stage=${stage} elapsedMs=${Date.now() - startMs}${suffix}`);
|
||||
};
|
||||
const sortModels = (entries: ModelCatalogEntry[]) =>
|
||||
entries.sort((a, b) => {
|
||||
const p = a.provider.localeCompare(b.provider);
|
||||
if (p !== 0) {
|
||||
return p;
|
||||
}
|
||||
return a.name.localeCompare(b.name);
|
||||
});
|
||||
const sortModels = sortModelCatalogEntries;
|
||||
try {
|
||||
const cfg = params?.config ?? getRuntimeConfig();
|
||||
if (!readOnly) {
|
||||
@@ -247,18 +336,20 @@ export async function loadModelCatalog(params?: {
|
||||
const compat = entry?.compat && typeof entry.compat === "object" ? entry.compat : undefined;
|
||||
models.push({ id, name, provider, contextWindow, reasoning, input, compat });
|
||||
}
|
||||
const supplemental = await augmentModelCatalogWithProviderPlugins({
|
||||
config: cfg,
|
||||
env: process.env,
|
||||
context: {
|
||||
if (!readOnly) {
|
||||
const supplemental = await augmentModelCatalogWithProviderPlugins({
|
||||
config: cfg,
|
||||
agentDir,
|
||||
env: process.env,
|
||||
entries: [...models],
|
||||
},
|
||||
});
|
||||
if (supplemental.length > 0) {
|
||||
appendCatalogEntriesIfAbsent(models, supplemental);
|
||||
context: {
|
||||
config: cfg,
|
||||
agentDir,
|
||||
env: process.env,
|
||||
entries: [...models],
|
||||
},
|
||||
});
|
||||
if (supplemental.length > 0) {
|
||||
appendCatalogEntriesIfAbsent(models, supplemental);
|
||||
}
|
||||
}
|
||||
logStage("plugin-models-merged", `entries=${models.length}`);
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@ describe("models.list", () => {
|
||||
it("does not block the configured view on slow model catalog discovery", async () => {
|
||||
const catalog = createDeferred<never>();
|
||||
const respond = vi.fn();
|
||||
const loadGatewayModelCatalog = vi.fn(() => catalog.promise);
|
||||
|
||||
vi.useFakeTimers();
|
||||
try {
|
||||
@@ -51,7 +52,7 @@ describe("models.list", () => {
|
||||
};
|
||||
return config as unknown as OpenClawConfig;
|
||||
},
|
||||
loadGatewayModelCatalog: vi.fn(() => catalog.promise),
|
||||
loadGatewayModelCatalog,
|
||||
logGateway: {
|
||||
debug: vi.fn(),
|
||||
},
|
||||
@@ -74,6 +75,7 @@ describe("models.list", () => {
|
||||
},
|
||||
undefined,
|
||||
);
|
||||
expect(loadGatewayModelCatalog).toHaveBeenCalledWith({ readOnly: true });
|
||||
} finally {
|
||||
vi.useRealTimers();
|
||||
}
|
||||
@@ -82,6 +84,7 @@ describe("models.list", () => {
|
||||
it("keeps the all view exact instead of timing out to a partial catalog", async () => {
|
||||
const catalog = createDeferred<[{ id: string; name: string; provider: string }]>();
|
||||
const respond = vi.fn();
|
||||
const loadGatewayModelCatalog = vi.fn(() => catalog.promise);
|
||||
|
||||
vi.useFakeTimers();
|
||||
try {
|
||||
@@ -98,7 +101,7 @@ describe("models.list", () => {
|
||||
isWebchatConnect: () => false,
|
||||
context: {
|
||||
getRuntimeConfig: () => ({}) as OpenClawConfig,
|
||||
loadGatewayModelCatalog: vi.fn(() => catalog.promise),
|
||||
loadGatewayModelCatalog,
|
||||
logGateway: {
|
||||
debug: vi.fn(),
|
||||
},
|
||||
@@ -116,6 +119,7 @@ describe("models.list", () => {
|
||||
{ models: [{ id: "gpt-test", name: "GPT Test", provider: "openai" }] },
|
||||
undefined,
|
||||
);
|
||||
expect(loadGatewayModelCatalog).toHaveBeenCalledWith({ readOnly: false });
|
||||
} finally {
|
||||
vi.useRealTimers();
|
||||
}
|
||||
|
||||
@@ -26,11 +26,11 @@ async function loadModelsListCatalog(
|
||||
view: ModelsListView,
|
||||
): Promise<GatewayModelCatalog> {
|
||||
if (view === "all") {
|
||||
return await context.loadGatewayModelCatalog();
|
||||
return await context.loadGatewayModelCatalog({ readOnly: false });
|
||||
}
|
||||
let timeout: NodeJS.Timeout | undefined;
|
||||
const timedOut = Symbol("models-list-catalog-timeout");
|
||||
const catalogPromise = context.loadGatewayModelCatalog();
|
||||
const catalogPromise = context.loadGatewayModelCatalog({ readOnly: true });
|
||||
const timeoutPromise = new Promise<typeof timedOut>((resolve) => {
|
||||
timeout = setTimeout(() => resolve(timedOut), MODELS_LIST_CATALOG_TIMEOUT_MS);
|
||||
timeout.unref?.();
|
||||
|
||||
@@ -45,7 +45,7 @@ export type GatewayRequestContext = {
|
||||
getRuntimeConfig: () => OpenClawConfig;
|
||||
execApprovalManager?: ExecApprovalManager;
|
||||
pluginApprovalManager?: ExecApprovalManager<PluginApprovalRequestPayload>;
|
||||
loadGatewayModelCatalog: () => Promise<ModelCatalogEntry[]>;
|
||||
loadGatewayModelCatalog: (params?: { readOnly?: boolean }) => Promise<ModelCatalogEntry[]>;
|
||||
getHealthCache: () => HealthSummary | null;
|
||||
refreshHealthSnapshot: (opts?: {
|
||||
probe?: boolean;
|
||||
|
||||
@@ -45,6 +45,35 @@ describe("loadGatewayModelCatalog", () => {
|
||||
await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog })).resolves.toBe(catalog);
|
||||
|
||||
expect(loadModelCatalog).toHaveBeenCalledTimes(1);
|
||||
expect(loadModelCatalog).toHaveBeenCalledWith({ config: getConfig(), readOnly: true });
|
||||
});
|
||||
|
||||
it("keeps read-only and full catalog caches separate", async () => {
|
||||
const readOnlyCatalog = [model("configured-only")];
|
||||
const fullCatalog = [model("configured-only"), model("browse-only")];
|
||||
const loadModelCatalog = vi.fn<LoadModelCatalogForTest>(async (params) =>
|
||||
params.readOnly === false ? fullCatalog : readOnlyCatalog,
|
||||
);
|
||||
|
||||
await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog })).resolves.toBe(
|
||||
readOnlyCatalog,
|
||||
);
|
||||
await expect(
|
||||
loadGatewayModelCatalog({ getConfig, loadModelCatalog, readOnly: false }),
|
||||
).resolves.toBe(fullCatalog);
|
||||
await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog })).resolves.toBe(
|
||||
readOnlyCatalog,
|
||||
);
|
||||
|
||||
expect(loadModelCatalog).toHaveBeenCalledTimes(2);
|
||||
expect(loadModelCatalog).toHaveBeenNthCalledWith(1, {
|
||||
config: getConfig(),
|
||||
readOnly: true,
|
||||
});
|
||||
expect(loadModelCatalog).toHaveBeenNthCalledWith(2, {
|
||||
config: getConfig(),
|
||||
readOnly: false,
|
||||
});
|
||||
});
|
||||
|
||||
it("does not cache an empty catalog so the next request retries", async () => {
|
||||
|
||||
@@ -5,26 +5,50 @@ export type GatewayModelChoice = import("../agents/model-catalog.js").ModelCatal
|
||||
type GatewayModelCatalogConfig = ReturnType<typeof getRuntimeConfig>;
|
||||
type LoadModelCatalog = (params: {
|
||||
config: GatewayModelCatalogConfig;
|
||||
readOnly?: boolean;
|
||||
}) => Promise<GatewayModelChoice[]>;
|
||||
type LoadGatewayModelCatalogParams = {
|
||||
getConfig?: () => GatewayModelCatalogConfig;
|
||||
loadModelCatalog?: LoadModelCatalog;
|
||||
readOnly?: boolean;
|
||||
};
|
||||
|
||||
let lastSuccessfulCatalog: GatewayModelChoice[] | null = null;
|
||||
let inFlightRefresh: Promise<GatewayModelChoice[]> | null = null;
|
||||
let staleGeneration = 0;
|
||||
let appliedGeneration = 0;
|
||||
type GatewayModelCatalogCache = {
|
||||
lastSuccessfulCatalog: GatewayModelChoice[] | null;
|
||||
inFlightRefresh: Promise<GatewayModelChoice[]> | null;
|
||||
staleGeneration: number;
|
||||
appliedGeneration: number;
|
||||
};
|
||||
|
||||
function resetGatewayModelCatalogState(): void {
|
||||
lastSuccessfulCatalog = null;
|
||||
inFlightRefresh = null;
|
||||
staleGeneration = 0;
|
||||
appliedGeneration = 0;
|
||||
/**
 * Builds a fresh, empty cache slot: no catalog stored, no refresh in flight,
 * and both generation counters at zero.
 */
function createGatewayModelCatalogCache(): GatewayModelCatalogCache {
  const emptyCache: GatewayModelCatalogCache = {
    lastSuccessfulCatalog: null,
    inFlightRefresh: null,
    staleGeneration: 0,
    appliedGeneration: 0,
  };
  return emptyCache;
}
|
||||
|
||||
function isGatewayModelCatalogStale(): boolean {
|
||||
return appliedGeneration < staleGeneration;
|
||||
const readOnlyModelCatalogCache = createGatewayModelCatalogCache();
|
||||
const fullModelCatalogCache = createGatewayModelCatalogCache();
|
||||
|
||||
/**
 * Picks the cache slot for a request. Only an explicit `readOnly: false`
 * selects the full-catalog cache; omitted params or any other `readOnly`
 * value use the read-only cache.
 */
function resolveGatewayModelCatalogCache(
  params?: LoadGatewayModelCatalogParams,
): GatewayModelCatalogCache {
  if (params?.readOnly === false) {
    return fullModelCatalogCache;
  }
  return readOnlyModelCatalogCache;
}
|
||||
|
||||
/**
 * Returns both gateway catalog caches (read-only and full) to their initial
 * empty state: no stored catalog, no in-flight refresh, generations at zero.
 */
function resetGatewayModelCatalogState(): void {
  const clearCache = (cache: GatewayModelCatalogCache): void => {
    cache.lastSuccessfulCatalog = null;
    cache.inFlightRefresh = null;
    cache.staleGeneration = 0;
    cache.appliedGeneration = 0;
  };
  clearCache(readOnlyModelCatalogCache);
  clearCache(fullModelCatalogCache);
}
|
||||
|
||||
/**
 * Reports whether the cache's stale generation is ahead of the generation
 * last applied to it.
 */
function isGatewayModelCatalogStale(cache: GatewayModelCatalogCache): boolean {
  const { appliedGeneration, staleGeneration } = cache;
  return staleGeneration > appliedGeneration;
}
|
||||
|
||||
async function resolveLoadModelCatalog(
|
||||
@@ -40,28 +64,31 @@ async function resolveLoadModelCatalog(
|
||||
function startGatewayModelCatalogRefresh(
|
||||
params?: LoadGatewayModelCatalogParams,
|
||||
): Promise<GatewayModelChoice[]> {
|
||||
const cache = resolveGatewayModelCatalogCache(params);
|
||||
const config = (params?.getConfig ?? getRuntimeConfig)();
|
||||
const refreshGeneration = staleGeneration;
|
||||
const readOnly = params?.readOnly !== false;
|
||||
const refreshGeneration = cache.staleGeneration;
|
||||
const refresh = resolveLoadModelCatalog(params)
|
||||
.then((loadModelCatalog) => loadModelCatalog({ config }))
|
||||
.then((loadModelCatalog) => loadModelCatalog({ config, readOnly }))
|
||||
.then((catalog) => {
|
||||
if (catalog.length > 0 && refreshGeneration === staleGeneration) {
|
||||
lastSuccessfulCatalog = catalog;
|
||||
appliedGeneration = staleGeneration;
|
||||
if (catalog.length > 0 && refreshGeneration === cache.staleGeneration) {
|
||||
cache.lastSuccessfulCatalog = catalog;
|
||||
cache.appliedGeneration = cache.staleGeneration;
|
||||
}
|
||||
return catalog;
|
||||
})
|
||||
.finally(() => {
|
||||
if (inFlightRefresh === refresh) {
|
||||
inFlightRefresh = null;
|
||||
if (cache.inFlightRefresh === refresh) {
|
||||
cache.inFlightRefresh = null;
|
||||
}
|
||||
});
|
||||
inFlightRefresh = refresh;
|
||||
cache.inFlightRefresh = refresh;
|
||||
return refresh;
|
||||
}
|
||||
|
||||
export function markGatewayModelCatalogStaleForReload(): void {
|
||||
staleGeneration += 1;
|
||||
readOnlyModelCatalogCache.staleGeneration += 1;
|
||||
fullModelCatalogCache.staleGeneration += 1;
|
||||
}
|
||||
|
||||
// Test-only escape hatch: model catalog is cached at module scope for the
|
||||
@@ -76,18 +103,19 @@ export async function __resetModelCatalogCacheForTest(): Promise<void> {
|
||||
export async function loadGatewayModelCatalog(
|
||||
params?: LoadGatewayModelCatalogParams,
|
||||
): Promise<GatewayModelChoice[]> {
|
||||
const isStale = isGatewayModelCatalogStale();
|
||||
if (!isStale && lastSuccessfulCatalog) {
|
||||
return lastSuccessfulCatalog;
|
||||
const cache = resolveGatewayModelCatalogCache(params);
|
||||
const isStale = isGatewayModelCatalogStale(cache);
|
||||
if (!isStale && cache.lastSuccessfulCatalog) {
|
||||
return cache.lastSuccessfulCatalog;
|
||||
}
|
||||
if (isStale && lastSuccessfulCatalog) {
|
||||
if (!inFlightRefresh) {
|
||||
if (isStale && cache.lastSuccessfulCatalog) {
|
||||
if (!cache.inFlightRefresh) {
|
||||
void startGatewayModelCatalogRefresh(params).catch(() => undefined);
|
||||
}
|
||||
return lastSuccessfulCatalog;
|
||||
return cache.lastSuccessfulCatalog;
|
||||
}
|
||||
if (inFlightRefresh) {
|
||||
return await inFlightRefresh;
|
||||
if (cache.inFlightRefresh) {
|
||||
return await cache.inFlightRefresh;
|
||||
}
|
||||
return await startGatewayModelCatalogRefresh(params);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user