fix(models): reset warmed provider auth on hot reload

This commit is contained in:
Peter Steinberger
2026-05-21 18:51:07 +01:00
parent 7ddcca6c77
commit aef8d1771d
7 changed files with 244 additions and 44 deletions

View File

@@ -7,7 +7,8 @@ const modelCatalogMocks = vi.hoisted(() => ({
}));
const modelAuthMocks = vi.hoisted(() => ({
hasRuntimeAvailableProviderAuth: vi.fn<(params: { provider: string }) => boolean>(),
hasRuntimeAvailableProviderAuth:
vi.fn<(params: { provider: string; cfg?: OpenClawConfig; workspaceDir?: string }) => boolean>(),
}));
const authProfilesMocks = vi.hoisted(() => ({
@@ -44,6 +45,7 @@ const { clearCurrentProviderAuthState, hasAuthForModelProvider, warmCurrentProvi
describe("prepared provider auth state", () => {
afterEach(() => {
vi.useRealTimers();
clearCurrentProviderAuthState();
vi.clearAllMocks();
});
@@ -105,6 +107,41 @@ describe("prepared provider auth state", () => {
expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(2);
});
// The prepared state is matched by config fingerprint rather than object
// identity, so a structurally equal clone (a different object) must still be
// served from the warmed answer.
it("hasAuthForModelProvider uses the prepared answer for equivalent runtime config clones", async () => {
const cfg = { gateway: { port: 18789 } } as OpenClawConfig;
const clonedCfg = structuredClone(cfg);
modelCatalogMocks.loadModelCatalog.mockResolvedValue([
{ id: "gpt", name: "gpt", provider: "openai" },
]);
modelAuthMocks.hasRuntimeAvailableProviderAuth.mockReturnValue(true);
await warmCurrentProviderAuthState(cfg);
expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(1);
// Flip the live answer: from here on `true` can only come from the prepared
// state, and the unchanged call count proves no recompute happened.
modelAuthMocks.hasRuntimeAvailableProviderAuth.mockReturnValue(false);
expect(hasAuthForModelProvider({ provider: "openai", cfg: clonedCfg })).toBe(true);
expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(1);
});
it("hasAuthForModelProvider falls through after the prepared auth state TTL", async () => {
  // Freeze the clock at 0 so the warm stamps a known preparation time.
  vi.useFakeTimers();
  vi.setSystemTime(0);
  const cfg = {} as OpenClawConfig;
  modelCatalogMocks.loadModelCatalog.mockResolvedValue([
    { id: "gpt", name: "gpt", provider: "openai" },
  ]);
  modelAuthMocks.hasRuntimeAvailableProviderAuth.mockReturnValue(false);

  await warmCurrentProviderAuthState(cfg);
  expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(1);

  // Flip the live answer so the prepared (`false`) and recomputed (`true`)
  // results are distinguishable.
  modelAuthMocks.hasRuntimeAvailableProviderAuth.mockReturnValue(true);
  expect(hasAuthForModelProvider({ provider: "openai", cfg })).toBe(false);
  expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(1);

  // The freshness check is inclusive (`age <= TTL`), so exactly 10s after the
  // warm the prepared answer must still be served without a recompute.
  vi.setSystemTime(10_000);
  expect(hasAuthForModelProvider({ provider: "openai", cfg })).toBe(false);
  expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(1);

  // One millisecond past the TTL the lookup must fall through to the live
  // computation.
  vi.setSystemTime(10_001);
  expect(hasAuthForModelProvider({ provider: "openai", cfg })).toBe(true);
  expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(2);
});
it("hasAuthForModelProvider falls through to compute when the caller passes a non-default workspaceDir", async () => {
const cfg = {} as OpenClawConfig;
modelCatalogMocks.loadModelCatalog.mockResolvedValue([
@@ -137,4 +174,42 @@ describe("prepared provider auth state", () => {
).toBe(true);
expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(2);
});
// Race scenario: a warm started before a clear resolves *after* a newer warm
// has already published. The older result must be discarded.
it("does not publish an older warm after the prepared auth state is cleared", async () => {
const firstCfg = { gateway: { port: 18789 } } as OpenClawConfig;
const secondCfg = { gateway: { port: 19001 } } as OpenClawConfig;
// Manually controlled catalog promises let the test choose which warm
// finishes first.
let resolveFirstCatalog: ((catalog: ModelCatalogEntry[]) => void) | undefined;
let resolveSecondCatalog: ((catalog: ModelCatalogEntry[]) => void) | undefined;
modelCatalogMocks.loadModelCatalog
.mockReturnValueOnce(
new Promise<ModelCatalogEntry[]>((resolve) => {
resolveFirstCatalog = resolve;
}),
)
.mockReturnValueOnce(
new Promise<ModelCatalogEntry[]>((resolve) => {
resolveSecondCatalog = resolve;
}),
);
// Live auth is `true` only for firstCfg, so the two warms compute different
// answers for the same provider.
modelAuthMocks.hasRuntimeAvailableProviderAuth.mockImplementation(
({ cfg }) => cfg === firstCfg,
);
const firstWarm = warmCurrentProviderAuthState(firstCfg);
await Promise.resolve();
clearCurrentProviderAuthState();
const secondWarm = warmCurrentProviderAuthState(secondCfg);
// Complete the newer warm first, then let the older one finish late.
resolveSecondCatalog?.([{ id: "gpt", name: "gpt", provider: "openai" }]);
await secondWarm;
resolveFirstCatalog?.([{ id: "gpt", name: "gpt", provider: "openai" }]);
await firstWarm;
expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(2);
modelAuthMocks.hasRuntimeAvailableProviderAuth.mockReturnValue(true);
// secondCfg is answered from the prepared state (`false`, no extra call) ...
expect(hasAuthForModelProvider({ provider: "openai", cfg: secondCfg })).toBe(false);
expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(2);
// ... while firstCfg no longer matches it and is recomputed live (third call).
expect(hasAuthForModelProvider({ provider: "openai", cfg: firstCfg })).toBe(true);
expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(3);
});
});

View File

@@ -1,3 +1,4 @@
import { hashRuntimeConfigValue } from "../config/runtime-snapshot.js";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import {
externalCliDiscoveryForProviderAuth,
@@ -18,18 +19,38 @@ import { resolveDefaultAgentWorkspaceDir } from "./workspace.js";
// (pickers, /models, status commands, CLI) skips the per-provider plugin
// discovery and external-CLI probing on the hot path.
let currentProviderAuthState: ReadonlyMap<string, boolean> | null = null;
let currentProviderAuthStateWorkspaceDir: string | undefined;
/**
 * Snapshot published by a provider-auth warm. A lookup may only reuse it when
 * the caller's config fingerprint and workspace directory match and the
 * snapshot is still within its TTL.
 */
type PreparedProviderAuthState = {
  /** Auth answer captured by the warm, looked up by provider. */
  providers: ReadonlyMap<string, boolean>;
  /** Fingerprint of the config the warm ran against. */
  configFingerprint: string;
  /** Workspace directory the warm was scoped to. */
  workspaceDir: string;
  /** `Date.now()` at publish time; the TTL is measured from this value. */
  preparedAtMs: number;
};
const PREPARED_PROVIDER_AUTH_STATE_TTL_MS = 10_000;
let currentProviderAuthState: PreparedProviderAuthState | null = null;
const configFingerprintCache = new WeakMap<OpenClawConfig, string>();
// Generation counter guards against an in-flight warm publishing stale
// state after a subsequent clear/reload has invalidated it.
// state after a subsequent warm or clear has invalidated it.
let currentProviderAuthStateGeneration = 0;
/**
 * Drops the prepared provider-auth state and advances the generation counter
 * so that a warm still in flight does not publish its result afterwards.
 */
export function clearCurrentProviderAuthState(): void {
currentProviderAuthState = null;
// NOTE(review): this assignment targets the standalone workspace-dir variable;
// the prepared state object now carries its own `workspaceDir`. This looks like
// the removed side of the diff — confirm it is gone from the final file.
currentProviderAuthStateWorkspaceDir = undefined;
// Invalidate any warm that started before this clear.
currentProviderAuthStateGeneration += 1;
}
/**
 * Resolves the fingerprint used to match a caller's config against the
 * prepared provider-auth state.
 *
 * The hash is memoized per config object in a WeakMap, so repeated lookups
 * with the same object do not re-hash it.
 *
 * @param cfg Config to fingerprint; may be omitted by callers.
 * @returns The config's fingerprint, or `null` when no config was supplied.
 */
function resolveProviderAuthConfigFingerprint(cfg: OpenClawConfig | undefined): string | null {
  if (cfg) {
    let fingerprint = configFingerprintCache.get(cfg);
    if (fingerprint === undefined) {
      // First sighting of this config object: hash once and remember it.
      fingerprint = hashRuntimeConfigValue(cfg);
      configFingerprintCache.set(cfg, fingerprint);
    }
    return fingerprint;
  }
  return null;
}
export function hasAuthForModelProvider(params: {
provider: string;
cfg?: OpenClawConfig;
@@ -48,16 +69,23 @@ export function hasAuthForModelProvider(params: {
// that narrow the scope — e.g. gateway `models.list` with
// `runtimeAuthDiscovery: false`, or per-agent picker calls that pass a
// non-default workspaceDir — get the answer they asked for.
const preparedState = currentProviderAuthState;
const workspaceDir = params.workspaceDir ?? resolveDefaultAgentWorkspaceDir();
const configFingerprint = resolveProviderAuthConfigFingerprint(params.cfg);
const preparedStateFresh =
preparedState !== null &&
Date.now() - preparedState.preparedAtMs <= PREPARED_PROVIDER_AUTH_STATE_TTL_MS;
const matchesWarmedScope =
preparedStateFresh &&
configFingerprint === preparedState.configFingerprint &&
workspaceDir === preparedState.workspaceDir &&
params.discoverExternalCliAuth !== false &&
params.allowPluginSyntheticAuth !== false &&
params.agentDir === undefined &&
params.env === undefined &&
params.store === undefined &&
(params.workspaceDir === undefined ||
params.workspaceDir === currentProviderAuthStateWorkspaceDir);
params.store === undefined;
if (matchesWarmedScope) {
const preparedAnswer = currentProviderAuthState?.get(provider);
const preparedAnswer = preparedState.providers.get(provider);
if (preparedAnswer !== undefined) {
return preparedAnswer;
}
@@ -152,6 +180,10 @@ export async function warmCurrentProviderAuthState(cfg: OpenClawConfig): Promise
// the newer answer wins.
return;
}
currentProviderAuthState = state;
currentProviderAuthStateWorkspaceDir = workspaceDir;
currentProviderAuthState = {
configFingerprint: resolveProviderAuthConfigFingerprint(cfg) ?? "",
workspaceDir,
preparedAtMs: Date.now(),
providers: state,
};
}

View File

@@ -23,6 +23,8 @@ const mocks = vi.hoisted(() => ({
(params: { agentDir?: string }) => params.agentDir,
),
refreshActiveSecretsRuntimeSnapshot: vi.fn(async () => false),
clearCurrentProviderAuthState: vi.fn(),
warmCurrentProviderAuthState: vi.fn(async (_cfg: unknown) => {}),
buildAuthHealthSummary: vi.fn(
(): AuthHealthSummary => ({ now: 0, warnAfterMs: 0, profiles: [], providers: [] }),
),
@@ -70,6 +72,11 @@ vi.mock("../../secrets/runtime.js", () => ({
refreshActiveSecretsRuntimeSnapshot: mocks.refreshActiveSecretsRuntimeSnapshot,
}));
vi.mock("../../agents/model-provider-auth.js", () => ({
clearCurrentProviderAuthState: mocks.clearCurrentProviderAuthState,
warmCurrentProviderAuthState: mocks.warmCurrentProviderAuthState,
}));
import {
aggregateOAuthStatus,
invalidateModelAuthStatusCache,
@@ -614,6 +621,8 @@ describe("models.authLogout", () => {
agentDir: "/tmp/agent",
});
expect(mocks.refreshActiveSecretsRuntimeSnapshot).toHaveBeenCalledTimes(1);
expect(mocks.clearCurrentProviderAuthState).toHaveBeenCalled();
expect(mocks.warmCurrentProviderAuthState).toHaveBeenCalledWith({});
const [ok, payload] = firstRespondCall(opts) ?? [];
expect(ok).toBe(true);
expect((payload as ModelAuthLogoutResult).removedProfiles).toEqual(["openrouter:default"]);

View File

@@ -390,6 +390,7 @@ export const modelsAuthStatusHandlers: GatewayRequestHandlers = {
}
await refreshActiveSecretsRuntimeSnapshot();
invalidateModelAuthStatusCache();
clearCurrentProviderAuthState();
void warmCurrentProviderAuthState(context.getRuntimeConfig()).catch((err) => {
log.warn(`provider auth state rewarm after logout failed: ${formatForLog(err)}`);
});

View File

@@ -40,6 +40,10 @@ const hoisted = vi.hoisted(() => ({
activeEmbeddedRunSessionKeys: [] as string[],
markRestartAbortedMainSessions: vi.fn(async (_params: unknown) => ({ marked: 1, skipped: 0 })),
runtimeConfig: { value: { session: { store: "/tmp/active-sessions.json" } } as OpenClawConfig },
reloadEvents: [] as string[],
resetModelCatalogCache: vi.fn(() => {}),
clearCurrentProviderAuthState: vi.fn(() => {}),
warmCurrentProviderAuthState: vi.fn(async (_cfg: OpenClawConfig) => {}),
}));
vi.mock("../hooks/gmail-watcher.js", () => ({
@@ -95,6 +99,24 @@ vi.mock("../config/config.js", () => ({
getRuntimeConfig: () => hoisted.runtimeConfig.value,
}));
// Wrap the mocked model-catalog reset so each call is also appended to
// `hoisted.reloadEvents`, letting tests assert call order across modules.
vi.mock("../agents/model-catalog.js", () => ({
resetModelCatalogCache: () => {
hoisted.reloadEvents.push("reset-model-catalog");
hoisted.resetModelCatalogCache();
},
}));
// Same event recording for the provider-auth clear/warm entry points. The
// event is pushed synchronously at call time, before the warm mock is awaited.
vi.mock("../agents/model-provider-auth.js", () => ({
clearCurrentProviderAuthState: () => {
hoisted.reloadEvents.push("clear-provider-auth");
hoisted.clearCurrentProviderAuthState();
},
warmCurrentProviderAuthState: async (cfg: OpenClawConfig) => {
hoisted.reloadEvents.push("warm-provider-auth");
await hoisted.warmCurrentProviderAuthState(cfg);
},
}));
function createReloadHandlersForTest(logReload = { info: vi.fn(), warn: vi.fn() }) {
const cron = { start: vi.fn(async () => {}), stop: vi.fn() };
const heartbeatRunner = {
@@ -139,6 +161,76 @@ afterEach(() => {
hoisted.activeEmbeddedRunSessionKeys.length = 0;
hoisted.markRestartAbortedMainSessions.mockClear();
hoisted.runtimeConfig.value = { session: { store: "/tmp/active-sessions.json" } };
hoisted.reloadEvents.length = 0;
hoisted.resetModelCatalogCache.mockClear();
hoisted.clearCurrentProviderAuthState.mockClear();
hoisted.warmCurrentProviderAuthState.mockClear();
});
// Verifies the ordering of prepared-model-state invalidation during a hot
// reload that also reloads plugins, using the events recorded by the mocked
// modules in `hoisted.reloadEvents`.
describe("gateway hot reload model state", () => {
it("resets prepared model runtime state for every hot reload and rewarms after plugin reload", async () => {
// Record the plugin reload in the same event log as the mocked resets.
const reloadPlugins = vi.fn(async (): Promise<GatewayPluginReloadResult> => {
hoisted.reloadEvents.push("reload-plugins");
return {
restartChannels: new Set(),
activeChannels: new Set(),
};
});
// Minimal handler wiring: everything unrelated to the model-state reset is
// stubbed out.
const { applyHotReload } = createGatewayReloadHandlers({
deps: {} as never,
broadcast: vi.fn(),
getState: () => ({
hooksConfig: {} as never,
hookClientIpConfig: {} as never,
heartbeatRunner: { stop: vi.fn(), updateConfig: vi.fn() } as never,
cronState: {
cron: { start: vi.fn(async () => {}), stop: vi.fn() },
storePath: "/tmp/cron.json",
cronEnabled: false,
} as never,
channelHealthMonitor: null,
}),
setState: vi.fn(),
startChannel: vi.fn(async () => {}),
stopChannel: vi.fn(async () => {}),
reloadPlugins,
logHooks: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
logChannels: { info: vi.fn(), error: vi.fn() },
logCron: { error: vi.fn() },
logReload: { info: vi.fn(), warn: vi.fn() },
createHealthMonitor: () => null,
});
const nextConfig = { plugins: { enabled: true } } as OpenClawConfig;
// A plan whose only action is a plugin reload.
await applyHotReload(
{
changedPaths: ["plugins.enabled"],
restartGateway: false,
restartReasons: [],
hotReasons: ["plugins.enabled"],
reloadHooks: false,
restartGmailWatcher: false,
restartCron: false,
restartHeartbeat: false,
restartHealthMonitor: false,
reloadPlugins: true,
restartChannels: new Set(),
disposeMcpRuntimes: false,
noopPaths: [],
},
nextConfig,
);
// Expected order: reset + clear up front, plugin reload, reset + clear again
// after the reload, and the rewarm last.
expect(hoisted.reloadEvents).toEqual([
"reset-model-catalog",
"clear-provider-auth",
"reload-plugins",
"reset-model-catalog",
"clear-provider-auth",
"warm-provider-auth",
]);
expect(hoisted.warmCurrentProviderAuthState).toHaveBeenCalledWith(nextConfig);
});
});
describe("gateway restart deferral preflight", () => {

View File

@@ -90,6 +90,12 @@ const MCP_RUNTIME_RELOAD_DISPOSE_TIMEOUT_MS = 5_000;
const CHANNEL_RELOAD_DEFERRAL_POLL_MS = 500;
const CHANNEL_RELOAD_STILL_PENDING_WARN_MS = 30_000;
/**
 * Invalidates prepared model-related runtime state during a hot reload by
 * calling, in order: the model catalog cache reset, the provider-auth state
 * clear, and the gateway model-catalog stale marker.
 *
 * This only invalidates; it does not trigger a provider-auth rewarm.
 */
function resetPreparedModelRuntimeStateForHotReload(): void {
resetModelCatalogCache();
clearCurrentProviderAuthState();
markGatewayModelCatalogStaleForReload();
}
async function disposeMcpRuntimesWithTimeout(params: {
dispose: () => Promise<void>;
timeoutMs: number;
@@ -306,31 +312,7 @@ export function createGatewayReloadHandlers(params: GatewayReloadHandlerParams)
const state = params.getState();
const nextState = { ...state };
const modelConfigChanged = plan.changedPaths.some(
(path) =>
path === "models" ||
path.startsWith("models.") ||
path === "agents.defaults.model" ||
path.startsWith("agents.defaults.model.") ||
path === "agents.defaults.models" ||
path.startsWith("agents.defaults.models."),
);
if (modelConfigChanged) {
resetModelCatalogCache();
markGatewayModelCatalogStaleForReload();
}
// Provider-auth answers come from env/synthetic/plugin sources as well
// as the model catalog, so plugin config changes (e.g. plugins.entries.*
// env vars or synthetic-auth wiring) can also flip the answer. Clear
// up front so callers don't keep seeing the pre-reload answer; the
// matching rewarm runs after plan.reloadPlugins so it reads the new
// plugin runtime.
const providerAuthStateInvalidated =
modelConfigChanged ||
plan.changedPaths.some((path) => path === "plugins" || path.startsWith("plugins."));
if (providerAuthStateInvalidated) {
clearCurrentProviderAuthState();
}
resetPreparedModelRuntimeStateForHotReload();
if (plan.reloadHooks) {
try {
@@ -416,14 +398,7 @@ export function createGatewayReloadHandlers(params: GatewayReloadHandlerParams)
channelsToRestart.add(channel);
}
activePluginChannelsAfterReload = pluginReloadResult.activeChannels;
}
if (providerAuthStateInvalidated) {
// Schedule the rewarm after plan.reloadPlugins so the warmer reads
// the new plugin runtime, not the pre-reload one.
void warmCurrentProviderAuthState(nextConfig).catch((err) => {
params.logReload.warn(`provider auth state rewarm failed: ${String(err)}`);
});
resetPreparedModelRuntimeStateForHotReload();
}
if (plan.restartCron) {
@@ -525,6 +500,10 @@ export function createGatewayReloadHandlers(params: GatewayReloadHandlerParams)
applyGatewayLaneConcurrency(nextConfig);
void warmCurrentProviderAuthState(nextConfig).catch((err) => {
params.logReload.warn(`provider auth state rewarm failed: ${String(err)}`);
});
if (plan.hotReasons.length > 0) {
params.logReload.info(`config hot reload applied (${plan.hotReasons.join(", ")})`);
} else if (plan.noopPaths.length > 0) {

View File

@@ -58,6 +58,8 @@ const hoisted = vi.hoisted(() => {
const startGmailWatcher = vi.fn(async () => ({ started: true }));
const stopGmailWatcher = vi.fn(async () => {});
const resetModelCatalogCache = vi.fn();
const clearCurrentProviderAuthState = vi.fn();
const warmCurrentProviderAuthState = vi.fn(async (_cfg: unknown) => {});
const disposeAllSessionMcpRuntimes = vi.fn(async () => {});
const resolveOpenClawPackageRootSync = vi.fn((_params: unknown) => "/package");
@@ -162,6 +164,8 @@ const hoisted = vi.hoisted(() => {
startGmailWatcher,
stopGmailWatcher,
resetModelCatalogCache,
clearCurrentProviderAuthState,
warmCurrentProviderAuthState,
disposeAllSessionMcpRuntimes,
resolveOpenClawPackageRootSync,
providerManager,
@@ -203,6 +207,11 @@ vi.mock("../agents/model-catalog.js", async () => {
};
});
vi.mock("../agents/model-provider-auth.js", () => ({
clearCurrentProviderAuthState: hoisted.clearCurrentProviderAuthState,
warmCurrentProviderAuthState: hoisted.warmCurrentProviderAuthState,
}));
vi.mock("../agents/pi-bundle-mcp-tools.js", async () => {
const actual = await vi.importActual<typeof import("../agents/pi-bundle-mcp-tools.js")>(
"../agents/pi-bundle-mcp-tools.js",
@@ -334,6 +343,9 @@ describe("gateway hot reload", () => {
hoisted.activeTaskBlockers.length = 0;
embeddedRunMock.activeIds.clear();
hoisted.resetModelCatalogCache.mockReset();
hoisted.clearCurrentProviderAuthState.mockReset();
hoisted.warmCurrentProviderAuthState.mockReset();
hoisted.warmCurrentProviderAuthState.mockResolvedValue(undefined);
hoisted.disposeAllSessionMcpRuntimes.mockReset();
hoisted.disposeAllSessionMcpRuntimes.mockResolvedValue(undefined);
hoisted.resolveOpenClawPackageRootSync.mockClear();