mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-23 07:51:33 +00:00
fix(gateway): harden first-turn startup readiness (#52387)
* fix(gateway): harden first-turn startup readiness * fix(gateway): scope startup model retry
This commit is contained in:
@@ -123,6 +123,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Doctor/extensions: keep Matrix DM `allowFrom` repairs on the canonical `dm.allowFrom` path and stop treating Zalouser group sender gating as if it fell back to `allowFrom`, so doctor warnings and `--fix` stay aligned with runtime access control. Thanks @vincentkoc.
|
||||
- Doctor/refactor: centralize built-in channel doctor semantics in one static capability registry with conservative fallback behavior for unknown/external channels, so future extension changes stop depending on scattered shared string checks. Thanks @vincentkoc.
|
||||
- Models/OpenRouter runtime capabilities: fetch uncatalogued OpenRouter model metadata on first use so newly added vision models keep image input instead of silently degrading to text-only, with top-level capability field fallbacks for `/api/v1/models`. (#45824) Thanks @DJjjjhao.
|
||||
- Gateway/startup: prewarm the configured primary model before channel startup and retry one transient provider-runtime miss so the first Telegram or Discord message after boot no longer fails with `Unknown model: openai-codex/gpt-5.4`. Thanks @vincentkoc.
|
||||
- Channels/plugins: keep shared interactive payloads merge-ready by fixing Slack custom callback routing and repeat-click dedupe, allowing interactive-only sends, and preserving ordered Discord shared text blocks. (#47715) Thanks @vincentkoc.
|
||||
- Slack/interactive replies: preserve `channelData.slack.blocks` through live DM delivery and preview-finalized edits so Block Kit button and select directives render instead of falling back to raw text. (#45890) Thanks @vincentkoc.
|
||||
- Feishu/actions: expand the runtime action surface with message read/edit, explicit thread replies, pinning, and operator-facing chat/member inspection so Feishu can operate more of the workspace directly. (#47968) Thanks @Takhoffman.
|
||||
|
||||
98
src/agents/pi-embedded-runner/model.startup-retry.test.ts
Normal file
98
src/agents/pi-embedded-runner/model.startup-retry.test.ts
Normal file
@@ -0,0 +1,98 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
// Stand-in for `discoverAuthStorage`: every call returns a small sentinel object.
const discoverAuthStorageMock = vi.fn<(agentDir?: string) => { mocked: true }>(() => {
  return { mocked: true };
});

// Stand-in for `discoverModels`: every call returns a fresh registry-like object
// whose `find` mock always returns null.
const discoverModelsMock = vi.fn<
  (authStorage: unknown, agentDir: string) => { find: ReturnType<typeof vi.fn> }
>(() => {
  const find = vi.fn(() => null);
  return { find };
});
|
||||
|
||||
// Flipped to true by the clearProviderRuntimeHookCache mock. The plugin and
// dynamic-model mocks below return undefined until that has happened, and a
// usable value afterwards.
let hookCacheCleared = false;

const clearProviderRuntimeHookCacheMock = vi.fn<() => void>(() => {
  hookCacheCleared = true;
});

// Returns a provider plugin descriptor only after the hook cache has been cleared.
const resolveProviderRuntimePluginMock = vi.fn<(params: unknown) => unknown>(() => {
  if (!hookCacheCleared) {
    return undefined;
  }
  return { id: "openai", label: "OpenAI", auth: [] };
});

const prepareProviderDynamicModelMock = vi.fn<(params: unknown) => Promise<void>>(async () => {});

// Returns a dynamic model definition only after the hook cache has been cleared;
// a new object is built on every call.
const runProviderDynamicModelMock = vi.fn<(params: unknown) => unknown>(() => {
  if (!hookCacheCleared) {
    return undefined;
  }
  return {
    id: "gpt-5.4",
    name: "gpt-5.4",
    provider: "openai-codex",
    api: "openai-codex-responses",
    baseUrl: "https://chatgpt.com/backend-api",
    reasoning: true,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 1_050_000,
    maxTokens: 128_000,
  };
});
|
||||
|
||||
// Replace model discovery with the mocks declared in this file.
vi.mock("../pi-model-discovery.js", () => {
  return {
    discoverAuthStorage: discoverAuthStorageMock,
    discoverModels: discoverModelsMock,
  };
});
|
||||
|
||||
// Partially mock provider-runtime: keep the real exports and override only the
// functions listed below with this file's mocks.
vi.mock("../../plugins/provider-runtime.js", async (importOriginal) => {
  const realModule = await importOriginal<typeof import("../../plugins/provider-runtime.js")>();
  const overrides = {
    clearProviderRuntimeHookCache: clearProviderRuntimeHookCacheMock,
    normalizeProviderResolvedModelWithPlugin: () => undefined,
    prepareProviderDynamicModel: (params: unknown) => prepareProviderDynamicModelMock(params),
    resolveProviderRuntimePlugin: (params: unknown) => resolveProviderRuntimePluginMock(params),
    runProviderDynamicModel: (params: unknown) => runProviderDynamicModelMock(params),
  };
  return { ...realModule, ...overrides };
});
|
||||
|
||||
// Exercises the opt-in retry in resolveModelAsync: with the flag set, a first
// miss is followed by one hook-cache clear and a second attempt; without the
// flag, the miss surfaces as an "Unknown model" error.
describe("resolveModelAsync startup retry", () => {
  beforeEach(() => {
    // Start each test with the hook cache "not yet cleared" and clean call logs.
    hookCacheCleared = false;
    discoverAuthStorageMock.mockClear();
    discoverModelsMock.mockClear();
    clearProviderRuntimeHookCacheMock.mockClear();
    resolveProviderRuntimePluginMock.mockClear();
    prepareProviderDynamicModelMock.mockClear();
    runProviderDynamicModelMock.mockClear();
  });

  it("retries once after clearing the provider-runtime hook cache", async () => {
    const { resolveModelAsync } = await import("./model.js");
    const retryOptions = { retryTransientProviderRuntimeMiss: true };

    const { model, error } = await resolveModelAsync(
      "openai-codex",
      "gpt-5.4",
      "/tmp/agent",
      {},
      retryOptions,
    );

    expect(error).toBeUndefined();
    expect(model).toMatchObject({
      provider: "openai-codex",
      id: "gpt-5.4",
      api: "openai-codex-responses",
    });
    // One cache clear, and each provider-runtime lookup ran twice (miss + retry).
    expect(clearProviderRuntimeHookCacheMock).toHaveBeenCalledTimes(1);
    expect(resolveProviderRuntimePluginMock).toHaveBeenCalledTimes(2);
    expect(runProviderDynamicModelMock).toHaveBeenCalledTimes(2);
  });

  it("does not clear the hook cache during steady-state misses", async () => {
    const { resolveModelAsync } = await import("./model.js");

    const { model, error } = await resolveModelAsync("openai-codex", "gpt-5.4", "/tmp/agent", {});

    expect(model).toBeUndefined();
    expect(error).toBe("Unknown model: openai-codex/gpt-5.4");
    // Without the retry flag there is a single attempt and no cache clear.
    expect(clearProviderRuntimeHookCacheMock).not.toHaveBeenCalled();
    expect(resolveProviderRuntimePluginMock).toHaveBeenCalledTimes(1);
    expect(runProviderDynamicModelMock).toHaveBeenCalledTimes(1);
  });
});
|
||||
@@ -3,6 +3,7 @@ import type { AuthStorage, ModelRegistry } from "@mariozechner/pi-coding-agent";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import type { ModelDefinitionConfig } from "../../config/types.js";
|
||||
import {
|
||||
clearProviderRuntimeHookCache,
|
||||
prepareProviderDynamicModel,
|
||||
resolveProviderRuntimePlugin,
|
||||
runProviderDynamicModel,
|
||||
@@ -349,6 +350,9 @@ export async function resolveModelAsync(
|
||||
modelId: string,
|
||||
agentDir?: string,
|
||||
cfg?: OpenClawConfig,
|
||||
options?: {
|
||||
retryTransientProviderRuntimeMiss?: boolean;
|
||||
},
|
||||
): Promise<{
|
||||
model?: Model<Api>;
|
||||
error?: string;
|
||||
@@ -372,7 +376,11 @@ export async function resolveModelAsync(
|
||||
modelRegistry,
|
||||
};
|
||||
}
|
||||
if (!explicitModel) {
|
||||
const providerConfig = resolveConfiguredProviderConfig(cfg, provider);
|
||||
const resolveDynamicAttempt = async (options?: { clearHookCache?: boolean }) => {
|
||||
if (options?.clearHookCache) {
|
||||
clearProviderRuntimeHookCache();
|
||||
}
|
||||
const providerPlugin = resolveProviderRuntimePlugin({
|
||||
provider,
|
||||
config: cfg,
|
||||
@@ -387,21 +395,26 @@ export async function resolveModelAsync(
|
||||
provider,
|
||||
modelId,
|
||||
modelRegistry,
|
||||
providerConfig: resolveConfiguredProviderConfig(cfg, provider),
|
||||
providerConfig,
|
||||
},
|
||||
});
|
||||
}
|
||||
return resolveModelWithRegistry({
|
||||
provider,
|
||||
modelId,
|
||||
modelRegistry,
|
||||
cfg,
|
||||
agentDir: resolvedAgentDir,
|
||||
});
|
||||
};
|
||||
let model =
|
||||
explicitModel?.kind === "resolved" ? explicitModel.model : await resolveDynamicAttempt();
|
||||
if (!model && !explicitModel && options?.retryTransientProviderRuntimeMiss) {
|
||||
// Startup can race the first provider-runtime snapshot load on a fresh
|
||||
// gateway boot. Retry once with a cleared hook cache before surfacing a
|
||||
// user-visible "Unknown model" that disappears on the next message.
|
||||
model = await resolveDynamicAttempt({ clearHookCache: true });
|
||||
}
|
||||
const model =
|
||||
explicitModel?.kind === "resolved"
|
||||
? explicitModel.model
|
||||
: resolveModelWithRegistry({
|
||||
provider,
|
||||
modelId,
|
||||
modelRegistry,
|
||||
cfg,
|
||||
agentDir: resolvedAgentDir,
|
||||
});
|
||||
if (model) {
|
||||
return { model, authStorage, modelRegistry };
|
||||
}
|
||||
|
||||
88
src/gateway/server-startup.test.ts
Normal file
88
src/gateway/server-startup.test.ts
Normal file
@@ -0,0 +1,88 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
|
||||
type EnsureModelsJsonFn = (
  config: unknown,
  agentDir: unknown,
) => Promise<{ agentDir: string; wrote: boolean }>;

type WarmupModel = { id: string; provider: string; api: string };

type ResolveModelAsyncFn = (
  provider: unknown,
  modelId: unknown,
  agentDir: unknown,
  cfg: unknown,
  options?: unknown,
) => Promise<{ model: WarmupModel }>;

// Stand-in for `ensureOpenClawModelsJson`: resolves to a fixed result object.
const ensureOpenClawModelsJsonMock = vi.fn<EnsureModelsJsonFn>(async () => {
  return { agentDir: "/tmp/agent", wrote: false };
});

// Stand-in for `resolveModelAsync`: always resolves to a fixed model object.
const resolveModelAsyncMock = vi.fn<ResolveModelAsyncFn>(async () => {
  const model: WarmupModel = {
    id: "gpt-5.4",
    provider: "openai-codex",
    api: "openai-codex-responses",
  };
  return { model };
});
|
||||
|
||||
// Pin the agent directory to a fixed path.
vi.mock("../agents/agent-paths.js", () => {
  return {
    resolveOpenClawAgentDir: () => "/tmp/agent",
  };
});

// Forward models.json preparation to the mock declared in this file.
vi.mock("../agents/models-config.js", () => {
  return {
    ensureOpenClawModelsJson: (config: unknown, agentDir: unknown) => {
      return ensureOpenClawModelsJsonMock(config, agentDir);
    },
  };
});

// Forward model resolution to the mock, always passing all five positions
// (options may be undefined).
vi.mock("../agents/pi-embedded-runner/model.js", () => {
  return {
    resolveModelAsync: (
      provider: unknown,
      modelId: unknown,
      agentDir: unknown,
      cfg: unknown,
      options?: unknown,
    ) => {
      return resolveModelAsyncMock(provider, modelId, agentDir, cfg, options);
    },
  };
});
|
||||
|
||||
// Covers prewarmConfiguredPrimaryModel: it resolves the configured primary model
// with the startup retry flag, and does nothing when no primary is configured.
describe("gateway startup primary model warmup", () => {
  beforeEach(() => {
    resolveModelAsyncMock.mockClear();
    ensureOpenClawModelsJsonMock.mockClear();
  });

  it("prewarms an explicit configured primary model", async () => {
    const { __testing } = await import("./server-startup.js");
    const cfg = {
      agents: { defaults: { model: { primary: "openai-codex/gpt-5.4" } } },
    } as OpenClawConfig;
    const warn = vi.fn();

    await __testing.prewarmConfiguredPrimaryModel({ cfg, log: { warn } });

    expect(ensureOpenClawModelsJsonMock).toHaveBeenCalledWith(cfg, "/tmp/agent");
    expect(resolveModelAsyncMock).toHaveBeenCalledWith(
      "openai-codex",
      "gpt-5.4",
      "/tmp/agent",
      cfg,
      { retryTransientProviderRuntimeMiss: true },
    );
  });

  it("skips warmup when no explicit primary model is configured", async () => {
    const { __testing } = await import("./server-startup.js");
    const emptyCfg = {} as OpenClawConfig;
    const warn = vi.fn();

    await __testing.prewarmConfiguredPrimaryModel({ cfg: emptyCfg, log: { warn } });

    expect(ensureOpenClawModelsJsonMock).not.toHaveBeenCalled();
    expect(resolveModelAsyncMock).not.toHaveBeenCalled();
  });
});
|
||||
@@ -1,5 +1,6 @@
|
||||
import { getAcpSessionManager } from "../acp/control-plane/manager.js";
|
||||
import { ACP_SESSION_IDENTITY_RENDERER_VERSION } from "../acp/runtime/session-identifiers.js";
|
||||
import { resolveOpenClawAgentDir } from "../agents/agent-paths.js";
|
||||
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../agents/defaults.js";
|
||||
import { loadModelCatalog } from "../agents/model-catalog.js";
|
||||
import {
|
||||
@@ -7,10 +8,13 @@ import {
|
||||
resolveConfiguredModelRef,
|
||||
resolveHooksGmailModel,
|
||||
} from "../agents/model-selection.js";
|
||||
import { ensureOpenClawModelsJson } from "../agents/models-config.js";
|
||||
import { resolveModelAsync } from "../agents/pi-embedded-runner/model.js";
|
||||
import { resolveAgentSessionDirs } from "../agents/session-dirs.js";
|
||||
import { cleanStaleLockFiles } from "../agents/session-write-lock.js";
|
||||
import type { CliDeps } from "../cli/deps.js";
|
||||
import type { loadConfig } from "../config/config.js";
|
||||
import { resolveAgentModelPrimaryValue } from "../config/model-input.js";
|
||||
import { resolveStateDir } from "../config/paths.js";
|
||||
import { startGmailWatcherWithLogs } from "../hooks/gmail-watcher-lifecycle.js";
|
||||
import {
|
||||
@@ -31,6 +35,33 @@ import { startGatewayMemoryBackend } from "./server-startup-memory.js";
|
||||
|
||||
const SESSION_LOCK_STALE_MS = 30 * 60 * 1000;
|
||||
|
||||
/**
 * Resolves the configured primary model once so a later resolution does not
 * have to be the first one.
 *
 * Returns immediately when `agents.defaults.model` yields no primary value
 * (or only whitespace). Otherwise it prepares models.json for the agent dir and
 * resolves the model with `retryTransientProviderRuntimeMiss` enabled.
 *
 * Failures from the preparation/resolution step, including an unresolved
 * model, are reported through `params.log.warn` and never rethrown.
 *
 * NOTE(review): the model-ref and agent-dir lookups run before the `try`, so an
 * exception from either would propagate to the caller — confirm that is intended.
 *
 * @param params.cfg Loaded gateway configuration.
 * @param params.log Logger used for the warmup-failure warning.
 */
async function prewarmConfiguredPrimaryModel(params: {
  cfg: ReturnType<typeof loadConfig>;
  log: { warn: (msg: string) => void };
}): Promise<void> {
  const { cfg, log } = params;

  const configuredPrimary = resolveAgentModelPrimaryValue(cfg.agents?.defaults?.model)?.trim();
  if (!configuredPrimary) {
    return;
  }

  const { provider, model } = resolveConfiguredModelRef({
    cfg,
    defaultProvider: DEFAULT_PROVIDER,
    defaultModel: DEFAULT_MODEL,
  });
  const agentDir = resolveOpenClawAgentDir();

  try {
    await ensureOpenClawModelsJson(cfg, agentDir);
    const { model: resolvedModel, error } = await resolveModelAsync(
      provider,
      model,
      agentDir,
      cfg,
      { retryTransientProviderRuntimeMiss: true },
    );
    if (!resolvedModel) {
      // Route an unresolved model through the same warning path as a thrown error.
      throw new Error(error ?? `Unknown model: ${provider}/${model}`);
    }
  } catch (err) {
    log.warn(`startup model warmup failed for ${provider}/${model}: ${String(err)}`);
  }
}
|
||||
|
||||
export async function startGatewaySidecars(params: {
|
||||
cfg: ReturnType<typeof loadConfig>;
|
||||
pluginRegistry: ReturnType<typeof loadOpenClawPlugins>;
|
||||
@@ -129,6 +160,10 @@ export async function startGatewaySidecars(params: {
|
||||
isTruthyEnvValue(process.env.OPENCLAW_SKIP_PROVIDERS);
|
||||
if (!skipChannels) {
|
||||
try {
|
||||
await prewarmConfiguredPrimaryModel({
|
||||
cfg: params.cfg,
|
||||
log: params.log,
|
||||
});
|
||||
await params.startChannels();
|
||||
} catch (err) {
|
||||
params.logChannels.error(`channel startup failed: ${String(err)}`);
|
||||
@@ -189,3 +224,7 @@ export async function startGatewaySidecars(params: {
|
||||
|
||||
return { browserControl, pluginServices };
|
||||
}
|
||||
|
||||
/** Test-only handle exposing the otherwise module-private warmup helper. */
export const __testing = {
  prewarmConfiguredPrimaryModel: prewarmConfiguredPrimaryModel,
};
|
||||
|
||||
@@ -89,7 +89,7 @@ function buildHookProviderCacheKey(params: {
|
||||
return `${roots.workspace ?? ""}::${roots.global}::${roots.stock ?? ""}::${JSON.stringify(params.onlyPluginIds ?? [])}`;
|
||||
}
|
||||
|
||||
export function resetProviderRuntimeHookCacheForTest(): void {
|
||||
export function clearProviderRuntimeHookCache(): void {
|
||||
cachedHookProvidersWithoutConfig = new WeakMap<
|
||||
NodeJS.ProcessEnv,
|
||||
Map<string, ProviderPlugin[]>
|
||||
@@ -100,6 +100,10 @@ export function resetProviderRuntimeHookCacheForTest(): void {
|
||||
>();
|
||||
}
|
||||
|
||||
/**
 * Test-named alias: delegates to `clearProviderRuntimeHookCache` and does
 * nothing else.
 */
export function resetProviderRuntimeHookCacheForTest(): void {
  return clearProviderRuntimeHookCache();
}
|
||||
|
||||
function resolveProviderPluginsForHooks(params: {
|
||||
config?: OpenClawConfig;
|
||||
workspaceDir?: string;
|
||||
|
||||
Reference in New Issue
Block a user