perf(gateway): defer early maintenance startup

This commit is contained in:
Peter Steinberger
2026-05-03 13:50:52 +01:00
parent 6a653888fd
commit 6f9a9241a6
7 changed files with 149 additions and 77 deletions

View File

@@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai
### Changes
- Gateway/performance: lazy-load early runtime discovery and shutdown-hook helpers, defer maintenance timers until after readiness, and trim duplicate plugin auto-enable work during Gateway startup.
- Gateway/performance: lazy-load the heavy cron runtime after the rest of Gateway startup, defer restart-sentinel refresh after readiness, and let the Gateway startup benchmark write per-run V8 CPU profiles with `--cpu-prof-dir`.
- Gateway/performance: keep raw channel-config schema parsing from discovering bundled plugin runtime metadata, and add `pnpm gateway:watch --benchmark-no-force` for profiling startup without the default port cleanup.
- Plugins/onboarding: let Manual setup install optional official plugins, including ClawHub-backed diagnostics with npm fallback, and expose the external Codex plugin as a selectable provider setup choice. Thanks @vincentkoc.

View File

@@ -16,6 +16,7 @@ describe("gateway startup import boundaries", () => {
expect(serverImpl).not.toContain('from "./server-cron.js"');
expect(serverImpl).toContain('from "./server-cron-lazy.js"');
expect(serverImpl).not.toContain('from "./server-methods.js"');
expect(serverImpl).not.toContain('from "../plugins/hook-runner-global.js"');
expect(validation).not.toContain("legacy-secretref-env-marker");
expect(validation).not.toContain("commands/doctor");
});

View File

@@ -24,6 +24,10 @@ type GatewayPluginBootstrapLog = {
debug: (msg: string) => void;
};
/**
 * Optional startup-trace sink accepted by the plugin bootstrap params.
 * `detail` receives a metric-group name plus a list of `[label, value]` pairs,
 * where each value is either a number or a string.
 */
type GatewayStartupTrace = {
detail: (name: string, metrics: ReadonlyArray<readonly [string, number | string]>) => void;
};
type GatewayPluginBootstrapParams = {
cfg: OpenClawConfig;
activationSourceConfig?: OpenClawConfig;
@@ -37,6 +41,7 @@ type GatewayPluginBootstrapParams = {
preferSetupRuntimeForChannelPlugins?: boolean;
suppressPluginInfoLogs?: boolean;
logDiagnostics?: boolean;
startupTrace?: GatewayStartupTrace;
beforePrimeRegistry?: (pluginRegistry: PluginRegistry) => void;
};
@@ -102,6 +107,7 @@ export function prepareGatewayPluginLoad(params: GatewayPluginBootstrapParams) {
pluginLookUpTable: params.pluginLookUpTable,
preferSetupRuntimeForChannelPlugins: params.preferSetupRuntimeForChannelPlugins,
suppressPluginInfoLogs: params.suppressPluginInfoLogs,
startupTrace: params.startupTrace,
});
params.beforePrimeRegistry?.(loaded.pluginRegistry);
primeConfiguredBindingRegistry({ cfg: resolvedConfig });

View File

@@ -1,4 +1,5 @@
import { randomUUID } from "node:crypto";
import { performance } from "node:perf_hooks";
import { normalizeModelRef, parseModelRef } from "../agents/model-selection.js";
import { applyPluginAutoEnable } from "../config/plugin-auto-enable.js";
import type { OpenClawConfig } from "../config/types.openclaw.js";
@@ -530,9 +531,13 @@ export function loadGatewayPlugins(params: {
pluginLookUpTable?: PluginLookUpTable;
preferSetupRuntimeForChannelPlugins?: boolean;
suppressPluginInfoLogs?: boolean;
startupTrace?: {
detail: (name: string, metrics: ReadonlyArray<readonly [string, number | string]>) => void;
};
}) {
const started = performance.now();
const activationAutoEnabled =
params.activationSourceConfig !== undefined
params.activationSourceConfig !== undefined && params.autoEnabledReasons === undefined
? applyPluginAutoEnable({
config: params.activationSourceConfig,
env: process.env,
@@ -541,6 +546,7 @@ export function loadGatewayPlugins(params: {
: {}),
})
: undefined;
const autoEnableMs = performance.now() - started;
const autoEnabled =
params.activationSourceConfig !== undefined
? {
@@ -562,6 +568,7 @@ export function loadGatewayPlugins(params: {
? { manifestRegistry: params.pluginLookUpTable.manifestRegistry }
: {}),
});
const resolvedConfigMs = performance.now() - started;
const resolvedConfig = autoEnabled.config;
const pluginIds = params.pluginIds ?? [
...(
@@ -574,15 +581,24 @@ export function loadGatewayPlugins(params: {
})
).startup.pluginIds,
];
const pluginIdsMs = performance.now() - started;
if (pluginIds.length === 0) {
clearActivatedPluginRuntimeState();
const pluginRegistry = createEmptyPluginRegistry();
setActivePluginRegistry(pluginRegistry, undefined, "gateway-bindable", params.workspaceDir);
params.startupTrace?.detail("plugins.gateway-load", [
["autoEnableMs", autoEnableMs],
["resolvedConfigMs", resolvedConfigMs],
["pluginIdsMs", pluginIdsMs],
["loadMs", 0],
["pluginIds", "0"],
]);
return {
pluginRegistry,
gatewayMethods: [...params.baseMethods],
};
}
const beforeLoad = performance.now();
const pluginRegistry = loadOpenClawPlugins({
config: resolvedConfig,
activationSourceConfig: params.activationSourceConfig ?? params.cfg,
@@ -607,7 +623,16 @@ export function loadGatewayPlugins(params: {
? { manifestRegistry: params.pluginLookUpTable.manifestRegistry }
: {}),
});
const loadMs = performance.now() - beforeLoad;
const pluginMethods = Object.keys(pluginRegistry.gatewayHandlers);
const gatewayMethods = Array.from(new Set([...params.baseMethods, ...pluginMethods]));
params.startupTrace?.detail("plugins.gateway-load", [
["autoEnableMs", autoEnableMs],
["resolvedConfigMs", resolvedConfigMs],
["pluginIdsMs", pluginIdsMs],
["loadMs", loadMs],
["pluginIds", String(pluginIds.length)],
["gatewayHandlers", String(pluginMethods.length)],
]);
return { pluginRegistry, gatewayMethods };
}

View File

@@ -1,20 +1,25 @@
import { registerSkillsChangeListener } from "../agents/skills/refresh.js";
import type { GatewayTailscaleMode } from "../config/types.gateway.js";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import { resolveCronStorePath } from "../cron/store.js";
import { getMachineDisplayName } from "../infra/machine-name.js";
import {
primeRemoteSkillsCache,
refreshRemoteBinsForConnectedNodes,
setSkillsRemoteRegistry,
} from "../infra/skills-remote.js";
import type { PluginRegistry } from "../plugins/registry-types.js";
import {
configureTaskRegistryMaintenance,
startTaskRegistryMaintenance,
} from "../tasks/task-registry.maintenance.js";
import { startGatewayDiscovery } from "./server-discovery-runtime.js";
import { startGatewayMaintenanceTimers } from "./server-maintenance.js";
/** A value that is either available immediately or delivered through a promise. */
type Awaitable<T> = T | Promise<T>;
/**
 * Startup-trace hook used by this module.
 * `measure` is given a step name and a callback, and returns a promise of the
 * callback's result type.
 */
type GatewayStartupTrace = {
measure: <T>(name: string, run: () => Awaitable<T>) => Promise<T>;
};
/** Function type of `startGatewayMaintenanceTimers`, obtained through a type-level `import()`. */
type StartGatewayMaintenanceTimers =
typeof import("./server-maintenance.js").startGatewayMaintenanceTimers;
/** Type of the first argument accepted by `startGatewayMaintenanceTimers`. */
type GatewayMaintenanceParams = Parameters<StartGatewayMaintenanceTimers>[0];
/**
 * Runs one startup step, routing it through the startup trace when one is supplied.
 *
 * @param startupTrace - Trace whose `measure` wraps the step; when absent the step runs directly.
 * @param name - Step name forwarded to `startupTrace.measure`.
 * @param run - The step itself; may return its value directly or via a promise.
 * @returns A promise for the step's result.
 */
async function measureStartup<T>(
startupTrace: GatewayStartupTrace | undefined,
name: string,
run: () => Awaitable<T>,
): Promise<T> {
// No trace configured: execute the step directly.
if (!startupTrace) {
return await run();
}
return startupTrace.measure(name, run);
}
export async function startGatewayPluginDiscovery(params: {
minimalTestGateway: boolean;
@@ -27,25 +32,33 @@ export async function startGatewayPluginDiscovery(params: {
warn: (msg: string) => void;
};
pluginRegistry?: PluginRegistry;
startupTrace?: GatewayStartupTrace;
}): Promise<(() => Promise<void>) | null> {
if (params.minimalTestGateway) {
return null;
}
const machineDisplayName = await getMachineDisplayName();
const discovery = await startGatewayDiscovery({
machineDisplayName,
port: params.port,
gatewayTls: params.gatewayTls.enabled
? { enabled: true, fingerprintSha256: params.gatewayTls.fingerprintSha256 }
: undefined,
wideAreaDiscoveryEnabled: params.cfgAtStart.discovery?.wideArea?.enabled === true,
wideAreaDiscoveryDomain: params.cfgAtStart.discovery?.wideArea?.domain,
tailscaleMode: params.tailscaleMode,
mdnsMode: params.cfgAtStart.discovery?.mdns?.mode,
gatewayDiscoveryServices: params.pluginRegistry?.gatewayDiscoveryServices,
logDiscovery: params.logDiscovery,
const machineDisplayName = await measureStartup(
params.startupTrace,
"runtime.early.discovery.machine-name",
async () => (await import("../infra/machine-name.js")).getMachineDisplayName(),
);
return await measureStartup(params.startupTrace, "runtime.early.discovery.start", async () => {
const { startGatewayDiscovery } = await import("./server-discovery-runtime.js");
const discovery = await startGatewayDiscovery({
machineDisplayName,
port: params.port,
gatewayTls: params.gatewayTls.enabled
? { enabled: true, fingerprintSha256: params.gatewayTls.fingerprintSha256 }
: undefined,
wideAreaDiscoveryEnabled: params.cfgAtStart.discovery?.wideArea?.enabled === true,
wideAreaDiscoveryDomain: params.cfgAtStart.discovery?.wideArea?.domain,
tailscaleMode: params.tailscaleMode,
mdnsMode: params.cfgAtStart.discovery?.mdns?.mode,
gatewayDiscoveryServices: params.pluginRegistry?.gatewayDiscoveryServices,
logDiscovery: params.logDiscovery,
});
return discovery.bonjourStop;
});
return discovery.bonjourStop;
}
export async function startGatewayEarlyRuntime(params: {
@@ -62,67 +75,82 @@ export async function startGatewayEarlyRuntime(params: {
info: (msg: string) => void;
warn: (msg: string) => void;
};
nodeRegistry: Parameters<typeof setSkillsRemoteRegistry>[0];
nodeRegistry: Parameters<typeof import("../infra/skills-remote.js").setSkillsRemoteRegistry>[0];
pluginRegistry?: PluginRegistry;
broadcast: Parameters<typeof startGatewayMaintenanceTimers>[0]["broadcast"];
nodeSendToAllSubscribed: Parameters<
typeof startGatewayMaintenanceTimers
>[0]["nodeSendToAllSubscribed"];
getPresenceVersion: Parameters<typeof startGatewayMaintenanceTimers>[0]["getPresenceVersion"];
getHealthVersion: Parameters<typeof startGatewayMaintenanceTimers>[0]["getHealthVersion"];
refreshGatewayHealthSnapshot: Parameters<
typeof startGatewayMaintenanceTimers
>[0]["refreshGatewayHealthSnapshot"];
logHealth: Parameters<typeof startGatewayMaintenanceTimers>[0]["logHealth"];
dedupe: Parameters<typeof startGatewayMaintenanceTimers>[0]["dedupe"];
chatAbortControllers: Parameters<typeof startGatewayMaintenanceTimers>[0]["chatAbortControllers"];
chatRunState: Parameters<typeof startGatewayMaintenanceTimers>[0]["chatRunState"];
chatRunBuffers: Parameters<typeof startGatewayMaintenanceTimers>[0]["chatRunBuffers"];
chatDeltaSentAt: Parameters<typeof startGatewayMaintenanceTimers>[0]["chatDeltaSentAt"];
chatDeltaLastBroadcastLen: Parameters<
typeof startGatewayMaintenanceTimers
>[0]["chatDeltaLastBroadcastLen"];
removeChatRun: Parameters<typeof startGatewayMaintenanceTimers>[0]["removeChatRun"];
agentRunSeq: Parameters<typeof startGatewayMaintenanceTimers>[0]["agentRunSeq"];
nodeSendToSession: Parameters<typeof startGatewayMaintenanceTimers>[0]["nodeSendToSession"];
broadcast: GatewayMaintenanceParams["broadcast"];
nodeSendToAllSubscribed: Parameters<StartGatewayMaintenanceTimers>[0]["nodeSendToAllSubscribed"];
getPresenceVersion: GatewayMaintenanceParams["getPresenceVersion"];
getHealthVersion: GatewayMaintenanceParams["getHealthVersion"];
refreshGatewayHealthSnapshot: GatewayMaintenanceParams["refreshGatewayHealthSnapshot"];
logHealth: GatewayMaintenanceParams["logHealth"];
dedupe: GatewayMaintenanceParams["dedupe"];
chatAbortControllers: GatewayMaintenanceParams["chatAbortControllers"];
chatRunState: GatewayMaintenanceParams["chatRunState"];
chatRunBuffers: GatewayMaintenanceParams["chatRunBuffers"];
chatDeltaSentAt: GatewayMaintenanceParams["chatDeltaSentAt"];
chatDeltaLastBroadcastLen: GatewayMaintenanceParams["chatDeltaLastBroadcastLen"];
removeChatRun: GatewayMaintenanceParams["removeChatRun"];
agentRunSeq: GatewayMaintenanceParams["agentRunSeq"];
nodeSendToSession: GatewayMaintenanceParams["nodeSendToSession"];
mediaCleanupTtlMs?: number;
skillsRefreshDelayMs: number;
getSkillsRefreshTimer: () => ReturnType<typeof setTimeout> | null;
setSkillsRefreshTimer: (timer: ReturnType<typeof setTimeout> | null) => void;
getRuntimeConfig: () => OpenClawConfig;
startupTrace?: GatewayStartupTrace;
}) {
const bonjourStop = await startGatewayPluginDiscovery(params);
const bonjourStop = await measureStartup(params.startupTrace, "runtime.early.discovery", () =>
startGatewayPluginDiscovery(params),
);
if (!params.minimalTestGateway) {
const [{ primeRemoteSkillsCache, setSkillsRemoteRegistry }, taskRegistryMaintenance] =
await measureStartup(params.startupTrace, "runtime.early.lazy-runtime-imports", () =>
Promise.all([
import("../infra/skills-remote.js"),
import("../tasks/task-registry.maintenance.js"),
]),
);
setSkillsRemoteRegistry(params.nodeRegistry);
void primeRemoteSkillsCache();
configureTaskRegistryMaintenance({
taskRegistryMaintenance.configureTaskRegistryMaintenance({
cronStorePath: resolveCronStorePath(params.cfgAtStart.cron?.store),
cronRuntimeAuthoritative: true,
});
startTaskRegistryMaintenance();
taskRegistryMaintenance.startTaskRegistryMaintenance();
}
const skillsChangeUnsub = params.minimalTestGateway
? () => {}
: registerSkillsChangeListener((event) => {
if (event.reason === "remote-node") {
return;
}
const existingTimer = params.getSkillsRefreshTimer();
if (existingTimer) {
clearTimeout(existingTimer);
}
const nextTimer = setTimeout(() => {
params.setSkillsRefreshTimer(null);
void refreshRemoteBinsForConnectedNodes(params.getRuntimeConfig());
}, params.skillsRefreshDelayMs);
params.setSkillsRefreshTimer(nextTimer);
: await measureStartup(params.startupTrace, "runtime.early.skills-listener", async () => {
const [{ registerSkillsChangeListener }, { refreshRemoteBinsForConnectedNodes }] =
await Promise.all([
import("../agents/skills/refresh.js"),
import("../infra/skills-remote.js"),
]);
return registerSkillsChangeListener((event) => {
if (event.reason === "remote-node") {
return;
}
const existingTimer = params.getSkillsRefreshTimer();
if (existingTimer) {
clearTimeout(existingTimer);
}
const nextTimer = setTimeout(() => {
params.setSkillsRefreshTimer(null);
void refreshRemoteBinsForConnectedNodes(params.getRuntimeConfig());
}, params.skillsRefreshDelayMs);
params.setSkillsRefreshTimer(nextTimer);
});
});
const maintenance = params.minimalTestGateway
? null
: startGatewayMaintenanceTimers({
const startMaintenance = async () => {
if (params.minimalTestGateway) {
return null;
}
return await measureStartup(params.startupTrace, "post-ready.maintenance", async () => {
const { startGatewayMaintenanceTimers } = await import("./server-maintenance.js");
return startGatewayMaintenanceTimers({
broadcast: params.broadcast,
nodeSendToAllSubscribed: params.nodeSendToAllSubscribed,
getPresenceVersion: params.getPresenceVersion,
@@ -142,10 +170,12 @@ export async function startGatewayEarlyRuntime(params: {
? { mediaCleanupTtlMs: params.mediaCleanupTtlMs }
: {}),
});
});
};
return {
bonjourStop,
skillsChangeUnsub,
maintenance,
startMaintenance,
};
}

View File

@@ -19,6 +19,10 @@ type GatewayPluginBootstrapLog = {
debug: (message: string) => void;
};
/**
 * Optional startup-trace sink passed through to the startup plugin loader.
 * `detail` receives a metric-group name plus a list of `[label, value]` pairs,
 * where each value is either a number or a string.
 */
type GatewayStartupTrace = {
detail: (name: string, metrics: ReadonlyArray<readonly [string, number | string]>) => void;
};
export function resolveGatewayStartupMaintenanceConfig(params: {
cfgAtStart: OpenClawConfig;
startupRuntimeConfig: OpenClawConfig;
@@ -151,6 +155,7 @@ export async function loadGatewayStartupPluginRuntime(params: {
pluginLookUpTable?: ReturnType<typeof loadPluginLookUpTable>;
preferSetupRuntimeForChannelPlugins?: boolean;
suppressPluginInfoLogs?: boolean;
startupTrace?: GatewayStartupTrace;
}) {
return loadGatewayStartupPlugins({
cfg: params.cfg,
@@ -163,5 +168,6 @@ export async function loadGatewayStartupPluginRuntime(params: {
pluginLookUpTable: params.pluginLookUpTable,
preferSetupRuntimeForChannelPlugins: params.preferSetupRuntimeForChannelPlugins,
suppressPluginInfoLogs: params.suppressPluginInfoLogs,
startupTrace: params.startupTrace,
});
}

View File

@@ -42,7 +42,6 @@ import {
clearCurrentPluginMetadataSnapshot,
setCurrentPluginMetadataSnapshot,
} from "../plugins/current-plugin-metadata-snapshot.js";
import { runGlobalGatewayStopSafely } from "../plugins/hook-runner-global.js";
import type { PluginHookGatewayCronService } from "../plugins/hook-types.js";
import {
pinActivePluginChannelRegistry,
@@ -959,16 +958,11 @@ export async function startGatewayServer(
runtimeState.skillsRefreshTimer = timer;
},
getRuntimeConfig,
startupTrace,
}),
);
runtimeState.bonjourStop = earlyRuntime.bonjourStop;
runtimeState.skillsChangeUnsub = earlyRuntime.skillsChangeUnsub;
if (earlyRuntime.maintenance) {
runtimeState.tickInterval = earlyRuntime.maintenance.tickInterval;
runtimeState.healthInterval = earlyRuntime.maintenance.healthInterval;
runtimeState.dedupeCleanup = earlyRuntime.maintenance.dedupeCleanup;
runtimeState.mediaCleanup = earlyRuntime.maintenance.mediaCleanup;
}
Object.assign(
runtimeState,
@@ -1355,6 +1349,7 @@ export async function startGatewayServer(
baseMethods,
startupPluginIds,
pluginLookUpTable,
startupTrace,
}),
onStartupPluginsLoading: () => {
startupPendingReason = "startup-sidecars";
@@ -1426,6 +1421,13 @@ export async function startGatewayServer(
log.warn(`gateway: failed to promote config last-known-good backup: ${String(err)}`);
});
if (!minimalTestGateway) {
const maintenance = await earlyRuntime.startMaintenance();
if (maintenance) {
runtimeState.tickInterval = maintenance.tickInterval;
runtimeState.healthInterval = maintenance.healthInterval;
runtimeState.dedupeCleanup = maintenance.dedupeCleanup;
runtimeState.mediaCleanup = maintenance.mediaCleanup;
}
startGatewayCronWithLogging({
cron: runtimeState.cronState.cron,
logCron,
@@ -1442,6 +1444,7 @@ export async function startGatewayServer(
close: async (opts) => {
try {
// Run gateway_stop plugin hook before shutdown
const { runGlobalGatewayStopSafely } = await import("../plugins/hook-runner-global.js");
await runGlobalGatewayStopSafely({
event: { reason: opts?.reason ?? "gateway stopping" },
ctx: { port },