From 6f9a9241a690550e0879700f9d04b3cc94a04574 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 3 May 2026 13:50:52 +0100 Subject: [PATCH] perf(gateway): defer early maintenance startup --- CHANGELOG.md | 1 + src/gateway/server-import-boundary.test.ts | 1 + src/gateway/server-plugin-bootstrap.ts | 6 + src/gateway/server-plugins.ts | 27 +++- src/gateway/server-startup-early.ts | 168 ++++++++++++--------- src/gateway/server-startup-plugins.ts | 6 + src/gateway/server.impl.ts | 17 ++- 7 files changed, 149 insertions(+), 77 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index af57a1af112..a6a5bffc9f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai ### Changes +- Gateway/performance: lazy-load early runtime discovery and shutdown-hook helpers, defer maintenance timers until after readiness, and trim duplicate plugin auto-enable work during Gateway startup. - Gateway/performance: lazy-load the heavy cron runtime after the rest of Gateway startup, defer restart-sentinel refresh after readiness, and let the Gateway startup benchmark write per-run V8 CPU profiles with `--cpu-prof-dir`. - Gateway/performance: keep raw channel-config schema parsing from discovering bundled plugin runtime metadata, and add `pnpm gateway:watch --benchmark-no-force` for profiling startup without the default port cleanup. - Plugins/onboarding: let Manual setup install optional official plugins, including ClawHub-backed diagnostics with npm fallback, and expose the external Codex plugin as a selectable provider setup choice. Thanks @vincentkoc. 
diff --git a/src/gateway/server-import-boundary.test.ts b/src/gateway/server-import-boundary.test.ts index 1f005d69e55..228d287bac5 100644 --- a/src/gateway/server-import-boundary.test.ts +++ b/src/gateway/server-import-boundary.test.ts @@ -16,6 +16,7 @@ describe("gateway startup import boundaries", () => { expect(serverImpl).not.toContain('from "./server-cron.js"'); expect(serverImpl).toContain('from "./server-cron-lazy.js"'); expect(serverImpl).not.toContain('from "./server-methods.js"'); + expect(serverImpl).not.toContain('from "../plugins/hook-runner-global.js"'); expect(validation).not.toContain("legacy-secretref-env-marker"); expect(validation).not.toContain("commands/doctor"); }); diff --git a/src/gateway/server-plugin-bootstrap.ts b/src/gateway/server-plugin-bootstrap.ts index b16136df213..c942997d71e 100644 --- a/src/gateway/server-plugin-bootstrap.ts +++ b/src/gateway/server-plugin-bootstrap.ts @@ -24,6 +24,10 @@ type GatewayPluginBootstrapLog = { debug: (msg: string) => void; }; +type GatewayStartupTrace = { + detail: (name: string, metrics: ReadonlyArray) => void; +}; + type GatewayPluginBootstrapParams = { cfg: OpenClawConfig; activationSourceConfig?: OpenClawConfig; @@ -37,6 +41,7 @@ type GatewayPluginBootstrapParams = { preferSetupRuntimeForChannelPlugins?: boolean; suppressPluginInfoLogs?: boolean; logDiagnostics?: boolean; + startupTrace?: GatewayStartupTrace; beforePrimeRegistry?: (pluginRegistry: PluginRegistry) => void; }; @@ -102,6 +107,7 @@ export function prepareGatewayPluginLoad(params: GatewayPluginBootstrapParams) { pluginLookUpTable: params.pluginLookUpTable, preferSetupRuntimeForChannelPlugins: params.preferSetupRuntimeForChannelPlugins, suppressPluginInfoLogs: params.suppressPluginInfoLogs, + startupTrace: params.startupTrace, }); params.beforePrimeRegistry?.(loaded.pluginRegistry); primeConfiguredBindingRegistry({ cfg: resolvedConfig }); diff --git a/src/gateway/server-plugins.ts b/src/gateway/server-plugins.ts index 
bb2f1e471ac..c8cf807dbf4 100644 --- a/src/gateway/server-plugins.ts +++ b/src/gateway/server-plugins.ts @@ -1,4 +1,5 @@ import { randomUUID } from "node:crypto"; +import { performance } from "node:perf_hooks"; import { normalizeModelRef, parseModelRef } from "../agents/model-selection.js"; import { applyPluginAutoEnable } from "../config/plugin-auto-enable.js"; import type { OpenClawConfig } from "../config/types.openclaw.js"; @@ -530,9 +531,13 @@ export function loadGatewayPlugins(params: { pluginLookUpTable?: PluginLookUpTable; preferSetupRuntimeForChannelPlugins?: boolean; suppressPluginInfoLogs?: boolean; + startupTrace?: { + detail: (name: string, metrics: ReadonlyArray) => void; + }; }) { + const started = performance.now(); const activationAutoEnabled = - params.activationSourceConfig !== undefined + params.activationSourceConfig !== undefined && params.autoEnabledReasons === undefined ? applyPluginAutoEnable({ config: params.activationSourceConfig, env: process.env, @@ -541,6 +546,7 @@ export function loadGatewayPlugins(params: { : {}), }) : undefined; + const autoEnableMs = performance.now() - started; const autoEnabled = params.activationSourceConfig !== undefined ? { @@ -562,6 +568,7 @@ export function loadGatewayPlugins(params: { ? { manifestRegistry: params.pluginLookUpTable.manifestRegistry } : {}), }); + const resolvedConfigMs = performance.now() - started; const resolvedConfig = autoEnabled.config; const pluginIds = params.pluginIds ?? 
[ ...( @@ -574,15 +581,24 @@ export function loadGatewayPlugins(params: { }) ).startup.pluginIds, ]; + const pluginIdsMs = performance.now() - started; if (pluginIds.length === 0) { clearActivatedPluginRuntimeState(); const pluginRegistry = createEmptyPluginRegistry(); setActivePluginRegistry(pluginRegistry, undefined, "gateway-bindable", params.workspaceDir); + params.startupTrace?.detail("plugins.gateway-load", [ + ["autoEnableMs", autoEnableMs], + ["resolvedConfigMs", resolvedConfigMs], + ["pluginIdsMs", pluginIdsMs], + ["loadMs", 0], + ["pluginIds", "0"], + ]); return { pluginRegistry, gatewayMethods: [...params.baseMethods], }; } + const beforeLoad = performance.now(); const pluginRegistry = loadOpenClawPlugins({ config: resolvedConfig, activationSourceConfig: params.activationSourceConfig ?? params.cfg, @@ -607,7 +623,16 @@ export function loadGatewayPlugins(params: { ? { manifestRegistry: params.pluginLookUpTable.manifestRegistry } : {}), }); + const loadMs = performance.now() - beforeLoad; const pluginMethods = Object.keys(pluginRegistry.gatewayHandlers); const gatewayMethods = Array.from(new Set([...params.baseMethods, ...pluginMethods])); + params.startupTrace?.detail("plugins.gateway-load", [ + ["autoEnableMs", autoEnableMs], + ["resolvedConfigMs", resolvedConfigMs], + ["pluginIdsMs", pluginIdsMs], + ["loadMs", loadMs], + ["pluginIds", String(pluginIds.length)], + ["gatewayHandlers", String(pluginMethods.length)], + ]); return { pluginRegistry, gatewayMethods }; } diff --git a/src/gateway/server-startup-early.ts b/src/gateway/server-startup-early.ts index 224ea8839ed..5b4a8c33183 100644 --- a/src/gateway/server-startup-early.ts +++ b/src/gateway/server-startup-early.ts @@ -1,20 +1,25 @@ -import { registerSkillsChangeListener } from "../agents/skills/refresh.js"; import type { GatewayTailscaleMode } from "../config/types.gateway.js"; import type { OpenClawConfig } from "../config/types.openclaw.js"; import { resolveCronStorePath } from "../cron/store.js"; 
-import { getMachineDisplayName } from "../infra/machine-name.js"; -import { - primeRemoteSkillsCache, - refreshRemoteBinsForConnectedNodes, - setSkillsRemoteRegistry, -} from "../infra/skills-remote.js"; import type { PluginRegistry } from "../plugins/registry-types.js"; -import { - configureTaskRegistryMaintenance, - startTaskRegistryMaintenance, -} from "../tasks/task-registry.maintenance.js"; -import { startGatewayDiscovery } from "./server-discovery-runtime.js"; -import { startGatewayMaintenanceTimers } from "./server-maintenance.js"; + +type Awaitable<T> = T | Promise<T>; + +type GatewayStartupTrace = { + measure: <T>(name: string, run: () => Awaitable<T>) => Promise<T>; +}; + +type StartGatewayMaintenanceTimers = + typeof import("./server-maintenance.js").startGatewayMaintenanceTimers; +type GatewayMaintenanceParams = Parameters<StartGatewayMaintenanceTimers>[0]; + +async function measureStartup<T>( + startupTrace: GatewayStartupTrace | undefined, + name: string, + run: () => Awaitable<T>, +): Promise<T> { + return startupTrace ? startupTrace.measure(name, run) : await run(); +} export async function startGatewayPluginDiscovery(params: { minimalTestGateway: boolean; @@ -27,25 +32,33 @@ export async function startGatewayPluginDiscovery(params: { warn: (msg: string) => void; }; pluginRegistry?: PluginRegistry; + startupTrace?: GatewayStartupTrace; }): Promise<(() => Promise<void>) | null> { if (params.minimalTestGateway) { return null; } - const machineDisplayName = await getMachineDisplayName(); - const discovery = await startGatewayDiscovery({ - machineDisplayName, - port: params.port, - gatewayTls: params.gatewayTls.enabled - ? 
{ enabled: true, fingerprintSha256: params.gatewayTls.fingerprintSha256 } - : undefined, - wideAreaDiscoveryEnabled: params.cfgAtStart.discovery?.wideArea?.enabled === true, - wideAreaDiscoveryDomain: params.cfgAtStart.discovery?.wideArea?.domain, - tailscaleMode: params.tailscaleMode, - mdnsMode: params.cfgAtStart.discovery?.mdns?.mode, - gatewayDiscoveryServices: params.pluginRegistry?.gatewayDiscoveryServices, - logDiscovery: params.logDiscovery, + const machineDisplayName = await measureStartup( + params.startupTrace, + "runtime.early.discovery.machine-name", + async () => (await import("../infra/machine-name.js")).getMachineDisplayName(), + ); + return await measureStartup(params.startupTrace, "runtime.early.discovery.start", async () => { + const { startGatewayDiscovery } = await import("./server-discovery-runtime.js"); + const discovery = await startGatewayDiscovery({ + machineDisplayName, + port: params.port, + gatewayTls: params.gatewayTls.enabled + ? { enabled: true, fingerprintSha256: params.gatewayTls.fingerprintSha256 } + : undefined, + wideAreaDiscoveryEnabled: params.cfgAtStart.discovery?.wideArea?.enabled === true, + wideAreaDiscoveryDomain: params.cfgAtStart.discovery?.wideArea?.domain, + tailscaleMode: params.tailscaleMode, + mdnsMode: params.cfgAtStart.discovery?.mdns?.mode, + gatewayDiscoveryServices: params.pluginRegistry?.gatewayDiscoveryServices, + logDiscovery: params.logDiscovery, + }); + return discovery.bonjourStop; }); - return discovery.bonjourStop; } export async function startGatewayEarlyRuntime(params: { @@ -62,67 +75,82 @@ export async function startGatewayEarlyRuntime(params: { info: (msg: string) => void; warn: (msg: string) => void; }; - nodeRegistry: Parameters<typeof setSkillsRemoteRegistry>[0]; + nodeRegistry: Parameters<typeof import("../infra/skills-remote.js").setSkillsRemoteRegistry>[0]; pluginRegistry?: PluginRegistry; - broadcast: Parameters<typeof startGatewayMaintenanceTimers>[0]["broadcast"]; - nodeSendToAllSubscribed: Parameters< - typeof startGatewayMaintenanceTimers - >[0]["nodeSendToAllSubscribed"]; - getPresenceVersion: 
Parameters<typeof startGatewayMaintenanceTimers>[0]["getPresenceVersion"]; - getHealthVersion: Parameters<typeof startGatewayMaintenanceTimers>[0]["getHealthVersion"]; - refreshGatewayHealthSnapshot: Parameters< - typeof startGatewayMaintenanceTimers - >[0]["refreshGatewayHealthSnapshot"]; - logHealth: Parameters<typeof startGatewayMaintenanceTimers>[0]["logHealth"]; - dedupe: Parameters<typeof startGatewayMaintenanceTimers>[0]["dedupe"]; - chatAbortControllers: Parameters<typeof startGatewayMaintenanceTimers>[0]["chatAbortControllers"]; - chatRunState: Parameters<typeof startGatewayMaintenanceTimers>[0]["chatRunState"]; - chatRunBuffers: Parameters<typeof startGatewayMaintenanceTimers>[0]["chatRunBuffers"]; - chatDeltaSentAt: Parameters<typeof startGatewayMaintenanceTimers>[0]["chatDeltaSentAt"]; - chatDeltaLastBroadcastLen: Parameters< - typeof startGatewayMaintenanceTimers - >[0]["chatDeltaLastBroadcastLen"]; - removeChatRun: Parameters<typeof startGatewayMaintenanceTimers>[0]["removeChatRun"]; - agentRunSeq: Parameters<typeof startGatewayMaintenanceTimers>[0]["agentRunSeq"]; - nodeSendToSession: Parameters<typeof startGatewayMaintenanceTimers>[0]["nodeSendToSession"]; + broadcast: GatewayMaintenanceParams["broadcast"]; + nodeSendToAllSubscribed: Parameters<StartGatewayMaintenanceTimers>[0]["nodeSendToAllSubscribed"]; + getPresenceVersion: GatewayMaintenanceParams["getPresenceVersion"]; + getHealthVersion: GatewayMaintenanceParams["getHealthVersion"]; + refreshGatewayHealthSnapshot: GatewayMaintenanceParams["refreshGatewayHealthSnapshot"]; + logHealth: GatewayMaintenanceParams["logHealth"]; + dedupe: GatewayMaintenanceParams["dedupe"]; + chatAbortControllers: GatewayMaintenanceParams["chatAbortControllers"]; + chatRunState: GatewayMaintenanceParams["chatRunState"]; + chatRunBuffers: GatewayMaintenanceParams["chatRunBuffers"]; + chatDeltaSentAt: GatewayMaintenanceParams["chatDeltaSentAt"]; + chatDeltaLastBroadcastLen: GatewayMaintenanceParams["chatDeltaLastBroadcastLen"]; + removeChatRun: GatewayMaintenanceParams["removeChatRun"]; + agentRunSeq: GatewayMaintenanceParams["agentRunSeq"]; + nodeSendToSession: GatewayMaintenanceParams["nodeSendToSession"]; mediaCleanupTtlMs?: number; skillsRefreshDelayMs: number; getSkillsRefreshTimer: () => ReturnType<typeof setTimeout> | null; setSkillsRefreshTimer: (timer: ReturnType<typeof setTimeout> | null) => void; getRuntimeConfig: () => OpenClawConfig; + startupTrace?: GatewayStartupTrace; }) { - const 
bonjourStop = await startGatewayPluginDiscovery(params); + const bonjourStop = await measureStartup(params.startupTrace, "runtime.early.discovery", () => + startGatewayPluginDiscovery(params), + ); if (!params.minimalTestGateway) { + const [{ primeRemoteSkillsCache, setSkillsRemoteRegistry }, taskRegistryMaintenance] = + await measureStartup(params.startupTrace, "runtime.early.lazy-runtime-imports", () => + Promise.all([ + import("../infra/skills-remote.js"), + import("../tasks/task-registry.maintenance.js"), + ]), + ); setSkillsRemoteRegistry(params.nodeRegistry); void primeRemoteSkillsCache(); - configureTaskRegistryMaintenance({ + taskRegistryMaintenance.configureTaskRegistryMaintenance({ cronStorePath: resolveCronStorePath(params.cfgAtStart.cron?.store), cronRuntimeAuthoritative: true, }); - startTaskRegistryMaintenance(); + taskRegistryMaintenance.startTaskRegistryMaintenance(); } const skillsChangeUnsub = params.minimalTestGateway ? () => {} - : registerSkillsChangeListener((event) => { - if (event.reason === "remote-node") { - return; - } - const existingTimer = params.getSkillsRefreshTimer(); - if (existingTimer) { - clearTimeout(existingTimer); - } - const nextTimer = setTimeout(() => { - params.setSkillsRefreshTimer(null); - void refreshRemoteBinsForConnectedNodes(params.getRuntimeConfig()); - }, params.skillsRefreshDelayMs); - params.setSkillsRefreshTimer(nextTimer); + : await measureStartup(params.startupTrace, "runtime.early.skills-listener", async () => { + const [{ registerSkillsChangeListener }, { refreshRemoteBinsForConnectedNodes }] = + await Promise.all([ + import("../agents/skills/refresh.js"), + import("../infra/skills-remote.js"), + ]); + return registerSkillsChangeListener((event) => { + if (event.reason === "remote-node") { + return; + } + const existingTimer = params.getSkillsRefreshTimer(); + if (existingTimer) { + clearTimeout(existingTimer); + } + const nextTimer = setTimeout(() => { + params.setSkillsRefreshTimer(null); + void 
refreshRemoteBinsForConnectedNodes(params.getRuntimeConfig()); + }, params.skillsRefreshDelayMs); + params.setSkillsRefreshTimer(nextTimer); + }); }); - const maintenance = params.minimalTestGateway - ? null - : startGatewayMaintenanceTimers({ + const startMaintenance = async () => { + if (params.minimalTestGateway) { + return null; + } + return await measureStartup(params.startupTrace, "post-ready.maintenance", async () => { + const { startGatewayMaintenanceTimers } = await import("./server-maintenance.js"); + return startGatewayMaintenanceTimers({ broadcast: params.broadcast, nodeSendToAllSubscribed: params.nodeSendToAllSubscribed, getPresenceVersion: params.getPresenceVersion, @@ -142,10 +170,12 @@ export async function startGatewayEarlyRuntime(params: { ? { mediaCleanupTtlMs: params.mediaCleanupTtlMs } : {}), }); + }); + }; return { bonjourStop, skillsChangeUnsub, - maintenance, + startMaintenance, }; } diff --git a/src/gateway/server-startup-plugins.ts b/src/gateway/server-startup-plugins.ts index 26f6f8c99ea..1b7cc8993b1 100644 --- a/src/gateway/server-startup-plugins.ts +++ b/src/gateway/server-startup-plugins.ts @@ -19,6 +19,10 @@ type GatewayPluginBootstrapLog = { debug: (message: string) => void; }; +type GatewayStartupTrace = { + detail: (name: string, metrics: ReadonlyArray) => void; +}; + export function resolveGatewayStartupMaintenanceConfig(params: { cfgAtStart: OpenClawConfig; startupRuntimeConfig: OpenClawConfig; @@ -151,6 +155,7 @@ export async function loadGatewayStartupPluginRuntime(params: { pluginLookUpTable?: ReturnType; preferSetupRuntimeForChannelPlugins?: boolean; suppressPluginInfoLogs?: boolean; + startupTrace?: GatewayStartupTrace; }) { return loadGatewayStartupPlugins({ cfg: params.cfg, @@ -163,5 +168,6 @@ export async function loadGatewayStartupPluginRuntime(params: { pluginLookUpTable: params.pluginLookUpTable, preferSetupRuntimeForChannelPlugins: params.preferSetupRuntimeForChannelPlugins, suppressPluginInfoLogs: 
params.suppressPluginInfoLogs, + startupTrace: params.startupTrace, }); } diff --git a/src/gateway/server.impl.ts b/src/gateway/server.impl.ts index af00deb2cb1..b9f1ee1b49c 100644 --- a/src/gateway/server.impl.ts +++ b/src/gateway/server.impl.ts @@ -42,7 +42,6 @@ import { clearCurrentPluginMetadataSnapshot, setCurrentPluginMetadataSnapshot, } from "../plugins/current-plugin-metadata-snapshot.js"; -import { runGlobalGatewayStopSafely } from "../plugins/hook-runner-global.js"; import type { PluginHookGatewayCronService } from "../plugins/hook-types.js"; import { pinActivePluginChannelRegistry, @@ -959,16 +958,11 @@ export async function startGatewayServer( runtimeState.skillsRefreshTimer = timer; }, getRuntimeConfig, + startupTrace, }), ); runtimeState.bonjourStop = earlyRuntime.bonjourStop; runtimeState.skillsChangeUnsub = earlyRuntime.skillsChangeUnsub; - if (earlyRuntime.maintenance) { - runtimeState.tickInterval = earlyRuntime.maintenance.tickInterval; - runtimeState.healthInterval = earlyRuntime.maintenance.healthInterval; - runtimeState.dedupeCleanup = earlyRuntime.maintenance.dedupeCleanup; - runtimeState.mediaCleanup = earlyRuntime.maintenance.mediaCleanup; - } Object.assign( runtimeState, @@ -1355,6 +1349,7 @@ export async function startGatewayServer( baseMethods, startupPluginIds, pluginLookUpTable, + startupTrace, }), onStartupPluginsLoading: () => { startupPendingReason = "startup-sidecars"; @@ -1426,6 +1421,13 @@ export async function startGatewayServer( log.warn(`gateway: failed to promote config last-known-good backup: ${String(err)}`); }); if (!minimalTestGateway) { + const maintenance = await earlyRuntime.startMaintenance(); + if (maintenance) { + runtimeState.tickInterval = maintenance.tickInterval; + runtimeState.healthInterval = maintenance.healthInterval; + runtimeState.dedupeCleanup = maintenance.dedupeCleanup; + runtimeState.mediaCleanup = maintenance.mediaCleanup; + } startGatewayCronWithLogging({ cron: runtimeState.cronState.cron, logCron, 
@@ -1442,6 +1444,7 @@ export async function startGatewayServer( close: async (opts) => { try { // Run gateway_stop plugin hook before shutdown + const { runGlobalGatewayStopSafely } = await import("../plugins/hook-runner-global.js"); await runGlobalGatewayStopSafely({ event: { reason: opts?.reason ?? "gateway stopping" }, ctx: { port },