From bd79b25bb748c70b90d4df05ae6743588222273e Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 3 May 2026 14:47:06 +0100 Subject: [PATCH] perf(gateway): defer task registry startup imports --- scripts/bench-gateway-startup.ts | 26 ++++++++++++++++--- src/gateway/server-close.ts | 2 +- src/gateway/server-import-boundary.test.ts | 3 +++ src/gateway/server-startup-early.ts | 3 +++ src/gateway/server.impl.ts | 29 +++++++++++++++++----- 5 files changed, 52 insertions(+), 11 deletions(-) diff --git a/scripts/bench-gateway-startup.ts b/scripts/bench-gateway-startup.ts index e7867bfc1d9..84d7445fed5 100644 --- a/scripts/bench-gateway-startup.ts +++ b/scripts/bench-gateway-startup.ts @@ -369,6 +369,13 @@ function formatRatioStats(stats: SummaryStats | null): string { return `p50=${formatRatio(stats.p50)} avg=${formatRatio(stats.avg)} min=${formatRatio(stats.min)} max=${formatRatio(stats.max)}`; } +function getStartupTraceStat( + startupTrace: Record, + key: string, +): SummaryStats | null { + return startupTrace[key] ?? null; +} + async function getFreePort(): Promise { return new Promise((resolve, reject) => { const server = createServer(); @@ -571,7 +578,10 @@ function parseStartupTraceMetrics(raw: string): Array<{ key: string; value: numb } const key = metricMatch[1]; const value = Number(metricMatch[2]); - if (!Number.isFinite(value) || (key !== "eventLoopMax" && !key.endsWith("Ms"))) { + if ( + !Number.isFinite(value) || + (key !== "eventLoopMax" && !key.endsWith("Ms") && !key.endsWith("Mb")) + ) { continue; } metrics.push({ key, value }); @@ -806,12 +816,14 @@ async function runCase(options: { }); if (index >= options.warmup) { samples.push(sample); + const heapUsedMb = sample.startupTrace["memory.ready.heapUsedMb"] ?? null; console.log( - `[gateway-startup-bench] ${options.benchCase.id} run ${samples.length}/${options.runs}: healthz=${formatMs(sample.healthz.ms)} readyz=${formatMs(sample.readyz.ms)} readyLog=${formatMs(sample.readyLogMs)} cpu=${formatMs(sample.cpuMs)} cpuCore=${formatRatio(sample.cpuCoreRatio)} rss=${formatMb(sample.maxRssMb)}`, + `[gateway-startup-bench] ${options.benchCase.id} run ${samples.length}/${options.runs}: healthz=${formatMs(sample.healthz.ms)} readyz=${formatMs(sample.readyz.ms)} readyLog=${formatMs(sample.readyLogMs)} cpu=${formatMs(sample.cpuMs)} cpuCore=${formatRatio(sample.cpuCoreRatio)} rss=${formatMb(sample.maxRssMb)} heap=${formatMb(heapUsedMb)}`, ); } else { + const heapUsedMb = sample.startupTrace["memory.ready.heapUsedMb"] ?? null; console.log( - `[gateway-startup-bench] ${options.benchCase.id} warmup ${index + 1}/${options.warmup}: healthz=${formatMs(sample.healthz.ms)} readyz=${formatMs(sample.readyz.ms)} cpu=${formatMs(sample.cpuMs)} cpuCore=${formatRatio(sample.cpuCoreRatio)} rss=${formatMb(sample.maxRssMb)}`, + `[gateway-startup-bench] ${options.benchCase.id} warmup ${index + 1}/${options.warmup}: healthz=${formatMs(sample.healthz.ms)} readyz=${formatMs(sample.readyz.ms)} cpu=${formatMs(sample.cpuMs)} cpuCore=${formatRatio(sample.cpuCoreRatio)} rss=${formatMb(sample.maxRssMb)} heap=${formatMb(heapUsedMb)}`, ); } } @@ -827,8 +839,14 @@ function printResult(result: CaseResult): void { console.log(` ready log: ${formatStats(result.summary.readyLogMs)}`); console.log(` /readyz: ${formatStats(result.summary.readyzMs)}`); console.log(` max RSS: ${formatMemoryStats(result.summary.maxRssMb)}`); + console.log( + ` ready memory: rss=${formatMemoryStats(getStartupTraceStat(result.summary.startupTrace, "memory.ready.rssMb"))} heap=${formatMemoryStats(getStartupTraceStat(result.summary.startupTrace, "memory.ready.heapUsedMb"))} external=${formatMemoryStats(getStartupTraceStat(result.summary.startupTrace, "memory.ready.externalMb"))}`, + ); + console.log( + ` post-ready memory: rss=${formatMemoryStats(getStartupTraceStat(result.summary.startupTrace, "memory.post-ready.rssMb"))} heap=${formatMemoryStats(getStartupTraceStat(result.summary.startupTrace, "memory.post-ready.heapUsedMb"))} external=${formatMemoryStats(getStartupTraceStat(result.summary.startupTrace, "memory.post-ready.externalMb"))}`, + ); const trace = Object.entries(result.summary.startupTrace) - .filter(([name]) => !name.endsWith(".total")) + .filter(([name]) => !name.endsWith(".total") && !name.startsWith("memory.")) .toSorted((a, b) => (b[1].avg ?? 0) - (a[1].avg ?? 0)) .slice(0, 8); if (trace.length > 0) { diff --git a/src/gateway/server-close.ts b/src/gateway/server-close.ts index 1a35ec499ce..07e08fd666f 100644 --- a/src/gateway/server-close.ts +++ b/src/gateway/server-close.ts @@ -182,7 +182,7 @@ export function createGatewayCloseHandler(params: { cron: { stop: () => void }; heartbeatRunner: HeartbeatRunner; updateCheckStop?: (() => void) | null; - stopTaskRegistryMaintenance?: (() => void) | null; + stopTaskRegistryMaintenance?: (() => Promise | void) | null; nodePresenceTimers: Map>; broadcast: (event: string, payload: unknown, opts?: { dropIfSlow?: boolean }) => void; tickInterval: ReturnType; diff --git a/src/gateway/server-import-boundary.test.ts b/src/gateway/server-import-boundary.test.ts index 259fcaecf1f..5761aa2026d 100644 --- a/src/gateway/server-import-boundary.test.ts +++ b/src/gateway/server-import-boundary.test.ts @@ -27,6 +27,9 @@ describe("gateway startup import boundaries", () => { 'createCanvasHostHandler } from "../canvas-host/server.js"', ); expect(serverImpl).not.toContain('from "../plugins/hook-runner-global.js"'); + expect(serverImpl).not.toContain('from "../tasks/task-registry.js"'); + expect(serverImpl).not.toContain('from "../tasks/task-registry.maintenance.js"'); + expect(serverImpl).toContain('import("../tasks/task-registry.maintenance.js")'); expect(validation).not.toContain("legacy-secretref-env-marker"); expect(validation).not.toContain("commands/doctor"); }); diff --git a/src/gateway/server-startup-early.ts b/src/gateway/server-startup-early.ts index 5b4a8c33183..7153839b5cb 100644 --- a/src/gateway/server-startup-early.ts +++ b/src/gateway/server-startup-early.ts @@ -102,6 +102,7 @@ export async function startGatewayEarlyRuntime(params: { const bonjourStop = await measureStartup(params.startupTrace, "runtime.early.discovery", () => startGatewayPluginDiscovery(params), ); + let getActiveTaskCount = () => 0; if (!params.minimalTestGateway) { const [{ primeRemoteSkillsCache, setSkillsRemoteRegistry }, taskRegistryMaintenance] = @@ -118,6 +119,7 @@ export async function startGatewayEarlyRuntime(params: { cronRuntimeAuthoritative: true, }); taskRegistryMaintenance.startTaskRegistryMaintenance(); + getActiveTaskCount = () => taskRegistryMaintenance.getInspectableTaskRegistrySummary().active; } const skillsChangeUnsub = params.minimalTestGateway @@ -175,6 +177,7 @@ export async function startGatewayEarlyRuntime(params: { return { bonjourStop, + getActiveTaskCount, skillsChangeUnsub, startMaintenance, }; diff --git a/src/gateway/server.impl.ts b/src/gateway/server.impl.ts index b9f1ee1b49c..6f5df6476b7 100644 --- a/src/gateway/server.impl.ts +++ b/src/gateway/server.impl.ts @@ -54,10 +54,6 @@ import { clearSecretsRuntimeSnapshot, getActiveSecretsRuntimeSnapshot, } from "../secrets/runtime.js"; -import { - getInspectableTaskRegistrySummary, - stopTaskRegistryMaintenance, -} from "../tasks/task-registry.maintenance.js"; import { createAuthRateLimiter, type AuthRateLimiter } from "./auth-rate-limit.js"; import { resolveGatewayAuth } from "./auth.js"; import { @@ -381,6 +377,23 @@ function createGatewayStartupTrace() { }; } +function collectProcessMemoryUsageMb(): ReadonlyArray { + const usage = process.memoryUsage(); + const toMb = (bytes: number) => bytes / 1024 / 1024; + return [ + ["rssMb", toMb(usage.rss)], + ["heapTotalMb", toMb(usage.heapTotal)], + ["heapUsedMb", toMb(usage.heapUsed)], + ["externalMb", toMb(usage.external)], + ["arrayBuffersMb", toMb(usage.arrayBuffers)], + ]; +} + +async function stopTaskRegistryMaintenanceOnDemand(): Promise { + const { stopTaskRegistryMaintenance } = await import("../tasks/task-registry.maintenance.js"); + stopTaskRegistryMaintenance(); +} + type AuthRateLimitConfig = Parameters[0]; function createGatewayAuthRateLimiters(rateLimitConfig: AuthRateLimitConfig | undefined): { @@ -555,12 +568,13 @@ export async function startGatewayServer( startDiagnosticHeartbeat(undefined, { getConfig: getRuntimeConfig }); } setGatewaySigusr1RestartPolicy({ allowExternal: isRestartEnabled(cfgAtStart) }); + let getActiveTaskCount = () => 0; setPreRestartDeferralCheck( () => getTotalQueueSize() + getTotalPendingReplies() + getActiveEmbeddedRunCount() + - getInspectableTaskRegistrySummary().active, + getActiveTaskCount(), ); // Unconditional startup migration: seed gateway.controlUi.allowedOrigins for existing // non-loopback installs that upgraded to v2026.2.26+ without required origins. @@ -890,7 +904,7 @@ export async function startGatewayServer( cron: runtimeState.cronState.cron, heartbeatRunner: runtimeState.heartbeatRunner, updateCheckStop: runtimeState.stopGatewayUpdateCheck, - stopTaskRegistryMaintenance, + stopTaskRegistryMaintenance: stopTaskRegistryMaintenanceOnDemand, nodePresenceTimers, broadcast, tickInterval: runtimeState.tickInterval, @@ -962,6 +976,7 @@ export async function startGatewayServer( }), ); runtimeState.bonjourStop = earlyRuntime.bonjourStop; + getActiveTaskCount = earlyRuntime.getActiveTaskCount; runtimeState.skillsChangeUnsub = earlyRuntime.skillsChangeUnsub; Object.assign( @@ -1372,6 +1387,7 @@ export async function startGatewayServer( deferSidecars: opts.deferStartupSidecars === true, }), )); + startupTrace.detail("memory.ready", collectProcessMemoryUsageMb()); startupTrace.mark("ready"); postAttachRuntimeReturned = true; activateScheduledServicesWhenReady(); @@ -1433,6 +1449,7 @@ export async function startGatewayServer( logCron, }); } + startupTrace.detail("memory.post-ready", collectProcessMemoryUsageMb()); } catch (err) { await closeOnStartupFailure(); throw err;