perf(gateway): defer task registry startup imports

This commit is contained in:
Peter Steinberger
2026-05-03 14:47:06 +01:00
parent 0949f4fe51
commit bd79b25bb7
5 changed files with 52 additions and 11 deletions

View File

@@ -369,6 +369,13 @@ function formatRatioStats(stats: SummaryStats | null): string {
return `p50=${formatRatio(stats.p50)} avg=${formatRatio(stats.avg)} min=${formatRatio(stats.min)} max=${formatRatio(stats.max)}`;
}
/**
 * Looks up the summary stats recorded under `key` in a startup-trace summary.
 *
 * Only own properties are consulted, so a key that collides with an
 * `Object.prototype` member (for example `"constructor"` or `"toString"`)
 * yields `null` instead of an inherited function masquerading as stats.
 *
 * @param startupTrace - Summary stats keyed by startup-trace metric name.
 * @param key - Metric name to look up, e.g. `"memory.ready.rssMb"`.
 * @returns The stats stored for `key`, or `null` when the key is absent.
 */
function getStartupTraceStat(
  startupTrace: Record<string, SummaryStats>,
  key: string,
): SummaryStats | null {
  // `hasOwnProperty.call` also works for null-prototype records.
  if (!Object.prototype.hasOwnProperty.call(startupTrace, key)) {
    return null;
  }
  return startupTrace[key] ?? null;
}
async function getFreePort(): Promise<number> {
return new Promise((resolve, reject) => {
const server = createServer();
@@ -571,7 +578,10 @@ function parseStartupTraceMetrics(raw: string): Array<{ key: string; value: numb
}
const key = metricMatch[1];
const value = Number(metricMatch[2]);
if (!Number.isFinite(value) || (key !== "eventLoopMax" && !key.endsWith("Ms"))) {
if (
!Number.isFinite(value) ||
(key !== "eventLoopMax" && !key.endsWith("Ms") && !key.endsWith("Mb"))
) {
continue;
}
metrics.push({ key, value });
@@ -806,12 +816,14 @@ async function runCase(options: {
});
if (index >= options.warmup) {
samples.push(sample);
const heapUsedMb = sample.startupTrace["memory.ready.heapUsedMb"] ?? null;
console.log(
`[gateway-startup-bench] ${options.benchCase.id} run ${samples.length}/${options.runs}: healthz=${formatMs(sample.healthz.ms)} readyz=${formatMs(sample.readyz.ms)} readyLog=${formatMs(sample.readyLogMs)} cpu=${formatMs(sample.cpuMs)} cpuCore=${formatRatio(sample.cpuCoreRatio)} rss=${formatMb(sample.maxRssMb)}`,
`[gateway-startup-bench] ${options.benchCase.id} run ${samples.length}/${options.runs}: healthz=${formatMs(sample.healthz.ms)} readyz=${formatMs(sample.readyz.ms)} readyLog=${formatMs(sample.readyLogMs)} cpu=${formatMs(sample.cpuMs)} cpuCore=${formatRatio(sample.cpuCoreRatio)} rss=${formatMb(sample.maxRssMb)} heap=${formatMb(heapUsedMb)}`,
);
} else {
const heapUsedMb = sample.startupTrace["memory.ready.heapUsedMb"] ?? null;
console.log(
`[gateway-startup-bench] ${options.benchCase.id} warmup ${index + 1}/${options.warmup}: healthz=${formatMs(sample.healthz.ms)} readyz=${formatMs(sample.readyz.ms)} cpu=${formatMs(sample.cpuMs)} cpuCore=${formatRatio(sample.cpuCoreRatio)} rss=${formatMb(sample.maxRssMb)}`,
`[gateway-startup-bench] ${options.benchCase.id} warmup ${index + 1}/${options.warmup}: healthz=${formatMs(sample.healthz.ms)} readyz=${formatMs(sample.readyz.ms)} cpu=${formatMs(sample.cpuMs)} cpuCore=${formatRatio(sample.cpuCoreRatio)} rss=${formatMb(sample.maxRssMb)} heap=${formatMb(heapUsedMb)}`,
);
}
}
@@ -827,8 +839,14 @@ function printResult(result: CaseResult): void {
console.log(` ready log: ${formatStats(result.summary.readyLogMs)}`);
console.log(` /readyz: ${formatStats(result.summary.readyzMs)}`);
console.log(` max RSS: ${formatMemoryStats(result.summary.maxRssMb)}`);
console.log(
` ready memory: rss=${formatMemoryStats(getStartupTraceStat(result.summary.startupTrace, "memory.ready.rssMb"))} heap=${formatMemoryStats(getStartupTraceStat(result.summary.startupTrace, "memory.ready.heapUsedMb"))} external=${formatMemoryStats(getStartupTraceStat(result.summary.startupTrace, "memory.ready.externalMb"))}`,
);
console.log(
` post-ready memory: rss=${formatMemoryStats(getStartupTraceStat(result.summary.startupTrace, "memory.post-ready.rssMb"))} heap=${formatMemoryStats(getStartupTraceStat(result.summary.startupTrace, "memory.post-ready.heapUsedMb"))} external=${formatMemoryStats(getStartupTraceStat(result.summary.startupTrace, "memory.post-ready.externalMb"))}`,
);
const trace = Object.entries(result.summary.startupTrace)
.filter(([name]) => !name.endsWith(".total"))
.filter(([name]) => !name.endsWith(".total") && !name.startsWith("memory."))
.toSorted((a, b) => (b[1].avg ?? 0) - (a[1].avg ?? 0))
.slice(0, 8);
if (trace.length > 0) {

View File

@@ -182,7 +182,7 @@ export function createGatewayCloseHandler(params: {
cron: { stop: () => void };
heartbeatRunner: HeartbeatRunner;
updateCheckStop?: (() => void) | null;
stopTaskRegistryMaintenance?: (() => void) | null;
stopTaskRegistryMaintenance?: (() => Promise<void> | void) | null;
nodePresenceTimers: Map<string, ReturnType<typeof setInterval>>;
broadcast: (event: string, payload: unknown, opts?: { dropIfSlow?: boolean }) => void;
tickInterval: ReturnType<typeof setInterval>;

View File

@@ -27,6 +27,9 @@ describe("gateway startup import boundaries", () => {
'createCanvasHostHandler } from "../canvas-host/server.js"',
);
expect(serverImpl).not.toContain('from "../plugins/hook-runner-global.js"');
expect(serverImpl).not.toContain('from "../tasks/task-registry.js"');
expect(serverImpl).not.toContain('from "../tasks/task-registry.maintenance.js"');
expect(serverImpl).toContain('import("../tasks/task-registry.maintenance.js")');
expect(validation).not.toContain("legacy-secretref-env-marker");
expect(validation).not.toContain("commands/doctor");
});

View File

@@ -102,6 +102,7 @@ export async function startGatewayEarlyRuntime(params: {
const bonjourStop = await measureStartup(params.startupTrace, "runtime.early.discovery", () =>
startGatewayPluginDiscovery(params),
);
let getActiveTaskCount = () => 0;
if (!params.minimalTestGateway) {
const [{ primeRemoteSkillsCache, setSkillsRemoteRegistry }, taskRegistryMaintenance] =
@@ -118,6 +119,7 @@ export async function startGatewayEarlyRuntime(params: {
cronRuntimeAuthoritative: true,
});
taskRegistryMaintenance.startTaskRegistryMaintenance();
getActiveTaskCount = () => taskRegistryMaintenance.getInspectableTaskRegistrySummary().active;
}
const skillsChangeUnsub = params.minimalTestGateway
@@ -175,6 +177,7 @@ export async function startGatewayEarlyRuntime(params: {
return {
bonjourStop,
getActiveTaskCount,
skillsChangeUnsub,
startMaintenance,
};

View File

@@ -54,10 +54,6 @@ import {
clearSecretsRuntimeSnapshot,
getActiveSecretsRuntimeSnapshot,
} from "../secrets/runtime.js";
import {
getInspectableTaskRegistrySummary,
stopTaskRegistryMaintenance,
} from "../tasks/task-registry.maintenance.js";
import { createAuthRateLimiter, type AuthRateLimiter } from "./auth-rate-limit.js";
import { resolveGatewayAuth } from "./auth.js";
import {
@@ -381,6 +377,23 @@ function createGatewayStartupTrace() {
};
}
/**
 * Snapshots `process.memoryUsage()` and converts each reading from bytes to
 * mebibytes (bytes / 1024 / 1024).
 *
 * @returns `[label, valueMb]` pairs in a fixed order: `rssMb`, `heapTotalMb`,
 * `heapUsedMb`, `externalMb`, `arrayBuffersMb`.
 */
function collectProcessMemoryUsageMb(): ReadonlyArray<readonly [string, number]> {
  const { rss, heapTotal, heapUsed, external, arrayBuffers } = process.memoryUsage();
  // Pair each label with its raw byte count first; the unit conversion is
  // applied uniformly in the map below.
  const bytesByLabel: ReadonlyArray<readonly [string, number]> = [
    ["rssMb", rss],
    ["heapTotalMb", heapTotal],
    ["heapUsedMb", heapUsed],
    ["externalMb", external],
    ["arrayBuffersMb", arrayBuffers],
  ];
  return bytesByLabel.map(
    ([label, bytes]): readonly [string, number] => [label, bytes / 1024 / 1024],
  );
}
/**
 * Stops task-registry maintenance, loading the maintenance module through a
 * dynamic `import()` at call time rather than a static top-level import.
 *
 * @returns A promise that resolves once the module has loaded and
 * `stopTaskRegistryMaintenance()` has been called.
 */
async function stopTaskRegistryMaintenanceOnDemand(): Promise<void> {
  const maintenanceModule = await import("../tasks/task-registry.maintenance.js");
  maintenanceModule.stopTaskRegistryMaintenance();
}
type AuthRateLimitConfig = Parameters<typeof createAuthRateLimiter>[0];
function createGatewayAuthRateLimiters(rateLimitConfig: AuthRateLimitConfig | undefined): {
@@ -555,12 +568,13 @@ export async function startGatewayServer(
startDiagnosticHeartbeat(undefined, { getConfig: getRuntimeConfig });
}
setGatewaySigusr1RestartPolicy({ allowExternal: isRestartEnabled(cfgAtStart) });
let getActiveTaskCount = () => 0;
setPreRestartDeferralCheck(
() =>
getTotalQueueSize() +
getTotalPendingReplies() +
getActiveEmbeddedRunCount() +
getInspectableTaskRegistrySummary().active,
getActiveTaskCount(),
);
// Unconditional startup migration: seed gateway.controlUi.allowedOrigins for existing
// non-loopback installs that upgraded to v2026.2.26+ without required origins.
@@ -890,7 +904,7 @@ export async function startGatewayServer(
cron: runtimeState.cronState.cron,
heartbeatRunner: runtimeState.heartbeatRunner,
updateCheckStop: runtimeState.stopGatewayUpdateCheck,
stopTaskRegistryMaintenance,
stopTaskRegistryMaintenance: stopTaskRegistryMaintenanceOnDemand,
nodePresenceTimers,
broadcast,
tickInterval: runtimeState.tickInterval,
@@ -962,6 +976,7 @@ export async function startGatewayServer(
}),
);
runtimeState.bonjourStop = earlyRuntime.bonjourStop;
getActiveTaskCount = earlyRuntime.getActiveTaskCount;
runtimeState.skillsChangeUnsub = earlyRuntime.skillsChangeUnsub;
Object.assign(
@@ -1372,6 +1387,7 @@ export async function startGatewayServer(
deferSidecars: opts.deferStartupSidecars === true,
}),
));
startupTrace.detail("memory.ready", collectProcessMemoryUsageMb());
startupTrace.mark("ready");
postAttachRuntimeReturned = true;
activateScheduledServicesWhenReady();
@@ -1433,6 +1449,7 @@ export async function startGatewayServer(
logCron,
});
}
startupTrace.detail("memory.post-ready", collectProcessMemoryUsageMb());
} catch (err) {
await closeOnStartupFailure();
throw err;