From 7f3dead33564df924b13b7a44f57e1b888dc60f5 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 28 Apr 2026 02:49:00 +0100 Subject: [PATCH] perf: keep gateway cold paths out of startup --- docs/help/testing.md | 2 +- scripts/check-cli-bootstrap-imports.mjs | 91 ++++++++++++++++++- src/gateway/server.impl.ts | 74 ++++++++------- .../check-cli-bootstrap-imports.test.ts | 47 ++++++++++ 4 files changed, 180 insertions(+), 34 deletions(-) diff --git a/docs/help/testing.md b/docs/help/testing.md index 435cba63acd..f3c66ed98b7 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -576,7 +576,7 @@ These Docker runners split into two buckets: explicitly want the larger exhaustive scan. - `test:docker:all` builds the live Docker image once via `test:docker:live-build`, packs OpenClaw once as an npm tarball through `scripts/package-openclaw-for-docker.mjs`, then builds/reuses two `scripts/e2e/Dockerfile` images. The bare image is only the Node/Git runner for install/update/plugin-dependency lanes; those lanes mount the prebuilt tarball. The functional image installs the same tarball into `/app` for built-app functionality lanes. Docker lane definitions live in `scripts/lib/docker-e2e-scenarios.mjs`; planner logic lives in `scripts/lib/docker-e2e-plan.mjs`; `scripts/test-docker-all.mjs` executes the selected plan. The aggregate uses a weighted local scheduler: `OPENCLAW_DOCKER_ALL_PARALLELISM` controls process slots, while resource caps keep heavy live, npm-install, and multi-service lanes from all starting at once. If a single lane is heavier than the active caps, the scheduler can still start it when the pool is empty and then keeps it running alone until capacity is available again. Defaults are 10 slots, `OPENCLAW_DOCKER_ALL_LIVE_LIMIT=9`, `OPENCLAW_DOCKER_ALL_NPM_LIMIT=10`, and `OPENCLAW_DOCKER_ALL_SERVICE_LIMIT=7`; tune `OPENCLAW_DOCKER_ALL_WEIGHT_LIMIT` or `OPENCLAW_DOCKER_ALL_DOCKER_LIMIT` only when the Docker host has more headroom. 
The runner performs a Docker preflight by default, removes stale OpenClaw E2E containers, prints status every 30 seconds, stores successful lane timings in `.artifacts/docker-tests/lane-timings.json`, and uses those timings to start longer lanes first on later runs. Use `OPENCLAW_DOCKER_ALL_DRY_RUN=1` to print the weighted lane manifest without building or running Docker, or `node scripts/test-docker-all.mjs --plan-json` to print the CI plan for selected lanes, package/image needs, and credentials. - `Package Acceptance` is the GitHub-native package gate for "does this installable tarball work as a product?" It resolves one candidate package from `source=npm`, `source=ref`, `source=url`, or `source=artifact`, uploads it as `package-under-test`, then runs the reusable Docker E2E lanes against that exact tarball instead of repacking the selected ref. `workflow_ref` selects the trusted workflow/harness scripts, while `package_ref` selects the source commit/branch/tag to pack when `source=ref`; this lets current acceptance logic validate older trusted commits. Profiles are ordered by breadth: `smoke` is quick install/channel/agent plus gateway/config, `package` is the package/update/plugin contract and the default native replacement for most Parallels package/update coverage, `product` adds MCP channels, cron/subagent cleanup, OpenAI web search, and OpenWebUI, and `full` runs the release-path Docker chunks with OpenWebUI. Release validation runs a custom package delta (`bundled-channel-deps-compat plugins-offline`) plus Telegram package QA because the release-path Docker chunks already cover the overlapping package/update/plugin lanes. Targeted GitHub Docker rerun commands generated from artifacts include prior package artifact and prepared image inputs when available, so failed lanes can avoid rebuilding the package and images. -- Build and release checks run `scripts/check-cli-bootstrap-imports.mjs` after tsdown. 
The guard walks the static built graph from `dist/entry.js` and `dist/cli/run-main.js` and fails if pre-dispatch startup imports package dependencies such as Commander, prompt UI, undici, or logging before command dispatch. Packaged CLI smoke also covers root help, onboard help, doctor help, status, config schema, and a model-list command. +- Build and release checks run `scripts/check-cli-bootstrap-imports.mjs` after tsdown. The guard walks the static built graph from `dist/entry.js` and `dist/cli/run-main.js` and fails if the pre-dispatch startup path imports package dependencies such as Commander, prompt UI, undici, or logging. It also fails when the bundled gateway run chunk cannot be found under `dist`, exceeds its size budget (70 KiB by default), or statically imports known cold gateway paths. Packaged CLI smoke also covers root help, onboard help, doctor help, status, config schema, and a model-list command. - Package Acceptance legacy compatibility is capped at `2026.4.25` (`2026.4.25-beta.*` included). Through that cutoff, the harness tolerates only shipped-package metadata gaps: omitted private QA inventory entries, missing `gateway install --wrapper`, missing patch files in the tarball-derived git fixture, missing persisted `update.channel`, legacy plugin install-record locations, missing marketplace install-record persistence, and config metadata migration during `plugins update`. For packages after `2026.4.25`, those paths are strict failures. - Container smoke runners: `test:docker:openwebui`, `test:docker:onboard`, `test:docker:npm-onboard-channel-agent`, `test:docker:update-channel-switch`, `test:docker:session-runtime-context`, `test:docker:agents-delete-shared-workspace`, `test:docker:gateway-network`, `test:docker:browser-cdp-snapshot`, `test:docker:mcp-channels`, `test:docker:pi-bundle-mcp-tools`, `test:docker:cron-mcp-cleanup`, `test:docker:plugins`, `test:docker:plugin-update`, and `test:docker:config-reload` boot one or more real containers and verify higher-level integration paths. 
diff --git a/scripts/check-cli-bootstrap-imports.mjs b/scripts/check-cli-bootstrap-imports.mjs index 8404ec0b607..82374278da5 100644 --- a/scripts/check-cli-bootstrap-imports.mjs +++ b/scripts/check-cli-bootstrap-imports.mjs @@ -6,6 +6,17 @@ import path from "node:path"; import { fileURLToPath } from "node:url"; const DEFAULT_ENTRYPOINTS = ["dist/entry.js", "dist/cli/run-main.js"]; +const DEFAULT_GATEWAY_RUN_CHUNK_MAX_BYTES = 70 * 1024; +const GATEWAY_RUN_CHUNK_MARKERS = ["const GATEWAY_RUN_VALUE_KEYS", "function addGatewayRunCommand"]; +const GATEWAY_RUN_FORBIDDEN_STATIC_IMPORTS = [ + "control-ui-assets", + "diagnostic-stability-bundle", + "onboard-helpers", + "process-respawn", + "restart-sentinel", + "server-close", + "server-reload-handlers", +]; const STATIC_IMPORT_RE = /\b(?:import|export)\s+(?:(?:[^'"()]*?\s+from\s+)|)["'](?[^"']+)["']/gu; @@ -104,8 +115,86 @@ export function collectCliBootstrapExternalImportErrors(params = {}) { return errors.toSorted((left, right) => left.localeCompare(right)); } +function listJsFiles(dirPath, fsImpl = fs) { + let entries; + try { + entries = fsImpl.readdirSync(dirPath, { withFileTypes: true }); + } catch { + return []; + } + const files = []; + for (const entry of entries) { + const fullPath = path.join(dirPath, entry.name); + if (entry.isDirectory()) { + files.push(...listJsFiles(fullPath, fsImpl)); + continue; + } + if (entry.isFile() && entry.name.endsWith(".js")) { + files.push(fullPath); + } + } + return files; +} + +export function collectGatewayRunChunkBudgetErrors(params = {}) { + const rootDir = params.rootDir ?? process.cwd(); + const fsImpl = params.fs ?? fs; + const distDir = path.resolve(rootDir, params.distDir ?? "dist"); + const maxBytes = params.gatewayRunChunkMaxBytes ?? 
DEFAULT_GATEWAY_RUN_CHUNK_MAX_BYTES; + const chunks = []; + + for (const filePath of listJsFiles(distDir, fsImpl)) { + let source; + try { + source = fsImpl.readFileSync(filePath, "utf8"); + } catch { + continue; + } + if (GATEWAY_RUN_CHUNK_MARKERS.every((marker) => source.includes(marker))) { + chunks.push({ filePath, source }); + } + } + + if (chunks.length === 0) { + return [ + "CLI bootstrap import guard could not find the bundled gateway run chunk. Run pnpm build first.", + ]; + } + + const errors = []; + for (const { filePath, source } of chunks) { + const relativePath = path.relative(rootDir, filePath) || filePath; + let size = Buffer.byteLength(source, "utf8"); + try { + size = fsImpl.statSync(filePath).size; + } catch { + // Fall back to source byte length for in-memory test fixtures. + } + if (size > maxBytes) { + errors.push( + `Gateway run chunk ${relativePath} is ${size} bytes, above budget ${maxBytes} bytes.`, + ); + } + + for (const specifier of listStaticImportSpecifiers(source)) { + for (const forbidden of GATEWAY_RUN_FORBIDDEN_STATIC_IMPORTS) { + if (specifier.includes(forbidden)) { + errors.push( + `Gateway run chunk ${relativePath} statically imports cold path "${specifier}".`, + ); + } + } + } + } + + return errors.toSorted((left, right) => left.localeCompare(right)); +} + export function checkCliBootstrapExternalImports(params = {}) { - const errors = collectCliBootstrapExternalImportErrors(params); + const errors = [ + ...collectCliBootstrapExternalImportErrors(params), + ...collectGatewayRunChunkBudgetErrors(params), + ]; if (errors.length === 0) { return; } diff --git a/src/gateway/server.impl.ts b/src/gateway/server.impl.ts index 1cc59c7afd2..971768f9c23 100644 --- a/src/gateway/server.impl.ts +++ b/src/gateway/server.impl.ts @@ -53,7 +53,6 @@ import { createAuthRateLimiter, type AuthRateLimiter } from "./auth-rate-limit.j import { resolveGatewayAuth } from "./auth.js"; import { createGatewayAuxHandlers } from "./server-aux-handlers.js"; 
import { createChannelManager } from "./server-channels.js"; -import { createGatewayCloseHandler, runGatewayClosePrelude } from "./server-close.js"; import { resolveGatewayControlUiRootState } from "./server-control-ui-root.js"; import { buildGatewayCronService } from "./server-cron.js"; import { applyGatewayLaneConcurrency } from "./server-lanes.js"; @@ -63,7 +62,6 @@ import { loadGatewayModelCatalog } from "./server-model-catalog.js"; import { bootstrapGatewayNetworkRuntime } from "./server-network-runtime.js"; import { createGatewayNodeSessionRuntime } from "./server-node-session-runtime.js"; import { setFallbackGatewayContextResolver } from "./server-plugins.js"; -import { startManagedGatewayConfigReloader } from "./server-reload-handlers.js"; import { createGatewayRequestContext } from "./server-request-context.js"; import { resolveGatewayRuntimeConfig } from "./server-runtime-config.js"; import { @@ -147,6 +145,13 @@ async function closeMcpLoopbackServerOnDemand(): Promise<void> { await closeMcpLoopbackServer(); } +let gatewayCloseModulePromise: Promise<typeof import("./server-close.js")> | null = null; + +function loadGatewayCloseModule(): Promise<typeof import("./server-close.js")> { + gatewayCloseModulePromise ??= import("./server-close.js"); + return gatewayCloseModulePromise; +} + const logHealth = log.child("health"); const logCron = log.child("cron"); const logReload = log.child("reload"); @@ -662,6 +667,7 @@ export async function startGatewayServer( const runClosePrelude = async () => { clearCurrentPluginMetadataSnapshot(); + const { runGatewayClosePrelude } = await loadGatewayCloseModule(); await runGatewayClosePrelude({ ...(diagnosticsEnabled ? 
{ stopDiagnostics: stopDiagnosticHeartbeat } : {}), clearSkillsRefreshTimer: () => { @@ -687,36 +693,39 @@ export async function startGatewayServer( ...opts, getRuntimeSnapshot, }); - const createCloseHandler = () => - createGatewayCloseHandler({ - bonjourStop: runtimeState.bonjourStop, - tailscaleCleanup: runtimeState.tailscaleCleanup, - canvasHost, - canvasHostServer, - releasePluginRouteRegistry, - stopChannel, - pluginServices: runtimeState.pluginServices, - cron: runtimeState.cronState.cron, - heartbeatRunner: runtimeState.heartbeatRunner, - updateCheckStop: runtimeState.stopGatewayUpdateCheck, - stopTaskRegistryMaintenance, - nodePresenceTimers, - broadcast, - tickInterval: runtimeState.tickInterval, - healthInterval: runtimeState.healthInterval, - dedupeCleanup: runtimeState.dedupeCleanup, - mediaCleanup: runtimeState.mediaCleanup, - agentUnsub: runtimeState.agentUnsub, - heartbeatUnsub: runtimeState.heartbeatUnsub, - transcriptUnsub: runtimeState.transcriptUnsub, - lifecycleUnsub: runtimeState.lifecycleUnsub, - chatRunState, - clients, - configReloader: runtimeState.configReloader, - wss, - httpServer, - httpServers, - }); + const createCloseHandler = + () => async (opts?: { reason?: string; restartExpectedMs?: number | null }) => { + const { createGatewayCloseHandler } = await loadGatewayCloseModule(); + await createGatewayCloseHandler({ + bonjourStop: runtimeState.bonjourStop, + tailscaleCleanup: runtimeState.tailscaleCleanup, + canvasHost, + canvasHostServer, + releasePluginRouteRegistry, + stopChannel, + pluginServices: runtimeState.pluginServices, + cron: runtimeState.cronState.cron, + heartbeatRunner: runtimeState.heartbeatRunner, + updateCheckStop: runtimeState.stopGatewayUpdateCheck, + stopTaskRegistryMaintenance, + nodePresenceTimers, + broadcast, + tickInterval: runtimeState.tickInterval, + healthInterval: runtimeState.healthInterval, + dedupeCleanup: runtimeState.dedupeCleanup, + mediaCleanup: runtimeState.mediaCleanup, + agentUnsub: 
runtimeState.agentUnsub, + heartbeatUnsub: runtimeState.heartbeatUnsub, + transcriptUnsub: runtimeState.transcriptUnsub, + lifecycleUnsub: runtimeState.lifecycleUnsub, + chatRunState, + clients, + configReloader: runtimeState.configReloader, + wss, + httpServer, + httpServers, + })(opts); + }; const closeOnStartupFailure = async () => { await runClosePrelude(); await createCloseHandler()({ reason: "gateway startup failed" }); @@ -978,6 +987,7 @@ export async function startGatewayServer( }); runtimeState.heartbeatRunner = activated.heartbeatRunner; + const { startManagedGatewayConfigReloader } = await import("./server-reload-handlers.js"); runtimeState.configReloader = startManagedGatewayConfigReloader({ minimalTestGateway, initialConfig: cfgAtStart, diff --git a/test/scripts/check-cli-bootstrap-imports.test.ts b/test/scripts/check-cli-bootstrap-imports.test.ts index d611e7c8081..8faccef76ea 100644 --- a/test/scripts/check-cli-bootstrap-imports.test.ts +++ b/test/scripts/check-cli-bootstrap-imports.test.ts @@ -4,6 +4,7 @@ import { dirname, join } from "node:path"; import { afterEach, describe, expect, it } from "vitest"; import { collectCliBootstrapExternalImportErrors, + collectGatewayRunChunkBudgetErrors, listStaticImportSpecifiers, } from "../../scripts/check-cli-bootstrap-imports.mjs"; @@ -22,6 +23,19 @@ function writeFixture(root: string, relativePath: string, source: string): void writeFileSync(target, source, "utf8"); } +function writeGatewayRunChunk(root: string, source = ""): void { + writeFixture( + root, + "dist/run-gateway.js", + [ + 'import "./string-coerce.js";', + "const GATEWAY_RUN_VALUE_KEYS = [];", + "function addGatewayRunCommand(cmd) { return cmd; }", + source, + ].join("\n"), + ); +} + afterEach(() => { for (const root of tempRoots.splice(0)) { rmSync(root, { recursive: true, force: true }); @@ -53,8 +67,10 @@ describe("check-cli-bootstrap-imports", () => { `import "../light.js";\nexport async function run() { return import("tslog"); }\n`, ); 
writeFixture(root, "dist/light.js", `import path from "node:path";\nvoid path;\n`); + writeGatewayRunChunk(root); expect(collectCliBootstrapExternalImportErrors({ rootDir: root })).toEqual([]); + expect(collectGatewayRunChunkBudgetErrors({ rootDir: root })).toEqual([]); }); it("reports external packages in the static bootstrap graph", () => { @@ -62,9 +78,40 @@ describe("check-cli-bootstrap-imports", () => { writeFixture(root, "dist/entry.js", `import "./cli/run-main.js";\n`); writeFixture(root, "dist/cli/run-main.js", `import "../heavy.js";\n`); writeFixture(root, "dist/heavy.js", `import { Logger } from "tslog";\nvoid Logger;\n`); + writeGatewayRunChunk(root); expect(collectCliBootstrapExternalImportErrors({ rootDir: root })).toEqual([ 'CLI bootstrap static graph imports external package "tslog" from dist/heavy.js.', ]); }); + + it("reports missing gateway run chunk", () => { + const root = makeTempRoot(); + + expect(collectGatewayRunChunkBudgetErrors({ rootDir: root })).toEqual([ + "CLI bootstrap import guard could not find the bundled gateway run chunk. Run pnpm build first.", + ]); + }); + + it("reports cold static imports in the gateway run chunk", () => { + const root = makeTempRoot(); + writeGatewayRunChunk(root, 'import "./restart-sentinel-abc123.js";'); + + expect(collectGatewayRunChunkBudgetErrors({ rootDir: root })).toEqual([ + 'Gateway run chunk dist/run-gateway.js statically imports cold path "./restart-sentinel-abc123.js".', + ]); + }); + + it("reports oversized gateway run chunks", () => { + const root = makeTempRoot(); + writeGatewayRunChunk(root, "x".repeat(10)); + + expect( + collectGatewayRunChunkBudgetErrors({ rootDir: root, gatewayRunChunkMaxBytes: 50 }), + ).toEqual([ + expect.stringMatching( + /^Gateway run chunk dist\/run-gateway\.js is \d+ bytes, above budget 50 bytes\.$/, + ), + ]); + }); });