From 45b0d5ccc20399b8a9566cab08eb87d4dd3d47b5 Mon Sep 17 00:00:00 2001 From: Shakker Date: Mon, 27 Apr 2026 08:07:22 +0100 Subject: [PATCH] chore: add plugin lookup startup trace metrics --- CHANGELOG.md | 1 + docs/cli/gateway.md | 4 +- scripts/bench-gateway-startup.ts | 35 ++++++++++++-- src/gateway/server-plugins.test.ts | 11 +++++ src/gateway/server-startup-plugins.test.ts | 13 ++++++ src/gateway/server.impl.ts | 53 +++++++++++++++++++++- src/plugins/plugin-lookup-table.test.ts | 11 +++++ src/plugins/plugin-lookup-table.ts | 34 ++++++++++++++ 8 files changed, 154 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2742c095b26..0d39a4b3edd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ Docs: https://docs.openclaw.ai - Plugins/startup: use a `PluginLookUpTable` during Gateway startup so channel ownership, deferred channel loading, and startup plugin IDs reuse the same installed manifest registry instead of rebuilding manifest metadata on the boot path. Thanks @shakkernerd. - Plugins/startup: pass the Gateway `PluginLookUpTable` through plugin loading so auto-enable checks and startup-scope fallback reuse the same manifest registry instead of doing another manifest pass. Thanks @shakkernerd. - Plugins/startup: carry the Gateway `PluginLookUpTable` into deferred channel full-runtime reloads so post-listen startup does not rebuild manifest metadata after the provisional setup-runtime load. Thanks @shakkernerd. +- Gateway/startup: extend `OPENCLAW_GATEWAY_STARTUP_TRACE=1` with per-phase event-loop delay plus plugin lookup-table timing and count metrics for installed-index, manifest, startup-plan, and owner-map work, and include the new timing fields in startup benchmark summaries. Thanks @shakkernerd. - Plugins/registry: resolve lookup-table owner maps for providers, CLI backends, setup providers, command aliases, model catalogs, channel configs, and manifest contracts while preserving setup-only CLI backend ownership. Thanks @shakkernerd. - Process/Windows: decode command stdout and stderr from raw bytes with console-codepage awareness, while preserving valid UTF-8 output and multibyte characters split across chunks. Fixes #50519. Thanks @iready, @kevinten10, @zhangyongjie1997, @knightplat-blip, @heiqishi666, and @slepybear. - Agents/bootstrap: dedupe hook-injected bootstrap context files by workspace-relative path and store normalized resolved paths so duplicate relative and absolute hook paths no longer depend on the process cwd. (#59344; fixes #59319; related #56721, #56725, and #57587) Thanks @koen666. diff --git a/docs/cli/gateway.md b/docs/cli/gateway.md index 6a10c04660e..7c43a08375f 100644 --- a/docs/cli/gateway.md +++ b/docs/cli/gateway.md @@ -110,8 +110,8 @@ Inline `--password` can be exposed in local process listings. Prefer `--password ### Startup profiling -- Set `OPENCLAW_GATEWAY_STARTUP_TRACE=1` to log phase timings during Gateway startup. -- Run `pnpm test:startup:gateway -- --runs 5 --warmup 1` to benchmark Gateway startup. The benchmark records first process output, `/healthz`, `/readyz`, and startup trace timings. +- Set `OPENCLAW_GATEWAY_STARTUP_TRACE=1` to log phase timings during Gateway startup, including per-phase `eventLoopMax` delay and plugin lookup-table timings for installed-index, manifest registry, startup planning, and owner-map work. +- Run `pnpm test:startup:gateway -- --runs 5 --warmup 1` to benchmark Gateway startup. The benchmark records first process output, `/healthz`, `/readyz`, startup trace timings, event-loop delay, and plugin lookup-table timing details. ## Query a running Gateway diff --git a/scripts/bench-gateway-startup.ts b/scripts/bench-gateway-startup.ts index 4b76a896538..98f59703725 100644 --- a/scripts/bench-gateway-startup.ts +++ b/scripts/bench-gateway-startup.ts @@ -440,12 +440,39 @@ function killProcessTree(child: ChildProcessWithoutNullStreams, signal: NodeJS.S } function collectStartupTrace(line: string, startupTrace: Record): void { - const match = /startup trace: ([^ ]+) ([0-9.]+)ms total=([0-9.]+)ms/u.exec(line); - if (!match) { + const phaseMatch = /startup trace: ([^ ]+) ([0-9.]+)ms total=([0-9.]+)ms(?: (.*))?/u.exec(line); + if (phaseMatch) { + startupTrace[phaseMatch[1]] = Number(phaseMatch[2]); + startupTrace[`${phaseMatch[1]}.total`] = Number(phaseMatch[3]); + for (const metric of parseStartupTraceMetrics(phaseMatch[4] ?? "")) { + startupTrace[`${phaseMatch[1]}.${metric.key}`] = metric.value; + } return; } - startupTrace[match[1]] = Number(match[2]); - startupTrace[`${match[1]}.total`] = Number(match[3]); + const detailMatch = /startup trace: ([^ ]+) (.*)/u.exec(line); + if (!detailMatch) { + return; + } + for (const metric of parseStartupTraceMetrics(detailMatch[2])) { + startupTrace[`${detailMatch[1]}.${metric.key}`] = metric.value; + } +} + +function parseStartupTraceMetrics(raw: string): Array<{ key: string; value: number }> { + const metrics: Array<{ key: string; value: number }> = []; + for (const part of raw.trim().split(/\s+/u)) { + const metricMatch = /^([A-Za-z][A-Za-z0-9]*)=([0-9.]+)(?:ms)?$/u.exec(part); + if (!metricMatch) { + continue; + } + const key = metricMatch[1]; + const value = Number(metricMatch[2]); + if (!Number.isFinite(value) || (key !== "eventLoopMax" && !key.endsWith("Ms"))) { + continue; + } + metrics.push({ key, value }); + } + return metrics; } async function runGatewaySample(options: { diff --git a/src/gateway/server-plugins.test.ts b/src/gateway/server-plugins.test.ts index 8b5e17b62d2..0807496b8b2 100644 --- a/src/gateway/server-plugins.test.ts +++ b/src/gateway/server-plugins.test.ts @@ -147,6 +147,17 @@ function createLookUpTableForTest(params: { configuredDeferredChannelPluginIds: [], pluginIds: params.pluginIds ?? [], }, + metrics: { + registrySnapshotMs: 0, + manifestRegistryMs: 0, + startupPlanMs: 0, + ownerMapsMs: 0, + totalMs: 0, + indexPluginCount: 0, + manifestPluginCount: 0, + startupPluginCount: params.pluginIds?.length ?? 0, + deferredChannelPluginCount: 0, + }, }; } diff --git a/src/gateway/server-startup-plugins.test.ts b/src/gateway/server-startup-plugins.test.ts index 79e364d965d..cb22868a4f2 100644 --- a/src/gateway/server-startup-plugins.test.ts +++ b/src/gateway/server-startup-plugins.test.ts @@ -21,6 +21,17 @@ const resolveBundledRuntimeDependencyPackageInstallRoot = vi.hoisted(() => vi.fn((_packageRoot: string, _params: unknown) => "/runtime"), ); const pluginManifestRegistry = vi.hoisted(() => ({ plugins: [], diagnostics: [] })); +const pluginLookUpTableMetrics = vi.hoisted(() => ({ + registrySnapshotMs: 0, + manifestRegistryMs: 0, + startupPlanMs: 0, + ownerMapsMs: 0, + totalMs: 0, + indexPluginCount: 0, + manifestPluginCount: 0, + startupPluginCount: 1, + deferredChannelPluginCount: 0, +})); const loadPluginLookUpTable = vi.hoisted(() => vi.fn((_params: unknown) => ({ manifestRegistry: pluginManifestRegistry, @@ -28,6 +39,7 @@ const loadPluginLookUpTable = vi.hoisted(() => configuredDeferredChannelPluginIds: [], pluginIds: ["telegram"], }, + metrics: pluginLookUpTableMetrics, })), ); const resolveOpenClawPackageRootSync = vi.hoisted(() => vi.fn((_params: unknown) => "/package")); @@ -124,6 +136,7 @@ describe("prepareGatewayPluginBootstrap runtime-deps staging", () => { configuredDeferredChannelPluginIds: [], pluginIds: ["telegram"], }, + metrics: pluginLookUpTableMetrics, }); resolveOpenClawPackageRootSync.mockClear().mockReturnValue("/package"); runChannelPluginStartupMaintenance.mockClear(); diff --git a/src/gateway/server.impl.ts b/src/gateway/server.impl.ts index 588994da4d8..5c3d97bb7fa 100644 --- a/src/gateway/server.impl.ts +++ b/src/gateway/server.impl.ts @@ -1,3 +1,4 @@ +import { monitorEventLoopDelay } from "node:perf_hooks"; import { getActiveEmbeddedRunCount } from "../agents/pi-embedded-runner/run-state.js"; import { getTotalPendingReplies } from "../auto-reply/reply/dispatcher-registry.js"; import type { CanvasHostServer } from "../canvas-host/server.js"; @@ -142,11 +143,34 @@ const canvasRuntime = runtimeForLogger(logCanvas); function createGatewayStartupTrace() { const enabled = isTruthyEnvValue(process.env.OPENCLAW_GATEWAY_STARTUP_TRACE); + const eventLoopDelay = enabled ? monitorEventLoopDelay({ resolution: 10 }) : undefined; + eventLoopDelay?.enable(); const started = performance.now(); let last = started; - const emit = (name: string, durationMs: number, totalMs: number) => { + const formatMetric = (key: string, value: number | string) => + `${key}=${typeof value === "number" ? value.toFixed(1) : value}`; + const readEventLoopMaxMs = () => { + if (!eventLoopDelay) { + return 0; + } + const maxMs = eventLoopDelay.max / 1_000_000; + eventLoopDelay.reset(); + return maxMs; + }; + const emit = ( + name: string, + durationMs: number, + totalMs: number, + extras: ReadonlyArray = [], + ) => { if (enabled) { - log.info(`startup trace: ${name} ${durationMs.toFixed(1)}ms total=${totalMs.toFixed(1)}ms`); + const metrics = [ + `eventLoopMax=${readEventLoopMaxMs().toFixed(1)}ms`, + ...extras.map(([key, value]) => formatMetric(key, value)), + ].join(" "); + log.info( + `startup trace: ${name} ${durationMs.toFixed(1)}ms total=${totalMs.toFixed(1)}ms ${metrics}`, + ); } }; return { @@ -154,6 +178,17 @@ function createGatewayStartupTrace() { const now = performance.now(); emit(name, now - last, now - started); last = now; + if (name === "ready") { + eventLoopDelay?.disable(); + } + }, + detail(name: string, metrics: ReadonlyArray) { + if (!enabled) { + return; + } + log.info( + `startup trace: ${name} ${metrics.map(([key, value]) => formatMetric(key, value)).join(" ")}`, + ); }, async measure(name: string, run: () => Promise | T): Promise { const before = performance.now(); @@ -381,6 +416,20 @@ export async function startGatewayServer( pluginLookUpTable, baseMethods, } = pluginBootstrap; + if (pluginLookUpTable) { + const metrics = pluginLookUpTable.metrics; + startupTrace.detail("plugins.lookup-table", [ + ["registrySnapshotMs", metrics.registrySnapshotMs], + ["manifestRegistryMs", metrics.manifestRegistryMs], + ["startupPlanMs", metrics.startupPlanMs], + ["ownerMapsMs", metrics.ownerMapsMs], + ["totalMs", metrics.totalMs], + ["indexPlugins", String(metrics.indexPluginCount)], + ["manifestPlugins", String(metrics.manifestPluginCount)], + ["startupPlugins", String(metrics.startupPluginCount)], + ["deferredChannelPlugins", String(metrics.deferredChannelPluginCount)], + ]); + } let { pluginRegistry, baseGatewayMethods } = pluginBootstrap; const channelLogs = Object.fromEntries( listChannelPlugins().map((plugin) => [plugin.id, logChannels.child(plugin.id)]), diff --git a/src/plugins/plugin-lookup-table.test.ts b/src/plugins/plugin-lookup-table.test.ts index e12175d9770..77c209e479c 100644 --- a/src/plugins/plugin-lookup-table.test.ts +++ b/src/plugins/plugin-lookup-table.test.ts @@ -159,6 +159,17 @@ describe("loadPluginLookUpTable", () => { expect(table.manifestRegistry).toBe(manifestRegistry); expect(table.diagnostics).toEqual([indexDiagnostic, manifestDiagnostic]); + expect(table.metrics).toMatchObject({ + registrySnapshotMs: expect.any(Number), + manifestRegistryMs: expect.any(Number), + startupPlanMs: expect.any(Number), + ownerMapsMs: expect.any(Number), + totalMs: expect.any(Number), + indexPluginCount: 2, + manifestPluginCount: 2, + startupPluginCount: 1, + deferredChannelPluginCount: 0, + }); expect(table.byPluginId.get("telegram")?.id).toBe("telegram"); expect(table.normalizePluginId("openai-codex")).toBe("openai"); expect(table.owners.channels.get("telegram")).toEqual(["telegram"]); diff --git a/src/plugins/plugin-lookup-table.ts b/src/plugins/plugin-lookup-table.ts index 473019d8de1..2853b0bb4e8 100644 --- a/src/plugins/plugin-lookup-table.ts +++ b/src/plugins/plugin-lookup-table.ts @@ -32,6 +32,18 @@ export type PluginLookUpTableStartupPlan = { pluginIds: readonly string[]; }; +export type PluginLookUpTableMetrics = { + registrySnapshotMs: number; + manifestRegistryMs: number; + startupPlanMs: number; + ownerMapsMs: number; + totalMs: number; + indexPluginCount: number; + manifestPluginCount: number; + startupPluginCount: number; + deferredChannelPluginCount: number; +}; + export type PluginLookUpTable = { key: string; index: PluginRegistrySnapshot; @@ -43,6 +55,7 @@ export type PluginLookUpTable = { normalizePluginId: (pluginId: string) => string; owners: PluginLookUpTableOwnerMaps; startup: PluginLookUpTableStartupPlan; + metrics: PluginLookUpTableMetrics; }; export type LoadPluginLookUpTableParams = { @@ -123,13 +136,17 @@ function buildOwnerMaps(plugins: readonly PluginManifestRecord[]): PluginLookUpT } export function loadPluginLookUpTable(params: LoadPluginLookUpTableParams): PluginLookUpTable { + const totalStartedAt = performance.now(); + const registryStartedAt = performance.now(); const registryResult = loadPluginRegistrySnapshotWithMetadata({ config: params.config, workspaceDir: params.workspaceDir, env: params.env, ...(params.index ? { index: params.index } : {}), }); + const registrySnapshotMs = performance.now() - registryStartedAt; const index = registryResult.snapshot; + const manifestStartedAt = performance.now(); const manifestRegistry = loadPluginManifestRegistryForInstalledIndex({ index, config: params.config, @@ -137,6 +154,8 @@ export function loadPluginLookUpTable(params: LoadPluginLookUpTableParams): Plug env: params.env, includeDisabled: true, }); + const manifestRegistryMs = performance.now() - manifestStartedAt; + const startupPlanStartedAt = performance.now(); const channelPluginIds = resolveChannelPluginIdsFromRegistry({ manifestRegistry }); const configuredDeferredChannelPluginIds = resolveConfiguredDeferredChannelPluginIdsFromRegistry({ config: params.config, @@ -153,14 +172,18 @@ export function loadPluginLookUpTable(params: LoadPluginLookUpTableParams): Plug index, manifestRegistry, }); + const startupPlanMs = performance.now() - startupPlanStartedAt; const normalizePluginId = createPluginRegistryIdNormalizer(index, { manifestRegistry }); const byPluginId = new Map(manifestRegistry.plugins.map((plugin) => [plugin.id, plugin])); + const ownerMapsStartedAt = performance.now(); const owners = buildOwnerMaps(manifestRegistry.plugins); + const ownerMapsMs = performance.now() - ownerMapsStartedAt; const startup = { channelPluginIds, configuredDeferredChannelPluginIds, pluginIds, }; + const totalMs = performance.now() - totalStartedAt; return { key: hashJson({ @@ -182,5 +205,16 @@ export function loadPluginLookUpTable(params: LoadPluginLookUpTableParams): Plug normalizePluginId, owners, startup, + metrics: { + registrySnapshotMs, + manifestRegistryMs, + startupPlanMs, + ownerMapsMs, + totalMs, + indexPluginCount: index.plugins.length, + manifestPluginCount: manifestRegistry.plugins.length, + startupPluginCount: pluginIds.length, + deferredChannelPluginCount: configuredDeferredChannelPluginIds.length, + }, }; }