From e69da9d5781c0f60ee10ffcec6aa78e1994a416c Mon Sep 17 00:00:00 2001 From: Shakker Date: Wed, 29 Apr 2026 19:50:30 +0100 Subject: [PATCH] fix: honor config timeline diagnostics --- docs/cli/gateway.md | 2 +- docs/diagnostics/flags.md | 15 +++ src/gateway/server-startup-plugins.ts | 1 + src/gateway/server.impl.ts | 151 +++++++++++++++--------- src/infra/diagnostics-timeline.test.ts | 53 +++++++-- src/infra/diagnostics-timeline.ts | 44 +++++-- src/plugins/loader.ts | 1 + src/plugins/plugin-metadata-snapshot.ts | 1 + 8 files changed, 185 insertions(+), 83 deletions(-) diff --git a/docs/cli/gateway.md b/docs/cli/gateway.md index 5756d2cf76a..01d66251610 100644 --- a/docs/cli/gateway.md +++ b/docs/cli/gateway.md @@ -112,7 +112,7 @@ Inline `--password` can be exposed in local process listings. Prefer `--password ### Startup profiling - Set `OPENCLAW_GATEWAY_STARTUP_TRACE=1` to log phase timings during Gateway startup, including per-phase `eventLoopMax` delay and plugin lookup-table timings for installed-index, manifest registry, startup planning, and owner-map work. -- Set `OPENCLAW_DIAGNOSTICS=timeline` with `OPENCLAW_DIAGNOSTICS_TIMELINE_PATH=` to write a best-effort JSONL startup diagnostics timeline for external QA harnesses. Add `OPENCLAW_DIAGNOSTICS_EVENT_LOOP=1` to include event-loop samples. +- Set `OPENCLAW_DIAGNOSTICS=timeline` with `OPENCLAW_DIAGNOSTICS_TIMELINE_PATH=` to write a best-effort JSONL startup diagnostics timeline for external QA harnesses. You can also enable the flag with `diagnostics.flags: ["timeline"]` in config; the path is still env-provided. Add `OPENCLAW_DIAGNOSTICS_EVENT_LOOP=1` to include event-loop samples. - Run `pnpm test:startup:gateway -- --runs 5 --warmup 1` to benchmark Gateway startup. The benchmark records first process output, `/healthz`, `/readyz`, startup trace timings, event-loop delay, and plugin lookup-table timing details. ## Query a running Gateway diff --git a/docs/diagnostics/flags.md b/docs/diagnostics/flags.md index 50dd6955f23..9856b23a301 100644 --- a/docs/diagnostics/flags.md +++ b/docs/diagnostics/flags.md @@ -61,6 +61,21 @@ OPENCLAW_DIAGNOSTICS_TIMELINE_PATH=/tmp/openclaw-timeline.jsonl \ openclaw gateway run ``` +You can also enable it in config: + +```json +{ + "diagnostics": { + "flags": ["timeline"] + } +} +``` + +The timeline file path still comes from +`OPENCLAW_DIAGNOSTICS_TIMELINE_PATH`. When `timeline` is enabled only from +config, the earliest config-loading spans are not emitted because OpenClaw has +not read config yet; subsequent startup spans use the config flag. + `OPENCLAW_DIAGNOSTICS=1`, `OPENCLAW_DIAGNOSTICS=all`, and `OPENCLAW_DIAGNOSTICS=*` also enable the timeline because they enable every diagnostics flag. Prefer `timeline` when you only want the JSONL timing diff --git a/src/gateway/server-startup-plugins.ts b/src/gateway/server-startup-plugins.ts index 90e0d8c05fe..e2de6cd9659 100644 --- a/src/gateway/server-startup-plugins.ts +++ b/src/gateway/server-startup-plugins.ts @@ -50,6 +50,7 @@ async function prestageGatewayBundledRuntimeDeps(params: { () => prestageGatewayBundledRuntimeDepsImpl(params), { phase: "startup", + config: params.cfg, attributes: { pluginCount: params.pluginIds.length, }, diff --git a/src/gateway/server.impl.ts b/src/gateway/server.impl.ts index d1eea3b2403..bec904292e8 100644 --- a/src/gateway/server.impl.ts +++ b/src/gateway/server.impl.ts @@ -169,12 +169,23 @@ const canvasRuntime = runtimeForLogger(logCanvas); function createGatewayStartupTrace() { const logEnabled = isTruthyEnvValue(process.env.OPENCLAW_GATEWAY_STARTUP_TRACE); - const timelineEnabled = isDiagnosticsTimelineEnabled(); - const eventLoopTimelineEnabled = - timelineEnabled && isTruthyEnvValue(process.env.OPENCLAW_DIAGNOSTICS_EVENT_LOOP); - const eventLoopDelay = - logEnabled || eventLoopTimelineEnabled ? monitorEventLoopDelay({ resolution: 10 }) : undefined; - eventLoopDelay?.enable(); + let timelineConfig: OpenClawConfig | undefined; + let eventLoopDelay: ReturnType | undefined; + const timelineOptions = () => ({ + ...(timelineConfig ? { config: timelineConfig } : {}), + env: process.env, + }); + const eventLoopTimelineEnabled = () => + isDiagnosticsTimelineEnabled(timelineOptions()) && + isTruthyEnvValue(process.env.OPENCLAW_DIAGNOSTICS_EVENT_LOOP); + const ensureEventLoopDelay = () => { + if (eventLoopDelay || (!logEnabled && !eventLoopTimelineEnabled())) { + return; + } + eventLoopDelay = monitorEventLoopDelay({ resolution: 10 }); + eventLoopDelay.enable(); + }; + ensureEventLoopDelay(); const started = performance.now(); let last = started; let spanSequence = 0; @@ -214,23 +225,26 @@ function createGatewayStartupTrace() { activeSpanName: string, sample: ReturnType, ) => { - if (!eventLoopTimelineEnabled) { + if (!eventLoopTimelineEnabled()) { return; } if (!sample) { return; } - emitDiagnosticsTimelineEvent({ - type: "eventLoop.sample", - name: "eventLoop", - phase: "startup", - activeSpanName: mapTimelineName(activeSpanName), - attributes: - activeSpanName === mapTimelineName(activeSpanName) - ? undefined - : { traceName: activeSpanName }, - ...sample, - }); + emitDiagnosticsTimelineEvent( + { + type: "eventLoop.sample", + name: "eventLoop", + phase: "startup", + activeSpanName: mapTimelineName(activeSpanName), + attributes: + activeSpanName === mapTimelineName(activeSpanName) + ? undefined + : { traceName: activeSpanName }, + ...sample, + }, + timelineOptions(), + ); }; const emit = ( name: string, @@ -250,17 +264,24 @@ function createGatewayStartupTrace() { } }; return { + setConfig(config: OpenClawConfig) { + timelineConfig = config; + ensureEventLoopDelay(); + }, mark(name: string) { const now = performance.now(); const eventLoopSample = takeEventLoopSample(); emit(name, now - last, now - started, eventLoopSample); - emitDiagnosticsTimelineEvent({ - type: "mark", - name: mapTimelineName(name), - phase: "startup", - durationMs: now - started, - attributes: name === mapTimelineName(name) ? undefined : { traceName: name }, - }); + emitDiagnosticsTimelineEvent( + { + type: "mark", + name: mapTimelineName(name), + phase: "startup", + durationMs: now - started, + attributes: name === mapTimelineName(name) ? undefined : { traceName: name }, + }, + timelineOptions(), + ); emitEventLoopTimelineSample(name, eventLoopSample); last = now; if (name === "ready") { @@ -274,50 +295,62 @@ function createGatewayStartupTrace() { `startup trace: ${name} ${metrics.map(([key, value]) => formatMetric(key, value)).join(" ")}`, ); } - emitDiagnosticsTimelineEvent({ - type: "mark", - name: mapTimelineName(name), - phase: "startup", - attributes: { - traceName: name, - ...attributes, + emitDiagnosticsTimelineEvent( + { + type: "mark", + name: mapTimelineName(name), + phase: "startup", + attributes: { + traceName: name, + ...attributes, + }, }, - }); + timelineOptions(), + ); }, async measure(name: string, run: () => Promise | T): Promise { const before = performance.now(); const spanId = `gateway-startup-${++spanSequence}`; - emitDiagnosticsTimelineEvent({ - type: "span.start", - name: mapTimelineName(name), - phase: "startup", - spanId, - attributes: name === mapTimelineName(name) ? undefined : { traceName: name }, - }); + emitDiagnosticsTimelineEvent( + { + type: "span.start", + name: mapTimelineName(name), + phase: "startup", + spanId, + attributes: name === mapTimelineName(name) ? undefined : { traceName: name }, + }, + timelineOptions(), + ); try { const result = await run(); const now = performance.now(); - emitDiagnosticsTimelineEvent({ - type: "span.end", - name: mapTimelineName(name), - phase: "startup", - spanId, - durationMs: now - before, - attributes: name === mapTimelineName(name) ? undefined : { traceName: name }, - }); + emitDiagnosticsTimelineEvent( + { + type: "span.end", + name: mapTimelineName(name), + phase: "startup", + spanId, + durationMs: now - before, + attributes: name === mapTimelineName(name) ? undefined : { traceName: name }, + }, + timelineOptions(), + ); return result; } catch (error) { const now = performance.now(); - emitDiagnosticsTimelineEvent({ - type: "span.error", - name: mapTimelineName(name), - phase: "startup", - spanId, - durationMs: now - before, - attributes: name === mapTimelineName(name) ? undefined : { traceName: name }, - errorName: error instanceof Error ? error.name : typeof error, - errorMessage: error instanceof Error ? error.message : String(error), - }); + emitDiagnosticsTimelineEvent( + { + type: "span.error", + name: mapTimelineName(name), + phase: "startup", + spanId, + durationMs: now - before, + attributes: name === mapTimelineName(name) ? undefined : { traceName: name }, + errorName: error instanceof Error ? error.name : typeof error, + errorMessage: error instanceof Error ? error.message : String(error), + }, + timelineOptions(), + ); throw error; } finally { const now = performance.now(); @@ -466,6 +499,7 @@ export async function startGatewayServer( let startupLastGoodSnapshot = configSnapshot; const startupActivationSourceConfig = configSnapshot.sourceConfig; const startupRuntimeConfig = applyConfigOverrides(configSnapshot.config); + startupTrace.setConfig(startupRuntimeConfig); const authBootstrap = await startupTrace.measure("config.auth", () => prepareGatewayStartupConfig({ configSnapshot, @@ -476,6 +510,7 @@ export async function startGatewayServer( }), ); cfgAtStart = authBootstrap.cfg; + startupTrace.setConfig(cfgAtStart); if (authBootstrap.generatedToken) { if (authBootstrap.persistedGeneratedToken) { log.info( diff --git a/src/infra/diagnostics-timeline.test.ts b/src/infra/diagnostics-timeline.test.ts index 96a8a9346c4..7ba11252876 100644 --- a/src/infra/diagnostics-timeline.test.ts +++ b/src/infra/diagnostics-timeline.test.ts @@ -2,6 +2,7 @@ import { mkdtemp, readFile, rm } from "node:fs/promises"; import { tmpdir } from "node:os"; import { join } from "node:path"; import { afterEach, describe, expect, it } from "vitest"; +import type { OpenClawConfig } from "../config/types.openclaw.js"; import { emitDiagnosticsTimelineEvent, flushDiagnosticsTimelineForTest, @@ -42,20 +43,47 @@ describe("diagnostics timeline", () => { it("detects when timeline output is enabled", async () => { const { env } = await createTimelineEnv(); - expect(isDiagnosticsTimelineEnabled(env)).toBe(true); - expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS: "1" })).toBe(true); - expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS: "all" })).toBe(true); - expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS: "*" })).toBe(true); + expect(isDiagnosticsTimelineEnabled({ env })).toBe(true); + expect(isDiagnosticsTimelineEnabled({ env: { ...env, OPENCLAW_DIAGNOSTICS: "1" } })).toBe(true); + expect(isDiagnosticsTimelineEnabled({ env: { ...env, OPENCLAW_DIAGNOSTICS: "all" } })).toBe( + true, + ); + expect(isDiagnosticsTimelineEnabled({ env: { ...env, OPENCLAW_DIAGNOSTICS: "*" } })).toBe(true); expect( - isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS: "diagnostics.timeline" }), + isDiagnosticsTimelineEnabled({ + env: { ...env, OPENCLAW_DIAGNOSTICS: "diagnostics.timeline" }, + }), ).toBe(true); - expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS: "telegram.http" })).toBe( + expect( + isDiagnosticsTimelineEnabled({ env: { ...env, OPENCLAW_DIAGNOSTICS: "telegram.http" } }), + ).toBe(false); + expect(isDiagnosticsTimelineEnabled({ env: { ...env, OPENCLAW_DIAGNOSTICS: "0" } })).toBe( false, ); - expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS: "0" })).toBe(false); - expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS_TIMELINE_PATH: "" })).toBe( - false, + expect( + isDiagnosticsTimelineEnabled({ + env: { ...env, OPENCLAW_DIAGNOSTICS_TIMELINE_PATH: "" }, + }), + ).toBe(false); + }); + + it("honors config diagnostics flags after config is available", async () => { + const { env } = await createTimelineEnv(); + const envWithoutFlag = { ...env }; + delete envWithoutFlag.OPENCLAW_DIAGNOSTICS; + const configWithTimeline = { diagnostics: { flags: ["timeline"] } } as OpenClawConfig; + const configWithWildcard = { diagnostics: { flags: ["*"] } } as OpenClawConfig; + const configWithoutTimeline = { diagnostics: { flags: ["telegram.http"] } } as OpenClawConfig; + + expect(isDiagnosticsTimelineEnabled({ config: configWithTimeline, env: envWithoutFlag })).toBe( + true, ); + expect(isDiagnosticsTimelineEnabled({ config: configWithWildcard, env: envWithoutFlag })).toBe( + true, + ); + expect( + isDiagnosticsTimelineEnabled({ config: configWithoutTimeline, env: envWithoutFlag }), + ).toBe(false); }); it("writes JSONL diagnostic events with the stable envelope", async () => { @@ -72,7 +100,7 @@ describe("diagnostics timeline", () => { ignored: Number.NaN, }, }, - env, + { env }, ); const [event] = await readTimeline(path); @@ -95,12 +123,15 @@ describe("diagnostics timeline", () => { it("records span start and end events around successful work", async () => { const { env, path } = await createTimelineEnv(); + const configOnlyEnv = { ...env }; + delete configOnlyEnv.OPENCLAW_DIAGNOSTICS; await expect( measureDiagnosticsTimelineSpan("runtimeDeps.stage", () => "ok", { - env, phase: "startup", attributes: { pluginCount: 3 }, + config: { diagnostics: { flags: ["timeline"] } } as OpenClawConfig, + env: configOnlyEnv, }), ).resolves.toBe("ok"); diff --git a/src/infra/diagnostics-timeline.ts b/src/infra/diagnostics-timeline.ts index 168b37c7100..a7720085366 100644 --- a/src/infra/diagnostics-timeline.ts +++ b/src/infra/diagnostics-timeline.ts @@ -2,6 +2,7 @@ import { randomUUID } from "node:crypto"; import { appendFileSync, mkdirSync } from "node:fs"; import { dirname } from "node:path"; import { performance } from "node:perf_hooks"; +import type { OpenClawConfig } from "../config/types.openclaw.js"; import { isDiagnosticFlagEnabled } from "./diagnostic-flags.js"; export const OPENCLAW_DIAGNOSTICS_TIMELINE_SCHEMA_VERSION = "openclaw.diagnostics.v1"; @@ -48,16 +49,32 @@ type DiagnosticsTimelineSpanOptions = { phase?: string; parentSpanId?: string; attributes?: DiagnosticsTimelineAttributes; + config?: OpenClawConfig; + env?: NodeJS.ProcessEnv; +}; + +type DiagnosticsTimelineOptions = { + config?: OpenClawConfig; env?: NodeJS.ProcessEnv; }; let warnedAboutTimelineWrite = false; const createdTimelineDirs = new Set(); -export function isDiagnosticsTimelineEnabled(env: NodeJS.ProcessEnv = process.env): boolean { +function resolveDiagnosticsTimelineOptions( + options: DiagnosticsTimelineOptions = {}, +): Required> & Pick { + return { + env: options.env ?? process.env, + ...(options.config ? { config: options.config } : {}), + }; +} + +export function isDiagnosticsTimelineEnabled(options: DiagnosticsTimelineOptions = {}): boolean { + const { config, env } = resolveDiagnosticsTimelineOptions(options); return ( - (isDiagnosticFlagEnabled("timeline", undefined, env) || - isDiagnosticFlagEnabled("diagnostics.timeline", undefined, env)) && + (isDiagnosticFlagEnabled("timeline", config, env) || + isDiagnosticFlagEnabled("diagnostics.timeline", config, env)) && typeof env.OPENCLAW_DIAGNOSTICS_TIMELINE_PATH === "string" && env.OPENCLAW_DIAGNOSTICS_TIMELINE_PATH.trim().length > 0 ); @@ -131,9 +148,10 @@ function serializeTimelineEvent(event: DiagnosticsTimelineEvent, env: NodeJS.Pro export function emitDiagnosticsTimelineEvent( event: DiagnosticsTimelineEvent, - env: NodeJS.ProcessEnv = process.env, + options: DiagnosticsTimelineOptions = {}, ): void { - if (!isDiagnosticsTimelineEnabled(env)) { + const { env } = resolveDiagnosticsTimelineOptions(options); + if (!isDiagnosticsTimelineEnabled(options)) { return; } const path = env.OPENCLAW_DIAGNOSTICS_TIMELINE_PATH?.trim(); @@ -162,7 +180,7 @@ export async function measureDiagnosticsTimelineSpan( options: DiagnosticsTimelineSpanOptions = {}, ): Promise { const env = options.env ?? process.env; - if (!isDiagnosticsTimelineEnabled(env)) { + if (!isDiagnosticsTimelineEnabled({ config: options.config, env })) { return await run(); } const spanId = randomUUID(); @@ -176,7 +194,7 @@ export async function measureDiagnosticsTimelineSpan( parentSpanId: options.parentSpanId, attributes: options.attributes, }, - env, + { config: options.config, env }, ); try { const result = await run(); @@ -190,7 +208,7 @@ export async function measureDiagnosticsTimelineSpan( durationMs: performance.now() - startedAt, attributes: options.attributes, }, - env, + { config: options.config, env }, ); return result; } catch (error) { @@ -206,7 +224,7 @@ export async function measureDiagnosticsTimelineSpan( errorName: error instanceof Error ? error.name : typeof error, errorMessage: error instanceof Error ? error.message : String(error), }, - env, + { config: options.config, env }, ); throw error; } @@ -218,7 +236,7 @@ export function measureDiagnosticsTimelineSpanSync( options: DiagnosticsTimelineSpanOptions = {}, ): T { const env = options.env ?? process.env; - if (!isDiagnosticsTimelineEnabled(env)) { + if (!isDiagnosticsTimelineEnabled({ config: options.config, env })) { return run(); } const spanId = randomUUID(); @@ -232,7 +250,7 @@ export function measureDiagnosticsTimelineSpanSync( parentSpanId: options.parentSpanId, attributes: options.attributes, }, - env, + { config: options.config, env }, ); try { const result = run(); @@ -246,7 +264,7 @@ export function measureDiagnosticsTimelineSpanSync( durationMs: performance.now() - startedAt, attributes: options.attributes, }, - env, + { config: options.config, env }, ); return result; } catch (error) { @@ -262,7 +280,7 @@ export function measureDiagnosticsTimelineSpanSync( errorName: error instanceof Error ? error.name : typeof error, errorMessage: error instanceof Error ? error.message : String(error), }, - env, + { config: options.config, env }, ); throw error; } diff --git a/src/plugins/loader.ts b/src/plugins/loader.ts index d57c83651b8..701022bb31c 100644 --- a/src/plugins/loader.ts +++ b/src/plugins/loader.ts @@ -2142,6 +2142,7 @@ export function loadOpenClawPlugins(options: PluginLoadOptions = {}): PluginRegi () => installer(installParams), { phase: "startup", + config: cfg, env, attributes: { pluginId: record.id, diff --git a/src/plugins/plugin-metadata-snapshot.ts b/src/plugins/plugin-metadata-snapshot.ts index bea78f37a2f..472c5172084 100644 --- a/src/plugins/plugin-metadata-snapshot.ts +++ b/src/plugins/plugin-metadata-snapshot.ts @@ -130,6 +130,7 @@ export function loadPluginMetadataSnapshot( () => loadPluginMetadataSnapshotImpl(params), { phase: "startup", + config: params.config, env: params.env, attributes: { hasWorkspaceDir: params.workspaceDir !== undefined,