fix: honor config timeline diagnostics

This commit is contained in:
Shakker
2026-04-29 19:50:30 +01:00
parent d001c3436b
commit e69da9d578
8 changed files with 185 additions and 83 deletions

View File

@@ -112,7 +112,7 @@ Inline `--password` can be exposed in local process listings. Prefer `--password
### Startup profiling ### Startup profiling
- Set `OPENCLAW_GATEWAY_STARTUP_TRACE=1` to log phase timings during Gateway startup, including per-phase `eventLoopMax` delay and plugin lookup-table timings for installed-index, manifest registry, startup planning, and owner-map work. - Set `OPENCLAW_GATEWAY_STARTUP_TRACE=1` to log phase timings during Gateway startup, including per-phase `eventLoopMax` delay and plugin lookup-table timings for installed-index, manifest registry, startup planning, and owner-map work.
- Set `OPENCLAW_DIAGNOSTICS=timeline` with `OPENCLAW_DIAGNOSTICS_TIMELINE_PATH=<path>` to write a best-effort JSONL startup diagnostics timeline for external QA harnesses. Add `OPENCLAW_DIAGNOSTICS_EVENT_LOOP=1` to include event-loop samples. - Set `OPENCLAW_DIAGNOSTICS=timeline` with `OPENCLAW_DIAGNOSTICS_TIMELINE_PATH=<path>` to write a best-effort JSONL startup diagnostics timeline for external QA harnesses. You can also enable the flag with `diagnostics.flags: ["timeline"]` in config; the path is still env-provided. Add `OPENCLAW_DIAGNOSTICS_EVENT_LOOP=1` to include event-loop samples.
- Run `pnpm test:startup:gateway -- --runs 5 --warmup 1` to benchmark Gateway startup. The benchmark records first process output, `/healthz`, `/readyz`, startup trace timings, event-loop delay, and plugin lookup-table timing details. - Run `pnpm test:startup:gateway -- --runs 5 --warmup 1` to benchmark Gateway startup. The benchmark records first process output, `/healthz`, `/readyz`, startup trace timings, event-loop delay, and plugin lookup-table timing details.
## Query a running Gateway ## Query a running Gateway

View File

@@ -61,6 +61,21 @@ OPENCLAW_DIAGNOSTICS_TIMELINE_PATH=/tmp/openclaw-timeline.jsonl \
openclaw gateway run openclaw gateway run
``` ```
You can also enable it in config:
```json
{
"diagnostics": {
"flags": ["timeline"]
}
}
```
The timeline file path still comes from
`OPENCLAW_DIAGNOSTICS_TIMELINE_PATH`. When `timeline` is enabled only from
config, the earliest config-loading spans are not emitted because OpenClaw has
not read config yet; subsequent startup spans use the config flag.
`OPENCLAW_DIAGNOSTICS=1`, `OPENCLAW_DIAGNOSTICS=all`, and `OPENCLAW_DIAGNOSTICS=1`, `OPENCLAW_DIAGNOSTICS=all`, and
`OPENCLAW_DIAGNOSTICS=*` also enable the timeline because they enable every `OPENCLAW_DIAGNOSTICS=*` also enable the timeline because they enable every
diagnostics flag. Prefer `timeline` when you only want the JSONL timing diagnostics flag. Prefer `timeline` when you only want the JSONL timing

View File

@@ -50,6 +50,7 @@ async function prestageGatewayBundledRuntimeDeps(params: {
() => prestageGatewayBundledRuntimeDepsImpl(params), () => prestageGatewayBundledRuntimeDepsImpl(params),
{ {
phase: "startup", phase: "startup",
config: params.cfg,
attributes: { attributes: {
pluginCount: params.pluginIds.length, pluginCount: params.pluginIds.length,
}, },

View File

@@ -169,12 +169,23 @@ const canvasRuntime = runtimeForLogger(logCanvas);
function createGatewayStartupTrace() { function createGatewayStartupTrace() {
const logEnabled = isTruthyEnvValue(process.env.OPENCLAW_GATEWAY_STARTUP_TRACE); const logEnabled = isTruthyEnvValue(process.env.OPENCLAW_GATEWAY_STARTUP_TRACE);
const timelineEnabled = isDiagnosticsTimelineEnabled(); let timelineConfig: OpenClawConfig | undefined;
const eventLoopTimelineEnabled = let eventLoopDelay: ReturnType<typeof monitorEventLoopDelay> | undefined;
timelineEnabled && isTruthyEnvValue(process.env.OPENCLAW_DIAGNOSTICS_EVENT_LOOP); const timelineOptions = () => ({
const eventLoopDelay = ...(timelineConfig ? { config: timelineConfig } : {}),
logEnabled || eventLoopTimelineEnabled ? monitorEventLoopDelay({ resolution: 10 }) : undefined; env: process.env,
eventLoopDelay?.enable(); });
const eventLoopTimelineEnabled = () =>
isDiagnosticsTimelineEnabled(timelineOptions()) &&
isTruthyEnvValue(process.env.OPENCLAW_DIAGNOSTICS_EVENT_LOOP);
const ensureEventLoopDelay = () => {
if (eventLoopDelay || (!logEnabled && !eventLoopTimelineEnabled())) {
return;
}
eventLoopDelay = monitorEventLoopDelay({ resolution: 10 });
eventLoopDelay.enable();
};
ensureEventLoopDelay();
const started = performance.now(); const started = performance.now();
let last = started; let last = started;
let spanSequence = 0; let spanSequence = 0;
@@ -214,23 +225,26 @@ function createGatewayStartupTrace() {
activeSpanName: string, activeSpanName: string,
sample: ReturnType<typeof takeEventLoopSample>, sample: ReturnType<typeof takeEventLoopSample>,
) => { ) => {
if (!eventLoopTimelineEnabled) { if (!eventLoopTimelineEnabled()) {
return; return;
} }
if (!sample) { if (!sample) {
return; return;
} }
emitDiagnosticsTimelineEvent({ emitDiagnosticsTimelineEvent(
type: "eventLoop.sample", {
name: "eventLoop", type: "eventLoop.sample",
phase: "startup", name: "eventLoop",
activeSpanName: mapTimelineName(activeSpanName), phase: "startup",
attributes: activeSpanName: mapTimelineName(activeSpanName),
activeSpanName === mapTimelineName(activeSpanName) attributes:
? undefined activeSpanName === mapTimelineName(activeSpanName)
: { traceName: activeSpanName }, ? undefined
...sample, : { traceName: activeSpanName },
}); ...sample,
},
timelineOptions(),
);
}; };
const emit = ( const emit = (
name: string, name: string,
@@ -250,17 +264,24 @@ function createGatewayStartupTrace() {
} }
}; };
return { return {
setConfig(config: OpenClawConfig) {
timelineConfig = config;
ensureEventLoopDelay();
},
mark(name: string) { mark(name: string) {
const now = performance.now(); const now = performance.now();
const eventLoopSample = takeEventLoopSample(); const eventLoopSample = takeEventLoopSample();
emit(name, now - last, now - started, eventLoopSample); emit(name, now - last, now - started, eventLoopSample);
emitDiagnosticsTimelineEvent({ emitDiagnosticsTimelineEvent(
type: "mark", {
name: mapTimelineName(name), type: "mark",
phase: "startup", name: mapTimelineName(name),
durationMs: now - started, phase: "startup",
attributes: name === mapTimelineName(name) ? undefined : { traceName: name }, durationMs: now - started,
}); attributes: name === mapTimelineName(name) ? undefined : { traceName: name },
},
timelineOptions(),
);
emitEventLoopTimelineSample(name, eventLoopSample); emitEventLoopTimelineSample(name, eventLoopSample);
last = now; last = now;
if (name === "ready") { if (name === "ready") {
@@ -274,50 +295,62 @@ function createGatewayStartupTrace() {
`startup trace: ${name} ${metrics.map(([key, value]) => formatMetric(key, value)).join(" ")}`, `startup trace: ${name} ${metrics.map(([key, value]) => formatMetric(key, value)).join(" ")}`,
); );
} }
emitDiagnosticsTimelineEvent({ emitDiagnosticsTimelineEvent(
type: "mark", {
name: mapTimelineName(name), type: "mark",
phase: "startup", name: mapTimelineName(name),
attributes: { phase: "startup",
traceName: name, attributes: {
...attributes, traceName: name,
...attributes,
},
}, },
}); timelineOptions(),
);
}, },
async measure<T>(name: string, run: () => Promise<T> | T): Promise<T> { async measure<T>(name: string, run: () => Promise<T> | T): Promise<T> {
const before = performance.now(); const before = performance.now();
const spanId = `gateway-startup-${++spanSequence}`; const spanId = `gateway-startup-${++spanSequence}`;
emitDiagnosticsTimelineEvent({ emitDiagnosticsTimelineEvent(
type: "span.start", {
name: mapTimelineName(name), type: "span.start",
phase: "startup", name: mapTimelineName(name),
spanId, phase: "startup",
attributes: name === mapTimelineName(name) ? undefined : { traceName: name }, spanId,
}); attributes: name === mapTimelineName(name) ? undefined : { traceName: name },
},
timelineOptions(),
);
try { try {
const result = await run(); const result = await run();
const now = performance.now(); const now = performance.now();
emitDiagnosticsTimelineEvent({ emitDiagnosticsTimelineEvent(
type: "span.end", {
name: mapTimelineName(name), type: "span.end",
phase: "startup", name: mapTimelineName(name),
spanId, phase: "startup",
durationMs: now - before, spanId,
attributes: name === mapTimelineName(name) ? undefined : { traceName: name }, durationMs: now - before,
}); attributes: name === mapTimelineName(name) ? undefined : { traceName: name },
},
timelineOptions(),
);
return result; return result;
} catch (error) { } catch (error) {
const now = performance.now(); const now = performance.now();
emitDiagnosticsTimelineEvent({ emitDiagnosticsTimelineEvent(
type: "span.error", {
name: mapTimelineName(name), type: "span.error",
phase: "startup", name: mapTimelineName(name),
spanId, phase: "startup",
durationMs: now - before, spanId,
attributes: name === mapTimelineName(name) ? undefined : { traceName: name }, durationMs: now - before,
errorName: error instanceof Error ? error.name : typeof error, attributes: name === mapTimelineName(name) ? undefined : { traceName: name },
errorMessage: error instanceof Error ? error.message : String(error), errorName: error instanceof Error ? error.name : typeof error,
}); errorMessage: error instanceof Error ? error.message : String(error),
},
timelineOptions(),
);
throw error; throw error;
} finally { } finally {
const now = performance.now(); const now = performance.now();
@@ -466,6 +499,7 @@ export async function startGatewayServer(
let startupLastGoodSnapshot = configSnapshot; let startupLastGoodSnapshot = configSnapshot;
const startupActivationSourceConfig = configSnapshot.sourceConfig; const startupActivationSourceConfig = configSnapshot.sourceConfig;
const startupRuntimeConfig = applyConfigOverrides(configSnapshot.config); const startupRuntimeConfig = applyConfigOverrides(configSnapshot.config);
startupTrace.setConfig(startupRuntimeConfig);
const authBootstrap = await startupTrace.measure("config.auth", () => const authBootstrap = await startupTrace.measure("config.auth", () =>
prepareGatewayStartupConfig({ prepareGatewayStartupConfig({
configSnapshot, configSnapshot,
@@ -476,6 +510,7 @@ export async function startGatewayServer(
}), }),
); );
cfgAtStart = authBootstrap.cfg; cfgAtStart = authBootstrap.cfg;
startupTrace.setConfig(cfgAtStart);
if (authBootstrap.generatedToken) { if (authBootstrap.generatedToken) {
if (authBootstrap.persistedGeneratedToken) { if (authBootstrap.persistedGeneratedToken) {
log.info( log.info(

View File

@@ -2,6 +2,7 @@ import { mkdtemp, readFile, rm } from "node:fs/promises";
import { tmpdir } from "node:os"; import { tmpdir } from "node:os";
import { join } from "node:path"; import { join } from "node:path";
import { afterEach, describe, expect, it } from "vitest"; import { afterEach, describe, expect, it } from "vitest";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import { import {
emitDiagnosticsTimelineEvent, emitDiagnosticsTimelineEvent,
flushDiagnosticsTimelineForTest, flushDiagnosticsTimelineForTest,
@@ -42,20 +43,47 @@ describe("diagnostics timeline", () => {
it("detects when timeline output is enabled", async () => { it("detects when timeline output is enabled", async () => {
const { env } = await createTimelineEnv(); const { env } = await createTimelineEnv();
expect(isDiagnosticsTimelineEnabled(env)).toBe(true); expect(isDiagnosticsTimelineEnabled({ env })).toBe(true);
expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS: "1" })).toBe(true); expect(isDiagnosticsTimelineEnabled({ env: { ...env, OPENCLAW_DIAGNOSTICS: "1" } })).toBe(true);
expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS: "all" })).toBe(true); expect(isDiagnosticsTimelineEnabled({ env: { ...env, OPENCLAW_DIAGNOSTICS: "all" } })).toBe(
expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS: "*" })).toBe(true); true,
);
expect(isDiagnosticsTimelineEnabled({ env: { ...env, OPENCLAW_DIAGNOSTICS: "*" } })).toBe(true);
expect( expect(
isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS: "diagnostics.timeline" }), isDiagnosticsTimelineEnabled({
env: { ...env, OPENCLAW_DIAGNOSTICS: "diagnostics.timeline" },
}),
).toBe(true); ).toBe(true);
expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS: "telegram.http" })).toBe( expect(
isDiagnosticsTimelineEnabled({ env: { ...env, OPENCLAW_DIAGNOSTICS: "telegram.http" } }),
).toBe(false);
expect(isDiagnosticsTimelineEnabled({ env: { ...env, OPENCLAW_DIAGNOSTICS: "0" } })).toBe(
false, false,
); );
expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS: "0" })).toBe(false); expect(
expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS_TIMELINE_PATH: "" })).toBe( isDiagnosticsTimelineEnabled({
false, env: { ...env, OPENCLAW_DIAGNOSTICS_TIMELINE_PATH: "" },
}),
).toBe(false);
});
it("honors config diagnostics flags after config is available", async () => {
const { env } = await createTimelineEnv();
const envWithoutFlag = { ...env };
delete envWithoutFlag.OPENCLAW_DIAGNOSTICS;
const configWithTimeline = { diagnostics: { flags: ["timeline"] } } as OpenClawConfig;
const configWithWildcard = { diagnostics: { flags: ["*"] } } as OpenClawConfig;
const configWithoutTimeline = { diagnostics: { flags: ["telegram.http"] } } as OpenClawConfig;
expect(isDiagnosticsTimelineEnabled({ config: configWithTimeline, env: envWithoutFlag })).toBe(
true,
); );
expect(isDiagnosticsTimelineEnabled({ config: configWithWildcard, env: envWithoutFlag })).toBe(
true,
);
expect(
isDiagnosticsTimelineEnabled({ config: configWithoutTimeline, env: envWithoutFlag }),
).toBe(false);
}); });
it("writes JSONL diagnostic events with the stable envelope", async () => { it("writes JSONL diagnostic events with the stable envelope", async () => {
@@ -72,7 +100,7 @@ describe("diagnostics timeline", () => {
ignored: Number.NaN, ignored: Number.NaN,
}, },
}, },
env, { env },
); );
const [event] = await readTimeline(path); const [event] = await readTimeline(path);
@@ -95,12 +123,15 @@ describe("diagnostics timeline", () => {
it("records span start and end events around successful work", async () => { it("records span start and end events around successful work", async () => {
const { env, path } = await createTimelineEnv(); const { env, path } = await createTimelineEnv();
const configOnlyEnv = { ...env };
delete configOnlyEnv.OPENCLAW_DIAGNOSTICS;
await expect( await expect(
measureDiagnosticsTimelineSpan("runtimeDeps.stage", () => "ok", { measureDiagnosticsTimelineSpan("runtimeDeps.stage", () => "ok", {
env,
phase: "startup", phase: "startup",
attributes: { pluginCount: 3 }, attributes: { pluginCount: 3 },
config: { diagnostics: { flags: ["timeline"] } } as OpenClawConfig,
env: configOnlyEnv,
}), }),
).resolves.toBe("ok"); ).resolves.toBe("ok");

View File

@@ -2,6 +2,7 @@ import { randomUUID } from "node:crypto";
import { appendFileSync, mkdirSync } from "node:fs"; import { appendFileSync, mkdirSync } from "node:fs";
import { dirname } from "node:path"; import { dirname } from "node:path";
import { performance } from "node:perf_hooks"; import { performance } from "node:perf_hooks";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import { isDiagnosticFlagEnabled } from "./diagnostic-flags.js"; import { isDiagnosticFlagEnabled } from "./diagnostic-flags.js";
export const OPENCLAW_DIAGNOSTICS_TIMELINE_SCHEMA_VERSION = "openclaw.diagnostics.v1"; export const OPENCLAW_DIAGNOSTICS_TIMELINE_SCHEMA_VERSION = "openclaw.diagnostics.v1";
@@ -48,16 +49,32 @@ type DiagnosticsTimelineSpanOptions = {
phase?: string; phase?: string;
parentSpanId?: string; parentSpanId?: string;
attributes?: DiagnosticsTimelineAttributes; attributes?: DiagnosticsTimelineAttributes;
config?: OpenClawConfig;
env?: NodeJS.ProcessEnv;
};
type DiagnosticsTimelineOptions = {
config?: OpenClawConfig;
env?: NodeJS.ProcessEnv; env?: NodeJS.ProcessEnv;
}; };
let warnedAboutTimelineWrite = false; let warnedAboutTimelineWrite = false;
const createdTimelineDirs = new Set<string>(); const createdTimelineDirs = new Set<string>();
export function isDiagnosticsTimelineEnabled(env: NodeJS.ProcessEnv = process.env): boolean { function resolveDiagnosticsTimelineOptions(
options: DiagnosticsTimelineOptions = {},
): Required<Pick<DiagnosticsTimelineOptions, "env">> & Pick<DiagnosticsTimelineOptions, "config"> {
return {
env: options.env ?? process.env,
...(options.config ? { config: options.config } : {}),
};
}
export function isDiagnosticsTimelineEnabled(options: DiagnosticsTimelineOptions = {}): boolean {
const { config, env } = resolveDiagnosticsTimelineOptions(options);
return ( return (
(isDiagnosticFlagEnabled("timeline", undefined, env) || (isDiagnosticFlagEnabled("timeline", config, env) ||
isDiagnosticFlagEnabled("diagnostics.timeline", undefined, env)) && isDiagnosticFlagEnabled("diagnostics.timeline", config, env)) &&
typeof env.OPENCLAW_DIAGNOSTICS_TIMELINE_PATH === "string" && typeof env.OPENCLAW_DIAGNOSTICS_TIMELINE_PATH === "string" &&
env.OPENCLAW_DIAGNOSTICS_TIMELINE_PATH.trim().length > 0 env.OPENCLAW_DIAGNOSTICS_TIMELINE_PATH.trim().length > 0
); );
@@ -131,9 +148,10 @@ function serializeTimelineEvent(event: DiagnosticsTimelineEvent, env: NodeJS.Pro
export function emitDiagnosticsTimelineEvent( export function emitDiagnosticsTimelineEvent(
event: DiagnosticsTimelineEvent, event: DiagnosticsTimelineEvent,
env: NodeJS.ProcessEnv = process.env, options: DiagnosticsTimelineOptions = {},
): void { ): void {
if (!isDiagnosticsTimelineEnabled(env)) { const { env } = resolveDiagnosticsTimelineOptions(options);
if (!isDiagnosticsTimelineEnabled(options)) {
return; return;
} }
const path = env.OPENCLAW_DIAGNOSTICS_TIMELINE_PATH?.trim(); const path = env.OPENCLAW_DIAGNOSTICS_TIMELINE_PATH?.trim();
@@ -162,7 +180,7 @@ export async function measureDiagnosticsTimelineSpan<T>(
options: DiagnosticsTimelineSpanOptions = {}, options: DiagnosticsTimelineSpanOptions = {},
): Promise<T> { ): Promise<T> {
const env = options.env ?? process.env; const env = options.env ?? process.env;
if (!isDiagnosticsTimelineEnabled(env)) { if (!isDiagnosticsTimelineEnabled({ config: options.config, env })) {
return await run(); return await run();
} }
const spanId = randomUUID(); const spanId = randomUUID();
@@ -176,7 +194,7 @@ export async function measureDiagnosticsTimelineSpan<T>(
parentSpanId: options.parentSpanId, parentSpanId: options.parentSpanId,
attributes: options.attributes, attributes: options.attributes,
}, },
env, { config: options.config, env },
); );
try { try {
const result = await run(); const result = await run();
@@ -190,7 +208,7 @@ export async function measureDiagnosticsTimelineSpan<T>(
durationMs: performance.now() - startedAt, durationMs: performance.now() - startedAt,
attributes: options.attributes, attributes: options.attributes,
}, },
env, { config: options.config, env },
); );
return result; return result;
} catch (error) { } catch (error) {
@@ -206,7 +224,7 @@ export async function measureDiagnosticsTimelineSpan<T>(
errorName: error instanceof Error ? error.name : typeof error, errorName: error instanceof Error ? error.name : typeof error,
errorMessage: error instanceof Error ? error.message : String(error), errorMessage: error instanceof Error ? error.message : String(error),
}, },
env, { config: options.config, env },
); );
throw error; throw error;
} }
@@ -218,7 +236,7 @@ export function measureDiagnosticsTimelineSpanSync<T>(
options: DiagnosticsTimelineSpanOptions = {}, options: DiagnosticsTimelineSpanOptions = {},
): T { ): T {
const env = options.env ?? process.env; const env = options.env ?? process.env;
if (!isDiagnosticsTimelineEnabled(env)) { if (!isDiagnosticsTimelineEnabled({ config: options.config, env })) {
return run(); return run();
} }
const spanId = randomUUID(); const spanId = randomUUID();
@@ -232,7 +250,7 @@ export function measureDiagnosticsTimelineSpanSync<T>(
parentSpanId: options.parentSpanId, parentSpanId: options.parentSpanId,
attributes: options.attributes, attributes: options.attributes,
}, },
env, { config: options.config, env },
); );
try { try {
const result = run(); const result = run();
@@ -246,7 +264,7 @@ export function measureDiagnosticsTimelineSpanSync<T>(
durationMs: performance.now() - startedAt, durationMs: performance.now() - startedAt,
attributes: options.attributes, attributes: options.attributes,
}, },
env, { config: options.config, env },
); );
return result; return result;
} catch (error) { } catch (error) {
@@ -262,7 +280,7 @@ export function measureDiagnosticsTimelineSpanSync<T>(
errorName: error instanceof Error ? error.name : typeof error, errorName: error instanceof Error ? error.name : typeof error,
errorMessage: error instanceof Error ? error.message : String(error), errorMessage: error instanceof Error ? error.message : String(error),
}, },
env, { config: options.config, env },
); );
throw error; throw error;
} }

View File

@@ -2142,6 +2142,7 @@ export function loadOpenClawPlugins(options: PluginLoadOptions = {}): PluginRegi
() => installer(installParams), () => installer(installParams),
{ {
phase: "startup", phase: "startup",
config: cfg,
env, env,
attributes: { attributes: {
pluginId: record.id, pluginId: record.id,

View File

@@ -130,6 +130,7 @@ export function loadPluginMetadataSnapshot(
() => loadPluginMetadataSnapshotImpl(params), () => loadPluginMetadataSnapshotImpl(params),
{ {
phase: "startup", phase: "startup",
config: params.config,
env: params.env, env: params.env,
attributes: { attributes: {
hasWorkspaceDir: params.workspaceDir !== undefined, hasWorkspaceDir: params.workspaceDir !== undefined,