fix: honor config timeline diagnostics

This commit is contained in:
Shakker
2026-04-29 19:50:30 +01:00
parent d001c3436b
commit e69da9d578
8 changed files with 185 additions and 83 deletions

View File

@@ -112,7 +112,7 @@ Inline `--password` can be exposed in local process listings. Prefer `--password
### Startup profiling
- Set `OPENCLAW_GATEWAY_STARTUP_TRACE=1` to log phase timings during Gateway startup, including per-phase `eventLoopMax` delay and plugin lookup-table timings for installed-index, manifest registry, startup planning, and owner-map work.
- Set `OPENCLAW_DIAGNOSTICS=timeline` with `OPENCLAW_DIAGNOSTICS_TIMELINE_PATH=<path>` to write a best-effort JSONL startup diagnostics timeline for external QA harnesses. Add `OPENCLAW_DIAGNOSTICS_EVENT_LOOP=1` to include event-loop samples.
- Set `OPENCLAW_DIAGNOSTICS=timeline` with `OPENCLAW_DIAGNOSTICS_TIMELINE_PATH=<path>` to write a best-effort JSONL startup diagnostics timeline for external QA harnesses. You can also enable the flag with `diagnostics.flags: ["timeline"]` in config; the path is still env-provided. Add `OPENCLAW_DIAGNOSTICS_EVENT_LOOP=1` to include event-loop samples.
- Run `pnpm test:startup:gateway -- --runs 5 --warmup 1` to benchmark Gateway startup. The benchmark records first process output, `/healthz`, `/readyz`, startup trace timings, event-loop delay, and plugin lookup-table timing details.
## Query a running Gateway

View File

@@ -61,6 +61,21 @@ OPENCLAW_DIAGNOSTICS_TIMELINE_PATH=/tmp/openclaw-timeline.jsonl \
openclaw gateway run
```
You can also enable it in config:
```json
{
"diagnostics": {
"flags": ["timeline"]
}
}
```
The timeline file path still comes from
`OPENCLAW_DIAGNOSTICS_TIMELINE_PATH`. When `timeline` is enabled only from
config, the earliest config-loading spans are not emitted because OpenClaw has
not read config yet; subsequent startup spans use the config flag.
`OPENCLAW_DIAGNOSTICS=1`, `OPENCLAW_DIAGNOSTICS=all`, and
`OPENCLAW_DIAGNOSTICS=*` also enable the timeline because they enable every
diagnostics flag. Prefer `timeline` when you only want the JSONL timing

View File

@@ -50,6 +50,7 @@ async function prestageGatewayBundledRuntimeDeps(params: {
() => prestageGatewayBundledRuntimeDepsImpl(params),
{
phase: "startup",
config: params.cfg,
attributes: {
pluginCount: params.pluginIds.length,
},

View File

@@ -169,12 +169,23 @@ const canvasRuntime = runtimeForLogger(logCanvas);
function createGatewayStartupTrace() {
const logEnabled = isTruthyEnvValue(process.env.OPENCLAW_GATEWAY_STARTUP_TRACE);
const timelineEnabled = isDiagnosticsTimelineEnabled();
const eventLoopTimelineEnabled =
timelineEnabled && isTruthyEnvValue(process.env.OPENCLAW_DIAGNOSTICS_EVENT_LOOP);
const eventLoopDelay =
logEnabled || eventLoopTimelineEnabled ? monitorEventLoopDelay({ resolution: 10 }) : undefined;
eventLoopDelay?.enable();
let timelineConfig: OpenClawConfig | undefined;
let eventLoopDelay: ReturnType<typeof monitorEventLoopDelay> | undefined;
const timelineOptions = () => ({
...(timelineConfig ? { config: timelineConfig } : {}),
env: process.env,
});
const eventLoopTimelineEnabled = () =>
isDiagnosticsTimelineEnabled(timelineOptions()) &&
isTruthyEnvValue(process.env.OPENCLAW_DIAGNOSTICS_EVENT_LOOP);
const ensureEventLoopDelay = () => {
if (eventLoopDelay || (!logEnabled && !eventLoopTimelineEnabled())) {
return;
}
eventLoopDelay = monitorEventLoopDelay({ resolution: 10 });
eventLoopDelay.enable();
};
ensureEventLoopDelay();
const started = performance.now();
let last = started;
let spanSequence = 0;
@@ -214,23 +225,26 @@ function createGatewayStartupTrace() {
activeSpanName: string,
sample: ReturnType<typeof takeEventLoopSample>,
) => {
if (!eventLoopTimelineEnabled) {
if (!eventLoopTimelineEnabled()) {
return;
}
if (!sample) {
return;
}
emitDiagnosticsTimelineEvent({
type: "eventLoop.sample",
name: "eventLoop",
phase: "startup",
activeSpanName: mapTimelineName(activeSpanName),
attributes:
activeSpanName === mapTimelineName(activeSpanName)
? undefined
: { traceName: activeSpanName },
...sample,
});
emitDiagnosticsTimelineEvent(
{
type: "eventLoop.sample",
name: "eventLoop",
phase: "startup",
activeSpanName: mapTimelineName(activeSpanName),
attributes:
activeSpanName === mapTimelineName(activeSpanName)
? undefined
: { traceName: activeSpanName },
...sample,
},
timelineOptions(),
);
};
const emit = (
name: string,
@@ -250,17 +264,24 @@ function createGatewayStartupTrace() {
}
};
return {
setConfig(config: OpenClawConfig) {
timelineConfig = config;
ensureEventLoopDelay();
},
mark(name: string) {
const now = performance.now();
const eventLoopSample = takeEventLoopSample();
emit(name, now - last, now - started, eventLoopSample);
emitDiagnosticsTimelineEvent({
type: "mark",
name: mapTimelineName(name),
phase: "startup",
durationMs: now - started,
attributes: name === mapTimelineName(name) ? undefined : { traceName: name },
});
emitDiagnosticsTimelineEvent(
{
type: "mark",
name: mapTimelineName(name),
phase: "startup",
durationMs: now - started,
attributes: name === mapTimelineName(name) ? undefined : { traceName: name },
},
timelineOptions(),
);
emitEventLoopTimelineSample(name, eventLoopSample);
last = now;
if (name === "ready") {
@@ -274,50 +295,62 @@ function createGatewayStartupTrace() {
`startup trace: ${name} ${metrics.map(([key, value]) => formatMetric(key, value)).join(" ")}`,
);
}
emitDiagnosticsTimelineEvent({
type: "mark",
name: mapTimelineName(name),
phase: "startup",
attributes: {
traceName: name,
...attributes,
emitDiagnosticsTimelineEvent(
{
type: "mark",
name: mapTimelineName(name),
phase: "startup",
attributes: {
traceName: name,
...attributes,
},
},
});
timelineOptions(),
);
},
async measure<T>(name: string, run: () => Promise<T> | T): Promise<T> {
const before = performance.now();
const spanId = `gateway-startup-${++spanSequence}`;
emitDiagnosticsTimelineEvent({
type: "span.start",
name: mapTimelineName(name),
phase: "startup",
spanId,
attributes: name === mapTimelineName(name) ? undefined : { traceName: name },
});
emitDiagnosticsTimelineEvent(
{
type: "span.start",
name: mapTimelineName(name),
phase: "startup",
spanId,
attributes: name === mapTimelineName(name) ? undefined : { traceName: name },
},
timelineOptions(),
);
try {
const result = await run();
const now = performance.now();
emitDiagnosticsTimelineEvent({
type: "span.end",
name: mapTimelineName(name),
phase: "startup",
spanId,
durationMs: now - before,
attributes: name === mapTimelineName(name) ? undefined : { traceName: name },
});
emitDiagnosticsTimelineEvent(
{
type: "span.end",
name: mapTimelineName(name),
phase: "startup",
spanId,
durationMs: now - before,
attributes: name === mapTimelineName(name) ? undefined : { traceName: name },
},
timelineOptions(),
);
return result;
} catch (error) {
const now = performance.now();
emitDiagnosticsTimelineEvent({
type: "span.error",
name: mapTimelineName(name),
phase: "startup",
spanId,
durationMs: now - before,
attributes: name === mapTimelineName(name) ? undefined : { traceName: name },
errorName: error instanceof Error ? error.name : typeof error,
errorMessage: error instanceof Error ? error.message : String(error),
});
emitDiagnosticsTimelineEvent(
{
type: "span.error",
name: mapTimelineName(name),
phase: "startup",
spanId,
durationMs: now - before,
attributes: name === mapTimelineName(name) ? undefined : { traceName: name },
errorName: error instanceof Error ? error.name : typeof error,
errorMessage: error instanceof Error ? error.message : String(error),
},
timelineOptions(),
);
throw error;
} finally {
const now = performance.now();
@@ -466,6 +499,7 @@ export async function startGatewayServer(
let startupLastGoodSnapshot = configSnapshot;
const startupActivationSourceConfig = configSnapshot.sourceConfig;
const startupRuntimeConfig = applyConfigOverrides(configSnapshot.config);
startupTrace.setConfig(startupRuntimeConfig);
const authBootstrap = await startupTrace.measure("config.auth", () =>
prepareGatewayStartupConfig({
configSnapshot,
@@ -476,6 +510,7 @@ export async function startGatewayServer(
}),
);
cfgAtStart = authBootstrap.cfg;
startupTrace.setConfig(cfgAtStart);
if (authBootstrap.generatedToken) {
if (authBootstrap.persistedGeneratedToken) {
log.info(

View File

@@ -2,6 +2,7 @@ import { mkdtemp, readFile, rm } from "node:fs/promises";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, expect, it } from "vitest";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import {
emitDiagnosticsTimelineEvent,
flushDiagnosticsTimelineForTest,
@@ -42,20 +43,47 @@ describe("diagnostics timeline", () => {
it("detects when timeline output is enabled", async () => {
const { env } = await createTimelineEnv();
expect(isDiagnosticsTimelineEnabled(env)).toBe(true);
expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS: "1" })).toBe(true);
expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS: "all" })).toBe(true);
expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS: "*" })).toBe(true);
expect(isDiagnosticsTimelineEnabled({ env })).toBe(true);
expect(isDiagnosticsTimelineEnabled({ env: { ...env, OPENCLAW_DIAGNOSTICS: "1" } })).toBe(true);
expect(isDiagnosticsTimelineEnabled({ env: { ...env, OPENCLAW_DIAGNOSTICS: "all" } })).toBe(
true,
);
expect(isDiagnosticsTimelineEnabled({ env: { ...env, OPENCLAW_DIAGNOSTICS: "*" } })).toBe(true);
expect(
isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS: "diagnostics.timeline" }),
isDiagnosticsTimelineEnabled({
env: { ...env, OPENCLAW_DIAGNOSTICS: "diagnostics.timeline" },
}),
).toBe(true);
expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS: "telegram.http" })).toBe(
expect(
isDiagnosticsTimelineEnabled({ env: { ...env, OPENCLAW_DIAGNOSTICS: "telegram.http" } }),
).toBe(false);
expect(isDiagnosticsTimelineEnabled({ env: { ...env, OPENCLAW_DIAGNOSTICS: "0" } })).toBe(
false,
);
expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS: "0" })).toBe(false);
expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS_TIMELINE_PATH: "" })).toBe(
false,
expect(
isDiagnosticsTimelineEnabled({
env: { ...env, OPENCLAW_DIAGNOSTICS_TIMELINE_PATH: "" },
}),
).toBe(false);
});
it("honors config diagnostics flags after config is available", async () => {
const { env } = await createTimelineEnv();
const envWithoutFlag = { ...env };
delete envWithoutFlag.OPENCLAW_DIAGNOSTICS;
const configWithTimeline = { diagnostics: { flags: ["timeline"] } } as OpenClawConfig;
const configWithWildcard = { diagnostics: { flags: ["*"] } } as OpenClawConfig;
const configWithoutTimeline = { diagnostics: { flags: ["telegram.http"] } } as OpenClawConfig;
expect(isDiagnosticsTimelineEnabled({ config: configWithTimeline, env: envWithoutFlag })).toBe(
true,
);
expect(isDiagnosticsTimelineEnabled({ config: configWithWildcard, env: envWithoutFlag })).toBe(
true,
);
expect(
isDiagnosticsTimelineEnabled({ config: configWithoutTimeline, env: envWithoutFlag }),
).toBe(false);
});
it("writes JSONL diagnostic events with the stable envelope", async () => {
@@ -72,7 +100,7 @@ describe("diagnostics timeline", () => {
ignored: Number.NaN,
},
},
env,
{ env },
);
const [event] = await readTimeline(path);
@@ -95,12 +123,15 @@ describe("diagnostics timeline", () => {
it("records span start and end events around successful work", async () => {
const { env, path } = await createTimelineEnv();
const configOnlyEnv = { ...env };
delete configOnlyEnv.OPENCLAW_DIAGNOSTICS;
await expect(
measureDiagnosticsTimelineSpan("runtimeDeps.stage", () => "ok", {
env,
phase: "startup",
attributes: { pluginCount: 3 },
config: { diagnostics: { flags: ["timeline"] } } as OpenClawConfig,
env: configOnlyEnv,
}),
).resolves.toBe("ok");

View File

@@ -2,6 +2,7 @@ import { randomUUID } from "node:crypto";
import { appendFileSync, mkdirSync } from "node:fs";
import { dirname } from "node:path";
import { performance } from "node:perf_hooks";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import { isDiagnosticFlagEnabled } from "./diagnostic-flags.js";
export const OPENCLAW_DIAGNOSTICS_TIMELINE_SCHEMA_VERSION = "openclaw.diagnostics.v1";
@@ -48,16 +49,32 @@ type DiagnosticsTimelineSpanOptions = {
phase?: string;
parentSpanId?: string;
attributes?: DiagnosticsTimelineAttributes;
config?: OpenClawConfig;
env?: NodeJS.ProcessEnv;
};
type DiagnosticsTimelineOptions = {
config?: OpenClawConfig;
env?: NodeJS.ProcessEnv;
};
let warnedAboutTimelineWrite = false;
const createdTimelineDirs = new Set<string>();
export function isDiagnosticsTimelineEnabled(env: NodeJS.ProcessEnv = process.env): boolean {
function resolveDiagnosticsTimelineOptions(
options: DiagnosticsTimelineOptions = {},
): Required<Pick<DiagnosticsTimelineOptions, "env">> & Pick<DiagnosticsTimelineOptions, "config"> {
return {
env: options.env ?? process.env,
...(options.config ? { config: options.config } : {}),
};
}
export function isDiagnosticsTimelineEnabled(options: DiagnosticsTimelineOptions = {}): boolean {
const { config, env } = resolveDiagnosticsTimelineOptions(options);
return (
(isDiagnosticFlagEnabled("timeline", undefined, env) ||
isDiagnosticFlagEnabled("diagnostics.timeline", undefined, env)) &&
(isDiagnosticFlagEnabled("timeline", config, env) ||
isDiagnosticFlagEnabled("diagnostics.timeline", config, env)) &&
typeof env.OPENCLAW_DIAGNOSTICS_TIMELINE_PATH === "string" &&
env.OPENCLAW_DIAGNOSTICS_TIMELINE_PATH.trim().length > 0
);
@@ -131,9 +148,10 @@ function serializeTimelineEvent(event: DiagnosticsTimelineEvent, env: NodeJS.Pro
export function emitDiagnosticsTimelineEvent(
event: DiagnosticsTimelineEvent,
env: NodeJS.ProcessEnv = process.env,
options: DiagnosticsTimelineOptions = {},
): void {
if (!isDiagnosticsTimelineEnabled(env)) {
const { env } = resolveDiagnosticsTimelineOptions(options);
if (!isDiagnosticsTimelineEnabled(options)) {
return;
}
const path = env.OPENCLAW_DIAGNOSTICS_TIMELINE_PATH?.trim();
@@ -162,7 +180,7 @@ export async function measureDiagnosticsTimelineSpan<T>(
options: DiagnosticsTimelineSpanOptions = {},
): Promise<T> {
const env = options.env ?? process.env;
if (!isDiagnosticsTimelineEnabled(env)) {
if (!isDiagnosticsTimelineEnabled({ config: options.config, env })) {
return await run();
}
const spanId = randomUUID();
@@ -176,7 +194,7 @@ export async function measureDiagnosticsTimelineSpan<T>(
parentSpanId: options.parentSpanId,
attributes: options.attributes,
},
env,
{ config: options.config, env },
);
try {
const result = await run();
@@ -190,7 +208,7 @@ export async function measureDiagnosticsTimelineSpan<T>(
durationMs: performance.now() - startedAt,
attributes: options.attributes,
},
env,
{ config: options.config, env },
);
return result;
} catch (error) {
@@ -206,7 +224,7 @@ export async function measureDiagnosticsTimelineSpan<T>(
errorName: error instanceof Error ? error.name : typeof error,
errorMessage: error instanceof Error ? error.message : String(error),
},
env,
{ config: options.config, env },
);
throw error;
}
@@ -218,7 +236,7 @@ export function measureDiagnosticsTimelineSpanSync<T>(
options: DiagnosticsTimelineSpanOptions = {},
): T {
const env = options.env ?? process.env;
if (!isDiagnosticsTimelineEnabled(env)) {
if (!isDiagnosticsTimelineEnabled({ config: options.config, env })) {
return run();
}
const spanId = randomUUID();
@@ -232,7 +250,7 @@ export function measureDiagnosticsTimelineSpanSync<T>(
parentSpanId: options.parentSpanId,
attributes: options.attributes,
},
env,
{ config: options.config, env },
);
try {
const result = run();
@@ -246,7 +264,7 @@ export function measureDiagnosticsTimelineSpanSync<T>(
durationMs: performance.now() - startedAt,
attributes: options.attributes,
},
env,
{ config: options.config, env },
);
return result;
} catch (error) {
@@ -262,7 +280,7 @@ export function measureDiagnosticsTimelineSpanSync<T>(
errorName: error instanceof Error ? error.name : typeof error,
errorMessage: error instanceof Error ? error.message : String(error),
},
env,
{ config: options.config, env },
);
throw error;
}

View File

@@ -2142,6 +2142,7 @@ export function loadOpenClawPlugins(options: PluginLoadOptions = {}): PluginRegi
() => installer(installParams),
{
phase: "startup",
config: cfg,
env,
attributes: {
pluginId: record.id,

View File

@@ -130,6 +130,7 @@ export function loadPluginMetadataSnapshot(
() => loadPluginMetadataSnapshotImpl(params),
{
phase: "startup",
config: params.config,
env: params.env,
attributes: {
hasWorkspaceDir: params.workspaceDir !== undefined,