mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 09:50:42 +00:00
feat: emit diagnostics timeline
This commit is contained in:
@@ -112,6 +112,7 @@ Inline `--password` can be exposed in local process listings. Prefer `--password
|
|||||||
### Startup profiling
|
### Startup profiling
|
||||||
|
|
||||||
- Set `OPENCLAW_GATEWAY_STARTUP_TRACE=1` to log phase timings during Gateway startup, including per-phase `eventLoopMax` delay and plugin lookup-table timings for installed-index, manifest registry, startup planning, and owner-map work.
|
- Set `OPENCLAW_GATEWAY_STARTUP_TRACE=1` to log phase timings during Gateway startup, including per-phase `eventLoopMax` delay and plugin lookup-table timings for installed-index, manifest registry, startup planning, and owner-map work.
|
||||||
|
- Set `OPENCLAW_DIAGNOSTICS=1` with `OPENCLAW_DIAGNOSTICS_TIMELINE_PATH=<path>` to write a best-effort JSONL startup diagnostics timeline for external QA harnesses. Add `OPENCLAW_DIAGNOSTICS_EVENT_LOOP=1` to include event-loop samples.
|
||||||
- Run `pnpm test:startup:gateway -- --runs 5 --warmup 1` to benchmark Gateway startup. The benchmark records first process output, `/healthz`, `/readyz`, startup trace timings, event-loop delay, and plugin lookup-table timing details.
|
- Run `pnpm test:startup:gateway -- --runs 5 --warmup 1` to benchmark Gateway startup. The benchmark records first process output, `/healthz`, `/readyz`, startup trace timings, event-loop delay, and plugin lookup-table timing details.
|
||||||
|
|
||||||
## Query a running Gateway
|
## Query a running Gateway
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import { initSubagentRegistry } from "../agents/subagent-registry.js";
|
|||||||
import { runChannelPluginStartupMaintenance } from "../channels/plugins/lifecycle-startup.js";
|
import { runChannelPluginStartupMaintenance } from "../channels/plugins/lifecycle-startup.js";
|
||||||
import { applyPluginAutoEnable } from "../config/plugin-auto-enable.js";
|
import { applyPluginAutoEnable } from "../config/plugin-auto-enable.js";
|
||||||
import type { OpenClawConfig } from "../config/types.openclaw.js";
|
import type { OpenClawConfig } from "../config/types.openclaw.js";
|
||||||
|
import { measureDiagnosticsTimelineSpan } from "../infra/diagnostics-timeline.js";
|
||||||
import { resolveOpenClawPackageRootSync } from "../infra/openclaw-root.js";
|
import { resolveOpenClawPackageRootSync } from "../infra/openclaw-root.js";
|
||||||
import {
|
import {
|
||||||
pruneUnknownBundledRuntimeDepsRoots,
|
pruneUnknownBundledRuntimeDepsRoots,
|
||||||
@@ -43,6 +44,23 @@ async function prestageGatewayBundledRuntimeDeps(params: {
|
|||||||
cfg: OpenClawConfig;
|
cfg: OpenClawConfig;
|
||||||
pluginIds: readonly string[];
|
pluginIds: readonly string[];
|
||||||
log: GatewayPluginBootstrapLog;
|
log: GatewayPluginBootstrapLog;
|
||||||
|
}): Promise<void> {
|
||||||
|
await measureDiagnosticsTimelineSpan(
|
||||||
|
"runtimeDeps.stage",
|
||||||
|
() => prestageGatewayBundledRuntimeDepsImpl(params),
|
||||||
|
{
|
||||||
|
phase: "startup",
|
||||||
|
attributes: {
|
||||||
|
pluginCount: params.pluginIds.length,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function prestageGatewayBundledRuntimeDepsImpl(params: {
|
||||||
|
cfg: OpenClawConfig;
|
||||||
|
pluginIds: readonly string[];
|
||||||
|
log: GatewayPluginBootstrapLog;
|
||||||
}): Promise<void> {
|
}): Promise<void> {
|
||||||
if (params.pluginIds.length === 0) {
|
if (params.pluginIds.length === 0) {
|
||||||
return;
|
return;
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { monitorEventLoopDelay } from "node:perf_hooks";
|
import { monitorEventLoopDelay, performance } from "node:perf_hooks";
|
||||||
import { getActiveEmbeddedRunCount } from "../agents/pi-embedded-runner/run-state.js";
|
import { getActiveEmbeddedRunCount } from "../agents/pi-embedded-runner/run-state.js";
|
||||||
import { getTotalPendingReplies } from "../auto-reply/reply/dispatcher-registry.js";
|
import { getTotalPendingReplies } from "../auto-reply/reply/dispatcher-registry.js";
|
||||||
import type { CanvasHostServer } from "../canvas-host/server.js";
|
import type { CanvasHostServer } from "../canvas-host/server.js";
|
||||||
@@ -24,6 +24,10 @@ import {
|
|||||||
isDiagnosticsEnabled,
|
isDiagnosticsEnabled,
|
||||||
setDiagnosticsEnabledForProcess,
|
setDiagnosticsEnabledForProcess,
|
||||||
} from "../infra/diagnostic-events.js";
|
} from "../infra/diagnostic-events.js";
|
||||||
|
import {
|
||||||
|
emitDiagnosticsTimelineEvent,
|
||||||
|
isDiagnosticsTimelineEnabled,
|
||||||
|
} from "../infra/diagnostics-timeline.js";
|
||||||
import { isTruthyEnvValue, isVitestRuntimeEnv, logAcceptedEnvOption } from "../infra/env.js";
|
import { isTruthyEnvValue, isVitestRuntimeEnv, logAcceptedEnvOption } from "../infra/env.js";
|
||||||
import { ensureOpenClawCliOnPath } from "../infra/path-env.js";
|
import { ensureOpenClawCliOnPath } from "../infra/path-env.js";
|
||||||
import { setGatewaySigusr1RestartPolicy, setPreRestartDeferralCheck } from "../infra/restart.js";
|
import { setGatewaySigusr1RestartPolicy, setPreRestartDeferralCheck } from "../infra/restart.js";
|
||||||
@@ -164,30 +168,80 @@ const gatewayRuntime = runtimeForLogger(log);
|
|||||||
const canvasRuntime = runtimeForLogger(logCanvas);
|
const canvasRuntime = runtimeForLogger(logCanvas);
|
||||||
|
|
||||||
function createGatewayStartupTrace() {
|
function createGatewayStartupTrace() {
|
||||||
const enabled = isTruthyEnvValue(process.env.OPENCLAW_GATEWAY_STARTUP_TRACE);
|
const logEnabled = isTruthyEnvValue(process.env.OPENCLAW_GATEWAY_STARTUP_TRACE);
|
||||||
const eventLoopDelay = enabled ? monitorEventLoopDelay({ resolution: 10 }) : undefined;
|
const timelineEnabled = isDiagnosticsTimelineEnabled();
|
||||||
|
const eventLoopTimelineEnabled =
|
||||||
|
timelineEnabled && isTruthyEnvValue(process.env.OPENCLAW_DIAGNOSTICS_EVENT_LOOP);
|
||||||
|
const eventLoopDelay =
|
||||||
|
logEnabled || eventLoopTimelineEnabled ? monitorEventLoopDelay({ resolution: 10 }) : undefined;
|
||||||
eventLoopDelay?.enable();
|
eventLoopDelay?.enable();
|
||||||
const started = performance.now();
|
const started = performance.now();
|
||||||
let last = started;
|
let last = started;
|
||||||
|
let spanSequence = 0;
|
||||||
const formatMetric = (key: string, value: number | string) =>
|
const formatMetric = (key: string, value: number | string) =>
|
||||||
`${key}=${typeof value === "number" ? value.toFixed(1) : value}`;
|
`${key}=${typeof value === "number" ? value.toFixed(1) : value}`;
|
||||||
const readEventLoopMaxMs = () => {
|
const mapTimelineName = (name: string) => {
|
||||||
if (!eventLoopDelay) {
|
switch (name) {
|
||||||
return 0;
|
case "config.snapshot":
|
||||||
|
return "config.load";
|
||||||
|
case "config.auth":
|
||||||
|
case "config.final-snapshot":
|
||||||
|
case "runtime.config":
|
||||||
|
return "config.normalize";
|
||||||
|
case "plugins.bootstrap":
|
||||||
|
return "plugins.load";
|
||||||
|
case "runtime.post-attach":
|
||||||
|
case "ready":
|
||||||
|
return "gateway.ready";
|
||||||
|
default:
|
||||||
|
return name;
|
||||||
}
|
}
|
||||||
const maxMs = eventLoopDelay.max / 1_000_000;
|
};
|
||||||
|
const takeEventLoopSample = () => {
|
||||||
|
if (!eventLoopDelay) {
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
const sample = {
|
||||||
|
p50Ms: eventLoopDelay.percentile(50) / 1_000_000,
|
||||||
|
p95Ms: eventLoopDelay.percentile(95) / 1_000_000,
|
||||||
|
p99Ms: eventLoopDelay.percentile(99) / 1_000_000,
|
||||||
|
maxMs: eventLoopDelay.max / 1_000_000,
|
||||||
|
};
|
||||||
eventLoopDelay.reset();
|
eventLoopDelay.reset();
|
||||||
return maxMs;
|
return sample;
|
||||||
|
};
|
||||||
|
const emitEventLoopTimelineSample = (
|
||||||
|
activeSpanName: string,
|
||||||
|
sample: ReturnType<typeof takeEventLoopSample>,
|
||||||
|
) => {
|
||||||
|
if (!eventLoopTimelineEnabled) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!sample) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
emitDiagnosticsTimelineEvent({
|
||||||
|
type: "eventLoop.sample",
|
||||||
|
name: "eventLoop",
|
||||||
|
phase: "startup",
|
||||||
|
activeSpanName: mapTimelineName(activeSpanName),
|
||||||
|
attributes:
|
||||||
|
activeSpanName === mapTimelineName(activeSpanName)
|
||||||
|
? undefined
|
||||||
|
: { traceName: activeSpanName },
|
||||||
|
...sample,
|
||||||
|
});
|
||||||
};
|
};
|
||||||
const emit = (
|
const emit = (
|
||||||
name: string,
|
name: string,
|
||||||
durationMs: number,
|
durationMs: number,
|
||||||
totalMs: number,
|
totalMs: number,
|
||||||
|
eventLoopSample: ReturnType<typeof takeEventLoopSample>,
|
||||||
extras: ReadonlyArray<readonly [string, number | string]> = [],
|
extras: ReadonlyArray<readonly [string, number | string]> = [],
|
||||||
) => {
|
) => {
|
||||||
if (enabled) {
|
if (logEnabled) {
|
||||||
const metrics = [
|
const metrics = [
|
||||||
`eventLoopMax=${readEventLoopMaxMs().toFixed(1)}ms`,
|
`eventLoopMax=${(eventLoopSample?.maxMs ?? 0).toFixed(1)}ms`,
|
||||||
...extras.map(([key, value]) => formatMetric(key, value)),
|
...extras.map(([key, value]) => formatMetric(key, value)),
|
||||||
].join(" ");
|
].join(" ");
|
||||||
log.info(
|
log.info(
|
||||||
@@ -198,27 +252,78 @@ function createGatewayStartupTrace() {
|
|||||||
return {
|
return {
|
||||||
mark(name: string) {
|
mark(name: string) {
|
||||||
const now = performance.now();
|
const now = performance.now();
|
||||||
emit(name, now - last, now - started);
|
const eventLoopSample = takeEventLoopSample();
|
||||||
|
emit(name, now - last, now - started, eventLoopSample);
|
||||||
|
emitDiagnosticsTimelineEvent({
|
||||||
|
type: "mark",
|
||||||
|
name: mapTimelineName(name),
|
||||||
|
phase: "startup",
|
||||||
|
durationMs: now - started,
|
||||||
|
attributes: name === mapTimelineName(name) ? undefined : { traceName: name },
|
||||||
|
});
|
||||||
|
emitEventLoopTimelineSample(name, eventLoopSample);
|
||||||
last = now;
|
last = now;
|
||||||
if (name === "ready") {
|
if (name === "ready") {
|
||||||
eventLoopDelay?.disable();
|
eventLoopDelay?.disable();
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
detail(name: string, metrics: ReadonlyArray<readonly [string, number | string]>) {
|
detail(name: string, metrics: ReadonlyArray<readonly [string, number | string]>) {
|
||||||
if (!enabled) {
|
const attributes = Object.fromEntries(metrics);
|
||||||
return;
|
if (logEnabled) {
|
||||||
|
log.info(
|
||||||
|
`startup trace: ${name} ${metrics.map(([key, value]) => formatMetric(key, value)).join(" ")}`,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
log.info(
|
emitDiagnosticsTimelineEvent({
|
||||||
`startup trace: ${name} ${metrics.map(([key, value]) => formatMetric(key, value)).join(" ")}`,
|
type: "mark",
|
||||||
);
|
name: mapTimelineName(name),
|
||||||
|
phase: "startup",
|
||||||
|
attributes: {
|
||||||
|
traceName: name,
|
||||||
|
...attributes,
|
||||||
|
},
|
||||||
|
});
|
||||||
},
|
},
|
||||||
async measure<T>(name: string, run: () => Promise<T> | T): Promise<T> {
|
async measure<T>(name: string, run: () => Promise<T> | T): Promise<T> {
|
||||||
const before = performance.now();
|
const before = performance.now();
|
||||||
|
const spanId = `gateway-startup-${++spanSequence}`;
|
||||||
|
emitDiagnosticsTimelineEvent({
|
||||||
|
type: "span.start",
|
||||||
|
name: mapTimelineName(name),
|
||||||
|
phase: "startup",
|
||||||
|
spanId,
|
||||||
|
attributes: name === mapTimelineName(name) ? undefined : { traceName: name },
|
||||||
|
});
|
||||||
try {
|
try {
|
||||||
return await run();
|
const result = await run();
|
||||||
|
const now = performance.now();
|
||||||
|
emitDiagnosticsTimelineEvent({
|
||||||
|
type: "span.end",
|
||||||
|
name: mapTimelineName(name),
|
||||||
|
phase: "startup",
|
||||||
|
spanId,
|
||||||
|
durationMs: now - before,
|
||||||
|
attributes: name === mapTimelineName(name) ? undefined : { traceName: name },
|
||||||
|
});
|
||||||
|
return result;
|
||||||
|
} catch (error) {
|
||||||
|
const now = performance.now();
|
||||||
|
emitDiagnosticsTimelineEvent({
|
||||||
|
type: "span.error",
|
||||||
|
name: mapTimelineName(name),
|
||||||
|
phase: "startup",
|
||||||
|
spanId,
|
||||||
|
durationMs: now - before,
|
||||||
|
attributes: name === mapTimelineName(name) ? undefined : { traceName: name },
|
||||||
|
errorName: error instanceof Error ? error.name : typeof error,
|
||||||
|
errorMessage: error instanceof Error ? error.message : String(error),
|
||||||
|
});
|
||||||
|
throw error;
|
||||||
} finally {
|
} finally {
|
||||||
const now = performance.now();
|
const now = performance.now();
|
||||||
emit(name, now - before, now - started);
|
const eventLoopSample = takeEventLoopSample();
|
||||||
|
emit(name, now - before, now - started, eventLoopSample);
|
||||||
|
emitEventLoopTimelineSample(name, eventLoopSample);
|
||||||
last = now;
|
last = now;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|||||||
160
src/infra/diagnostics-timeline.test.ts
Normal file
160
src/infra/diagnostics-timeline.test.ts
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
import { mkdtemp, readFile, rm } from "node:fs/promises";
|
||||||
|
import { tmpdir } from "node:os";
|
||||||
|
import { join } from "node:path";
|
||||||
|
import { afterEach, describe, expect, it } from "vitest";
|
||||||
|
import {
|
||||||
|
emitDiagnosticsTimelineEvent,
|
||||||
|
flushDiagnosticsTimelineForTest,
|
||||||
|
isDiagnosticsTimelineEnabled,
|
||||||
|
measureDiagnosticsTimelineSpan,
|
||||||
|
measureDiagnosticsTimelineSpanSync,
|
||||||
|
} from "./diagnostics-timeline.js";
|
||||||
|
|
||||||
|
const tempDirs: string[] = [];
|
||||||
|
|
||||||
|
async function createTimelineEnv() {
|
||||||
|
const dir = await mkdtemp(join(tmpdir(), "openclaw-diagnostics-timeline-"));
|
||||||
|
tempDirs.push(dir);
|
||||||
|
return {
|
||||||
|
env: {
|
||||||
|
OPENCLAW_DIAGNOSTICS: "1",
|
||||||
|
OPENCLAW_DIAGNOSTICS_RUN_ID: "run-1",
|
||||||
|
OPENCLAW_DIAGNOSTICS_ENV: "env-1",
|
||||||
|
OPENCLAW_DIAGNOSTICS_TIMELINE_PATH: join(dir, "nested", "timeline.jsonl"),
|
||||||
|
} as NodeJS.ProcessEnv,
|
||||||
|
path: join(dir, "nested", "timeline.jsonl"),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async function readTimeline(path: string) {
|
||||||
|
await flushDiagnosticsTimelineForTest();
|
||||||
|
return (await readFile(path, "utf8"))
|
||||||
|
.trim()
|
||||||
|
.split("\n")
|
||||||
|
.map((line) => JSON.parse(line) as Record<string, unknown>);
|
||||||
|
}
|
||||||
|
|
||||||
|
afterEach(async () => {
|
||||||
|
await Promise.all(tempDirs.splice(0).map((dir) => rm(dir, { recursive: true, force: true })));
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("diagnostics timeline", () => {
|
||||||
|
it("detects when timeline output is enabled", async () => {
|
||||||
|
const { env } = await createTimelineEnv();
|
||||||
|
|
||||||
|
expect(isDiagnosticsTimelineEnabled(env)).toBe(true);
|
||||||
|
expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS: "0" })).toBe(false);
|
||||||
|
expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS_TIMELINE_PATH: "" })).toBe(
|
||||||
|
false,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("writes JSONL diagnostic events with the stable envelope", async () => {
|
||||||
|
const { env, path } = await createTimelineEnv();
|
||||||
|
|
||||||
|
emitDiagnosticsTimelineEvent(
|
||||||
|
{
|
||||||
|
type: "mark",
|
||||||
|
name: "gateway.ready",
|
||||||
|
phase: "startup",
|
||||||
|
attributes: {
|
||||||
|
ok: true,
|
||||||
|
count: 2,
|
||||||
|
ignored: Number.NaN,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
env,
|
||||||
|
);
|
||||||
|
|
||||||
|
const [event] = await readTimeline(path);
|
||||||
|
expect(event).toMatchObject({
|
||||||
|
schemaVersion: "openclaw.diagnostics.v1",
|
||||||
|
type: "mark",
|
||||||
|
name: "gateway.ready",
|
||||||
|
runId: "run-1",
|
||||||
|
envName: "env-1",
|
||||||
|
phase: "startup",
|
||||||
|
attributes: {
|
||||||
|
ok: true,
|
||||||
|
count: 2,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
expect(event?.timestamp).toEqual(expect.any(String));
|
||||||
|
expect(event?.pid).toEqual(expect.any(Number));
|
||||||
|
expect((event?.attributes as Record<string, unknown>).ignored).toBeUndefined();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("records span start and end events around successful work", async () => {
|
||||||
|
const { env, path } = await createTimelineEnv();
|
||||||
|
|
||||||
|
await expect(
|
||||||
|
measureDiagnosticsTimelineSpan("runtimeDeps.stage", () => "ok", {
|
||||||
|
env,
|
||||||
|
phase: "startup",
|
||||||
|
attributes: { pluginCount: 3 },
|
||||||
|
}),
|
||||||
|
).resolves.toBe("ok");
|
||||||
|
|
||||||
|
const events = await readTimeline(path);
|
||||||
|
expect(events).toHaveLength(2);
|
||||||
|
expect(events[0]).toMatchObject({
|
||||||
|
type: "span.start",
|
||||||
|
name: "runtimeDeps.stage",
|
||||||
|
phase: "startup",
|
||||||
|
attributes: { pluginCount: 3 },
|
||||||
|
});
|
||||||
|
expect(events[1]).toMatchObject({
|
||||||
|
type: "span.end",
|
||||||
|
name: "runtimeDeps.stage",
|
||||||
|
phase: "startup",
|
||||||
|
attributes: { pluginCount: 3 },
|
||||||
|
});
|
||||||
|
expect(events[1]?.spanId).toBe(events[0]?.spanId);
|
||||||
|
expect(events[1]?.durationMs).toEqual(expect.any(Number));
|
||||||
|
});
|
||||||
|
|
||||||
|
it("records span error events and rethrows failures", async () => {
|
||||||
|
const { env, path } = await createTimelineEnv();
|
||||||
|
|
||||||
|
await expect(
|
||||||
|
measureDiagnosticsTimelineSpan(
|
||||||
|
"plugins.load",
|
||||||
|
() => {
|
||||||
|
throw new TypeError("bad plugin");
|
||||||
|
},
|
||||||
|
{ env, phase: "startup" },
|
||||||
|
),
|
||||||
|
).rejects.toThrow("bad plugin");
|
||||||
|
|
||||||
|
const events = await readTimeline(path);
|
||||||
|
expect(events).toHaveLength(2);
|
||||||
|
expect(events[1]).toMatchObject({
|
||||||
|
type: "span.error",
|
||||||
|
name: "plugins.load",
|
||||||
|
phase: "startup",
|
||||||
|
errorName: "TypeError",
|
||||||
|
errorMessage: "bad plugin",
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it("records synchronous spans", async () => {
|
||||||
|
const { env, path } = await createTimelineEnv();
|
||||||
|
|
||||||
|
const result = measureDiagnosticsTimelineSpanSync("plugins.metadata.scan", () => 42, {
|
||||||
|
env,
|
||||||
|
phase: "startup",
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result).toBe(42);
|
||||||
|
const events = await readTimeline(path);
|
||||||
|
expect(events).toHaveLength(2);
|
||||||
|
expect(events[0]).toMatchObject({
|
||||||
|
type: "span.start",
|
||||||
|
name: "plugins.metadata.scan",
|
||||||
|
});
|
||||||
|
expect(events[1]).toMatchObject({
|
||||||
|
type: "span.end",
|
||||||
|
name: "plugins.metadata.scan",
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
272
src/infra/diagnostics-timeline.ts
Normal file
272
src/infra/diagnostics-timeline.ts
Normal file
@@ -0,0 +1,272 @@
|
|||||||
|
import { randomUUID } from "node:crypto";
|
||||||
|
import { appendFileSync, mkdirSync } from "node:fs";
|
||||||
|
import { dirname } from "node:path";
|
||||||
|
import { performance } from "node:perf_hooks";
|
||||||
|
import { isTruthyEnvValue } from "./env.js";
|
||||||
|
|
||||||
|
export const OPENCLAW_DIAGNOSTICS_TIMELINE_SCHEMA_VERSION = "openclaw.diagnostics.v1";
|
||||||
|
|
||||||
|
export type DiagnosticsTimelineEventType =
|
||||||
|
| "span.start"
|
||||||
|
| "span.end"
|
||||||
|
| "span.error"
|
||||||
|
| "mark"
|
||||||
|
| "eventLoop.sample"
|
||||||
|
| "provider.request"
|
||||||
|
| "childProcess.exit";
|
||||||
|
|
||||||
|
export type DiagnosticsTimelineAttributes = Record<string, string | number | boolean | null>;
|
||||||
|
|
||||||
|
export type DiagnosticsTimelineEvent = {
|
||||||
|
type: DiagnosticsTimelineEventType;
|
||||||
|
name: string;
|
||||||
|
timestamp?: string;
|
||||||
|
runId?: string;
|
||||||
|
envName?: string;
|
||||||
|
pid?: number;
|
||||||
|
phase?: string;
|
||||||
|
spanId?: string;
|
||||||
|
parentSpanId?: string;
|
||||||
|
durationMs?: number;
|
||||||
|
attributes?: DiagnosticsTimelineAttributes;
|
||||||
|
errorName?: string;
|
||||||
|
errorMessage?: string;
|
||||||
|
p50Ms?: number;
|
||||||
|
p95Ms?: number;
|
||||||
|
p99Ms?: number;
|
||||||
|
maxMs?: number;
|
||||||
|
activeSpanName?: string;
|
||||||
|
provider?: string;
|
||||||
|
operation?: string;
|
||||||
|
ok?: boolean;
|
||||||
|
command?: string;
|
||||||
|
exitCode?: number | null;
|
||||||
|
signal?: string | null;
|
||||||
|
};
|
||||||
|
|
||||||
|
type DiagnosticsTimelineSpanOptions = {
|
||||||
|
phase?: string;
|
||||||
|
parentSpanId?: string;
|
||||||
|
attributes?: DiagnosticsTimelineAttributes;
|
||||||
|
env?: NodeJS.ProcessEnv;
|
||||||
|
};
|
||||||
|
|
||||||
|
let warnedAboutTimelineWrite = false;
|
||||||
|
const createdTimelineDirs = new Set<string>();
|
||||||
|
|
||||||
|
export function isDiagnosticsTimelineEnabled(env: NodeJS.ProcessEnv = process.env): boolean {
|
||||||
|
return (
|
||||||
|
isTruthyEnvValue(env.OPENCLAW_DIAGNOSTICS) &&
|
||||||
|
typeof env.OPENCLAW_DIAGNOSTICS_TIMELINE_PATH === "string" &&
|
||||||
|
env.OPENCLAW_DIAGNOSTICS_TIMELINE_PATH.trim().length > 0
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function normalizeNumber(value: number | undefined): number | undefined {
|
||||||
|
if (typeof value !== "number" || !Number.isFinite(value)) {
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
return Math.max(0, Math.round(value * 1000) / 1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
function normalizeAttributes(
|
||||||
|
attributes: DiagnosticsTimelineAttributes | undefined,
|
||||||
|
): DiagnosticsTimelineAttributes | undefined {
|
||||||
|
if (!attributes) {
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
const normalized: DiagnosticsTimelineAttributes = {};
|
||||||
|
for (const [key, value] of Object.entries(attributes)) {
|
||||||
|
if (typeof value === "number") {
|
||||||
|
if (Number.isFinite(value)) {
|
||||||
|
normalized[key] = normalizeNumber(value) ?? 0;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (typeof value === "string" || typeof value === "boolean" || value === null) {
|
||||||
|
normalized[key] = value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Object.keys(normalized).length > 0 ? normalized : undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
function serializeTimelineEvent(event: DiagnosticsTimelineEvent, env: NodeJS.ProcessEnv): string {
|
||||||
|
const normalized = {
|
||||||
|
schemaVersion: OPENCLAW_DIAGNOSTICS_TIMELINE_SCHEMA_VERSION,
|
||||||
|
type: event.type,
|
||||||
|
timestamp: event.timestamp ?? new Date().toISOString(),
|
||||||
|
name: event.name,
|
||||||
|
...(env.OPENCLAW_DIAGNOSTICS_RUN_ID ? { runId: env.OPENCLAW_DIAGNOSTICS_RUN_ID } : {}),
|
||||||
|
...(env.OPENCLAW_DIAGNOSTICS_ENV ? { envName: env.OPENCLAW_DIAGNOSTICS_ENV } : {}),
|
||||||
|
pid: process.pid,
|
||||||
|
...(event.runId ? { runId: event.runId } : {}),
|
||||||
|
...(event.envName ? { envName: event.envName } : {}),
|
||||||
|
...(typeof event.pid === "number" ? { pid: event.pid } : {}),
|
||||||
|
...(event.phase ? { phase: event.phase } : {}),
|
||||||
|
...(event.spanId ? { spanId: event.spanId } : {}),
|
||||||
|
...(event.parentSpanId ? { parentSpanId: event.parentSpanId } : {}),
|
||||||
|
...(typeof event.durationMs === "number"
|
||||||
|
? { durationMs: normalizeNumber(event.durationMs) }
|
||||||
|
: {}),
|
||||||
|
...(event.errorName ? { errorName: event.errorName } : {}),
|
||||||
|
...(event.errorMessage ? { errorMessage: event.errorMessage } : {}),
|
||||||
|
...(typeof event.p50Ms === "number" ? { p50Ms: normalizeNumber(event.p50Ms) } : {}),
|
||||||
|
...(typeof event.p95Ms === "number" ? { p95Ms: normalizeNumber(event.p95Ms) } : {}),
|
||||||
|
...(typeof event.p99Ms === "number" ? { p99Ms: normalizeNumber(event.p99Ms) } : {}),
|
||||||
|
...(typeof event.maxMs === "number" ? { maxMs: normalizeNumber(event.maxMs) } : {}),
|
||||||
|
...(event.activeSpanName ? { activeSpanName: event.activeSpanName } : {}),
|
||||||
|
...(event.provider ? { provider: event.provider } : {}),
|
||||||
|
...(event.operation ? { operation: event.operation } : {}),
|
||||||
|
...(typeof event.ok === "boolean" ? { ok: event.ok } : {}),
|
||||||
|
...(event.command ? { command: event.command } : {}),
|
||||||
|
...(event.exitCode !== undefined ? { exitCode: event.exitCode } : {}),
|
||||||
|
...(event.signal !== undefined ? { signal: event.signal } : {}),
|
||||||
|
...(normalizeAttributes(event.attributes)
|
||||||
|
? { attributes: normalizeAttributes(event.attributes) }
|
||||||
|
: {}),
|
||||||
|
};
|
||||||
|
return `${JSON.stringify(normalized)}\n`;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function emitDiagnosticsTimelineEvent(
|
||||||
|
event: DiagnosticsTimelineEvent,
|
||||||
|
env: NodeJS.ProcessEnv = process.env,
|
||||||
|
): void {
|
||||||
|
if (!isDiagnosticsTimelineEnabled(env)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const path = env.OPENCLAW_DIAGNOSTICS_TIMELINE_PATH?.trim();
|
||||||
|
if (!path) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const line = serializeTimelineEvent(event, env);
|
||||||
|
try {
|
||||||
|
const dir = dirname(path);
|
||||||
|
if (!createdTimelineDirs.has(dir)) {
|
||||||
|
mkdirSync(dir, { recursive: true });
|
||||||
|
createdTimelineDirs.add(dir);
|
||||||
|
}
|
||||||
|
appendFileSync(path, line, "utf8");
|
||||||
|
} catch (error) {
|
||||||
|
if (!warnedAboutTimelineWrite) {
|
||||||
|
warnedAboutTimelineWrite = true;
|
||||||
|
process.stderr.write(`[diagnostics] failed to write timeline event: ${String(error)}\n`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function measureDiagnosticsTimelineSpan<T>(
|
||||||
|
name: string,
|
||||||
|
run: () => Promise<T> | T,
|
||||||
|
options: DiagnosticsTimelineSpanOptions = {},
|
||||||
|
): Promise<T> {
|
||||||
|
const env = options.env ?? process.env;
|
||||||
|
if (!isDiagnosticsTimelineEnabled(env)) {
|
||||||
|
return await run();
|
||||||
|
}
|
||||||
|
const spanId = randomUUID();
|
||||||
|
const startedAt = performance.now();
|
||||||
|
emitDiagnosticsTimelineEvent(
|
||||||
|
{
|
||||||
|
type: "span.start",
|
||||||
|
name,
|
||||||
|
phase: options.phase,
|
||||||
|
spanId,
|
||||||
|
parentSpanId: options.parentSpanId,
|
||||||
|
attributes: options.attributes,
|
||||||
|
},
|
||||||
|
env,
|
||||||
|
);
|
||||||
|
try {
|
||||||
|
const result = await run();
|
||||||
|
emitDiagnosticsTimelineEvent(
|
||||||
|
{
|
||||||
|
type: "span.end",
|
||||||
|
name,
|
||||||
|
phase: options.phase,
|
||||||
|
spanId,
|
||||||
|
parentSpanId: options.parentSpanId,
|
||||||
|
durationMs: performance.now() - startedAt,
|
||||||
|
attributes: options.attributes,
|
||||||
|
},
|
||||||
|
env,
|
||||||
|
);
|
||||||
|
return result;
|
||||||
|
} catch (error) {
|
||||||
|
emitDiagnosticsTimelineEvent(
|
||||||
|
{
|
||||||
|
type: "span.error",
|
||||||
|
name,
|
||||||
|
phase: options.phase,
|
||||||
|
spanId,
|
||||||
|
parentSpanId: options.parentSpanId,
|
||||||
|
durationMs: performance.now() - startedAt,
|
||||||
|
attributes: options.attributes,
|
||||||
|
errorName: error instanceof Error ? error.name : typeof error,
|
||||||
|
errorMessage: error instanceof Error ? error.message : String(error),
|
||||||
|
},
|
||||||
|
env,
|
||||||
|
);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export function measureDiagnosticsTimelineSpanSync<T>(
|
||||||
|
name: string,
|
||||||
|
run: () => T,
|
||||||
|
options: DiagnosticsTimelineSpanOptions = {},
|
||||||
|
): T {
|
||||||
|
const env = options.env ?? process.env;
|
||||||
|
if (!isDiagnosticsTimelineEnabled(env)) {
|
||||||
|
return run();
|
||||||
|
}
|
||||||
|
const spanId = randomUUID();
|
||||||
|
const startedAt = performance.now();
|
||||||
|
emitDiagnosticsTimelineEvent(
|
||||||
|
{
|
||||||
|
type: "span.start",
|
||||||
|
name,
|
||||||
|
phase: options.phase,
|
||||||
|
spanId,
|
||||||
|
parentSpanId: options.parentSpanId,
|
||||||
|
attributes: options.attributes,
|
||||||
|
},
|
||||||
|
env,
|
||||||
|
);
|
||||||
|
try {
|
||||||
|
const result = run();
|
||||||
|
emitDiagnosticsTimelineEvent(
|
||||||
|
{
|
||||||
|
type: "span.end",
|
||||||
|
name,
|
||||||
|
phase: options.phase,
|
||||||
|
spanId,
|
||||||
|
parentSpanId: options.parentSpanId,
|
||||||
|
durationMs: performance.now() - startedAt,
|
||||||
|
attributes: options.attributes,
|
||||||
|
},
|
||||||
|
env,
|
||||||
|
);
|
||||||
|
return result;
|
||||||
|
} catch (error) {
|
||||||
|
emitDiagnosticsTimelineEvent(
|
||||||
|
{
|
||||||
|
type: "span.error",
|
||||||
|
name,
|
||||||
|
phase: options.phase,
|
||||||
|
spanId,
|
||||||
|
parentSpanId: options.parentSpanId,
|
||||||
|
durationMs: performance.now() - startedAt,
|
||||||
|
attributes: options.attributes,
|
||||||
|
errorName: error instanceof Error ? error.name : typeof error,
|
||||||
|
errorMessage: error instanceof Error ? error.message : String(error),
|
||||||
|
},
|
||||||
|
env,
|
||||||
|
);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function flushDiagnosticsTimelineForTest(): Promise<void> {
|
||||||
|
await Promise.resolve();
|
||||||
|
}
|
||||||
@@ -12,6 +12,7 @@ import type { OpenClawConfig } from "../config/types.openclaw.js";
|
|||||||
import type { PluginInstallRecord } from "../config/types.plugins.js";
|
import type { PluginInstallRecord } from "../config/types.plugins.js";
|
||||||
import type { GatewayRequestHandler } from "../gateway/server-methods/types.js";
|
import type { GatewayRequestHandler } from "../gateway/server-methods/types.js";
|
||||||
import { openBoundaryFileSync } from "../infra/boundary-file-read.js";
|
import { openBoundaryFileSync } from "../infra/boundary-file-read.js";
|
||||||
|
import { measureDiagnosticsTimelineSpanSync } from "../infra/diagnostics-timeline.js";
|
||||||
import { createSubsystemLogger } from "../logging/subsystem.js";
|
import { createSubsystemLogger } from "../logging/subsystem.js";
|
||||||
import {
|
import {
|
||||||
DEFAULT_MEMORY_DREAMING_PLUGIN_ID,
|
DEFAULT_MEMORY_DREAMING_PLUGIN_ID,
|
||||||
@@ -2136,7 +2137,18 @@ export function loadOpenClawPlugins(options: PluginLoadOptions = {}): PluginRegi
|
|||||||
env,
|
env,
|
||||||
warn: (message) => logger.warn(`[plugins] ${record.id}: ${message}`),
|
warn: (message) => logger.warn(`[plugins] ${record.id}: ${message}`),
|
||||||
}));
|
}));
|
||||||
installer(installParams);
|
measureDiagnosticsTimelineSpanSync(
|
||||||
|
"runtimeDeps.stage",
|
||||||
|
() => installer(installParams),
|
||||||
|
{
|
||||||
|
phase: "startup",
|
||||||
|
env,
|
||||||
|
attributes: {
|
||||||
|
pluginId: record.id,
|
||||||
|
dependencyCount: installSpecs.length,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
);
|
||||||
},
|
},
|
||||||
logInstalled: (installedSpecs) => {
|
logInstalled: (installedSpecs) => {
|
||||||
if (shouldActivate) {
|
if (shouldActivate) {
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import type { OpenClawConfig } from "../config/types.openclaw.js";
|
import type { OpenClawConfig } from "../config/types.openclaw.js";
|
||||||
|
import { measureDiagnosticsTimelineSpanSync } from "../infra/diagnostics-timeline.js";
|
||||||
import { resolveInstalledPluginIndexPolicyHash } from "./installed-plugin-index-policy.js";
|
import { resolveInstalledPluginIndexPolicyHash } from "./installed-plugin-index-policy.js";
|
||||||
import type { InstalledPluginIndex } from "./installed-plugin-index.js";
|
import type { InstalledPluginIndex } from "./installed-plugin-index.js";
|
||||||
import {
|
import {
|
||||||
@@ -123,6 +124,23 @@ export function buildPluginMetadataOwnerMaps(
|
|||||||
|
|
||||||
export function loadPluginMetadataSnapshot(
|
export function loadPluginMetadataSnapshot(
|
||||||
params: LoadPluginMetadataSnapshotParams,
|
params: LoadPluginMetadataSnapshotParams,
|
||||||
|
): PluginMetadataSnapshot {
|
||||||
|
return measureDiagnosticsTimelineSpanSync(
|
||||||
|
"plugins.metadata.scan",
|
||||||
|
() => loadPluginMetadataSnapshotImpl(params),
|
||||||
|
{
|
||||||
|
phase: "startup",
|
||||||
|
env: params.env,
|
||||||
|
attributes: {
|
||||||
|
hasWorkspaceDir: params.workspaceDir !== undefined,
|
||||||
|
hasInstalledIndex: params.index !== undefined,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function loadPluginMetadataSnapshotImpl(
|
||||||
|
params: LoadPluginMetadataSnapshotParams,
|
||||||
): PluginMetadataSnapshot {
|
): PluginMetadataSnapshot {
|
||||||
const totalStartedAt = performance.now();
|
const totalStartedAt = performance.now();
|
||||||
const registryStartedAt = performance.now();
|
const registryStartedAt = performance.now();
|
||||||
|
|||||||
Reference in New Issue
Block a user