mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 08:20:43 +00:00
feat: emit diagnostics timeline
This commit is contained in:
@@ -112,6 +112,7 @@ Inline `--password` can be exposed in local process listings. Prefer `--password
|
||||
### Startup profiling
|
||||
|
||||
- Set `OPENCLAW_GATEWAY_STARTUP_TRACE=1` to log phase timings during Gateway startup, including per-phase `eventLoopMax` delay and plugin lookup-table timings for installed-index, manifest registry, startup planning, and owner-map work.
|
||||
- Set `OPENCLAW_DIAGNOSTICS=1` with `OPENCLAW_DIAGNOSTICS_TIMELINE_PATH=<path>` to write a best-effort JSONL startup diagnostics timeline for external QA harnesses. Add `OPENCLAW_DIAGNOSTICS_EVENT_LOOP=1` to include event-loop samples.
|
||||
- Run `pnpm test:startup:gateway -- --runs 5 --warmup 1` to benchmark Gateway startup. The benchmark records first process output, `/healthz`, `/readyz`, startup trace timings, event-loop delay, and plugin lookup-table timing details.
|
||||
|
||||
## Query a running Gateway
|
||||
|
||||
@@ -3,6 +3,7 @@ import { initSubagentRegistry } from "../agents/subagent-registry.js";
|
||||
import { runChannelPluginStartupMaintenance } from "../channels/plugins/lifecycle-startup.js";
|
||||
import { applyPluginAutoEnable } from "../config/plugin-auto-enable.js";
|
||||
import type { OpenClawConfig } from "../config/types.openclaw.js";
|
||||
import { measureDiagnosticsTimelineSpan } from "../infra/diagnostics-timeline.js";
|
||||
import { resolveOpenClawPackageRootSync } from "../infra/openclaw-root.js";
|
||||
import {
|
||||
pruneUnknownBundledRuntimeDepsRoots,
|
||||
@@ -43,6 +44,23 @@ async function prestageGatewayBundledRuntimeDeps(params: {
|
||||
cfg: OpenClawConfig;
|
||||
pluginIds: readonly string[];
|
||||
log: GatewayPluginBootstrapLog;
|
||||
}): Promise<void> {
|
||||
await measureDiagnosticsTimelineSpan(
|
||||
"runtimeDeps.stage",
|
||||
() => prestageGatewayBundledRuntimeDepsImpl(params),
|
||||
{
|
||||
phase: "startup",
|
||||
attributes: {
|
||||
pluginCount: params.pluginIds.length,
|
||||
},
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
async function prestageGatewayBundledRuntimeDepsImpl(params: {
|
||||
cfg: OpenClawConfig;
|
||||
pluginIds: readonly string[];
|
||||
log: GatewayPluginBootstrapLog;
|
||||
}): Promise<void> {
|
||||
if (params.pluginIds.length === 0) {
|
||||
return;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { monitorEventLoopDelay } from "node:perf_hooks";
|
||||
import { monitorEventLoopDelay, performance } from "node:perf_hooks";
|
||||
import { getActiveEmbeddedRunCount } from "../agents/pi-embedded-runner/run-state.js";
|
||||
import { getTotalPendingReplies } from "../auto-reply/reply/dispatcher-registry.js";
|
||||
import type { CanvasHostServer } from "../canvas-host/server.js";
|
||||
@@ -24,6 +24,10 @@ import {
|
||||
isDiagnosticsEnabled,
|
||||
setDiagnosticsEnabledForProcess,
|
||||
} from "../infra/diagnostic-events.js";
|
||||
import {
|
||||
emitDiagnosticsTimelineEvent,
|
||||
isDiagnosticsTimelineEnabled,
|
||||
} from "../infra/diagnostics-timeline.js";
|
||||
import { isTruthyEnvValue, isVitestRuntimeEnv, logAcceptedEnvOption } from "../infra/env.js";
|
||||
import { ensureOpenClawCliOnPath } from "../infra/path-env.js";
|
||||
import { setGatewaySigusr1RestartPolicy, setPreRestartDeferralCheck } from "../infra/restart.js";
|
||||
@@ -164,30 +168,80 @@ const gatewayRuntime = runtimeForLogger(log);
|
||||
const canvasRuntime = runtimeForLogger(logCanvas);
|
||||
|
||||
function createGatewayStartupTrace() {
|
||||
const enabled = isTruthyEnvValue(process.env.OPENCLAW_GATEWAY_STARTUP_TRACE);
|
||||
const eventLoopDelay = enabled ? monitorEventLoopDelay({ resolution: 10 }) : undefined;
|
||||
const logEnabled = isTruthyEnvValue(process.env.OPENCLAW_GATEWAY_STARTUP_TRACE);
|
||||
const timelineEnabled = isDiagnosticsTimelineEnabled();
|
||||
const eventLoopTimelineEnabled =
|
||||
timelineEnabled && isTruthyEnvValue(process.env.OPENCLAW_DIAGNOSTICS_EVENT_LOOP);
|
||||
const eventLoopDelay =
|
||||
logEnabled || eventLoopTimelineEnabled ? monitorEventLoopDelay({ resolution: 10 }) : undefined;
|
||||
eventLoopDelay?.enable();
|
||||
const started = performance.now();
|
||||
let last = started;
|
||||
let spanSequence = 0;
|
||||
const formatMetric = (key: string, value: number | string) =>
|
||||
`${key}=${typeof value === "number" ? value.toFixed(1) : value}`;
|
||||
const readEventLoopMaxMs = () => {
|
||||
if (!eventLoopDelay) {
|
||||
return 0;
|
||||
const mapTimelineName = (name: string) => {
|
||||
switch (name) {
|
||||
case "config.snapshot":
|
||||
return "config.load";
|
||||
case "config.auth":
|
||||
case "config.final-snapshot":
|
||||
case "runtime.config":
|
||||
return "config.normalize";
|
||||
case "plugins.bootstrap":
|
||||
return "plugins.load";
|
||||
case "runtime.post-attach":
|
||||
case "ready":
|
||||
return "gateway.ready";
|
||||
default:
|
||||
return name;
|
||||
}
|
||||
const maxMs = eventLoopDelay.max / 1_000_000;
|
||||
};
|
||||
const takeEventLoopSample = () => {
|
||||
if (!eventLoopDelay) {
|
||||
return undefined;
|
||||
}
|
||||
const sample = {
|
||||
p50Ms: eventLoopDelay.percentile(50) / 1_000_000,
|
||||
p95Ms: eventLoopDelay.percentile(95) / 1_000_000,
|
||||
p99Ms: eventLoopDelay.percentile(99) / 1_000_000,
|
||||
maxMs: eventLoopDelay.max / 1_000_000,
|
||||
};
|
||||
eventLoopDelay.reset();
|
||||
return maxMs;
|
||||
return sample;
|
||||
};
|
||||
const emitEventLoopTimelineSample = (
|
||||
activeSpanName: string,
|
||||
sample: ReturnType<typeof takeEventLoopSample>,
|
||||
) => {
|
||||
if (!eventLoopTimelineEnabled) {
|
||||
return;
|
||||
}
|
||||
if (!sample) {
|
||||
return;
|
||||
}
|
||||
emitDiagnosticsTimelineEvent({
|
||||
type: "eventLoop.sample",
|
||||
name: "eventLoop",
|
||||
phase: "startup",
|
||||
activeSpanName: mapTimelineName(activeSpanName),
|
||||
attributes:
|
||||
activeSpanName === mapTimelineName(activeSpanName)
|
||||
? undefined
|
||||
: { traceName: activeSpanName },
|
||||
...sample,
|
||||
});
|
||||
};
|
||||
const emit = (
|
||||
name: string,
|
||||
durationMs: number,
|
||||
totalMs: number,
|
||||
eventLoopSample: ReturnType<typeof takeEventLoopSample>,
|
||||
extras: ReadonlyArray<readonly [string, number | string]> = [],
|
||||
) => {
|
||||
if (enabled) {
|
||||
if (logEnabled) {
|
||||
const metrics = [
|
||||
`eventLoopMax=${readEventLoopMaxMs().toFixed(1)}ms`,
|
||||
`eventLoopMax=${(eventLoopSample?.maxMs ?? 0).toFixed(1)}ms`,
|
||||
...extras.map(([key, value]) => formatMetric(key, value)),
|
||||
].join(" ");
|
||||
log.info(
|
||||
@@ -198,27 +252,78 @@ function createGatewayStartupTrace() {
|
||||
return {
|
||||
mark(name: string) {
|
||||
const now = performance.now();
|
||||
emit(name, now - last, now - started);
|
||||
const eventLoopSample = takeEventLoopSample();
|
||||
emit(name, now - last, now - started, eventLoopSample);
|
||||
emitDiagnosticsTimelineEvent({
|
||||
type: "mark",
|
||||
name: mapTimelineName(name),
|
||||
phase: "startup",
|
||||
durationMs: now - started,
|
||||
attributes: name === mapTimelineName(name) ? undefined : { traceName: name },
|
||||
});
|
||||
emitEventLoopTimelineSample(name, eventLoopSample);
|
||||
last = now;
|
||||
if (name === "ready") {
|
||||
eventLoopDelay?.disable();
|
||||
}
|
||||
},
|
||||
detail(name: string, metrics: ReadonlyArray<readonly [string, number | string]>) {
|
||||
if (!enabled) {
|
||||
return;
|
||||
const attributes = Object.fromEntries(metrics);
|
||||
if (logEnabled) {
|
||||
log.info(
|
||||
`startup trace: ${name} ${metrics.map(([key, value]) => formatMetric(key, value)).join(" ")}`,
|
||||
);
|
||||
}
|
||||
log.info(
|
||||
`startup trace: ${name} ${metrics.map(([key, value]) => formatMetric(key, value)).join(" ")}`,
|
||||
);
|
||||
emitDiagnosticsTimelineEvent({
|
||||
type: "mark",
|
||||
name: mapTimelineName(name),
|
||||
phase: "startup",
|
||||
attributes: {
|
||||
traceName: name,
|
||||
...attributes,
|
||||
},
|
||||
});
|
||||
},
|
||||
async measure<T>(name: string, run: () => Promise<T> | T): Promise<T> {
|
||||
const before = performance.now();
|
||||
const spanId = `gateway-startup-${++spanSequence}`;
|
||||
emitDiagnosticsTimelineEvent({
|
||||
type: "span.start",
|
||||
name: mapTimelineName(name),
|
||||
phase: "startup",
|
||||
spanId,
|
||||
attributes: name === mapTimelineName(name) ? undefined : { traceName: name },
|
||||
});
|
||||
try {
|
||||
return await run();
|
||||
const result = await run();
|
||||
const now = performance.now();
|
||||
emitDiagnosticsTimelineEvent({
|
||||
type: "span.end",
|
||||
name: mapTimelineName(name),
|
||||
phase: "startup",
|
||||
spanId,
|
||||
durationMs: now - before,
|
||||
attributes: name === mapTimelineName(name) ? undefined : { traceName: name },
|
||||
});
|
||||
return result;
|
||||
} catch (error) {
|
||||
const now = performance.now();
|
||||
emitDiagnosticsTimelineEvent({
|
||||
type: "span.error",
|
||||
name: mapTimelineName(name),
|
||||
phase: "startup",
|
||||
spanId,
|
||||
durationMs: now - before,
|
||||
attributes: name === mapTimelineName(name) ? undefined : { traceName: name },
|
||||
errorName: error instanceof Error ? error.name : typeof error,
|
||||
errorMessage: error instanceof Error ? error.message : String(error),
|
||||
});
|
||||
throw error;
|
||||
} finally {
|
||||
const now = performance.now();
|
||||
emit(name, now - before, now - started);
|
||||
const eventLoopSample = takeEventLoopSample();
|
||||
emit(name, now - before, now - started, eventLoopSample);
|
||||
emitEventLoopTimelineSample(name, eventLoopSample);
|
||||
last = now;
|
||||
}
|
||||
},
|
||||
|
||||
160
src/infra/diagnostics-timeline.test.ts
Normal file
160
src/infra/diagnostics-timeline.test.ts
Normal file
@@ -0,0 +1,160 @@
|
||||
import { mkdtemp, readFile, rm } from "node:fs/promises";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, describe, expect, it } from "vitest";
|
||||
import {
|
||||
emitDiagnosticsTimelineEvent,
|
||||
flushDiagnosticsTimelineForTest,
|
||||
isDiagnosticsTimelineEnabled,
|
||||
measureDiagnosticsTimelineSpan,
|
||||
measureDiagnosticsTimelineSpanSync,
|
||||
} from "./diagnostics-timeline.js";
|
||||
|
||||
const tempDirs: string[] = [];
|
||||
|
||||
async function createTimelineEnv() {
|
||||
const dir = await mkdtemp(join(tmpdir(), "openclaw-diagnostics-timeline-"));
|
||||
tempDirs.push(dir);
|
||||
return {
|
||||
env: {
|
||||
OPENCLAW_DIAGNOSTICS: "1",
|
||||
OPENCLAW_DIAGNOSTICS_RUN_ID: "run-1",
|
||||
OPENCLAW_DIAGNOSTICS_ENV: "env-1",
|
||||
OPENCLAW_DIAGNOSTICS_TIMELINE_PATH: join(dir, "nested", "timeline.jsonl"),
|
||||
} as NodeJS.ProcessEnv,
|
||||
path: join(dir, "nested", "timeline.jsonl"),
|
||||
};
|
||||
}
|
||||
|
||||
async function readTimeline(path: string) {
|
||||
await flushDiagnosticsTimelineForTest();
|
||||
return (await readFile(path, "utf8"))
|
||||
.trim()
|
||||
.split("\n")
|
||||
.map((line) => JSON.parse(line) as Record<string, unknown>);
|
||||
}
|
||||
|
||||
afterEach(async () => {
|
||||
await Promise.all(tempDirs.splice(0).map((dir) => rm(dir, { recursive: true, force: true })));
|
||||
});
|
||||
|
||||
describe("diagnostics timeline", () => {
|
||||
it("detects when timeline output is enabled", async () => {
|
||||
const { env } = await createTimelineEnv();
|
||||
|
||||
expect(isDiagnosticsTimelineEnabled(env)).toBe(true);
|
||||
expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS: "0" })).toBe(false);
|
||||
expect(isDiagnosticsTimelineEnabled({ ...env, OPENCLAW_DIAGNOSTICS_TIMELINE_PATH: "" })).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
it("writes JSONL diagnostic events with the stable envelope", async () => {
|
||||
const { env, path } = await createTimelineEnv();
|
||||
|
||||
emitDiagnosticsTimelineEvent(
|
||||
{
|
||||
type: "mark",
|
||||
name: "gateway.ready",
|
||||
phase: "startup",
|
||||
attributes: {
|
||||
ok: true,
|
||||
count: 2,
|
||||
ignored: Number.NaN,
|
||||
},
|
||||
},
|
||||
env,
|
||||
);
|
||||
|
||||
const [event] = await readTimeline(path);
|
||||
expect(event).toMatchObject({
|
||||
schemaVersion: "openclaw.diagnostics.v1",
|
||||
type: "mark",
|
||||
name: "gateway.ready",
|
||||
runId: "run-1",
|
||||
envName: "env-1",
|
||||
phase: "startup",
|
||||
attributes: {
|
||||
ok: true,
|
||||
count: 2,
|
||||
},
|
||||
});
|
||||
expect(event?.timestamp).toEqual(expect.any(String));
|
||||
expect(event?.pid).toEqual(expect.any(Number));
|
||||
expect((event?.attributes as Record<string, unknown>).ignored).toBeUndefined();
|
||||
});
|
||||
|
||||
it("records span start and end events around successful work", async () => {
|
||||
const { env, path } = await createTimelineEnv();
|
||||
|
||||
await expect(
|
||||
measureDiagnosticsTimelineSpan("runtimeDeps.stage", () => "ok", {
|
||||
env,
|
||||
phase: "startup",
|
||||
attributes: { pluginCount: 3 },
|
||||
}),
|
||||
).resolves.toBe("ok");
|
||||
|
||||
const events = await readTimeline(path);
|
||||
expect(events).toHaveLength(2);
|
||||
expect(events[0]).toMatchObject({
|
||||
type: "span.start",
|
||||
name: "runtimeDeps.stage",
|
||||
phase: "startup",
|
||||
attributes: { pluginCount: 3 },
|
||||
});
|
||||
expect(events[1]).toMatchObject({
|
||||
type: "span.end",
|
||||
name: "runtimeDeps.stage",
|
||||
phase: "startup",
|
||||
attributes: { pluginCount: 3 },
|
||||
});
|
||||
expect(events[1]?.spanId).toBe(events[0]?.spanId);
|
||||
expect(events[1]?.durationMs).toEqual(expect.any(Number));
|
||||
});
|
||||
|
||||
it("records span error events and rethrows failures", async () => {
|
||||
const { env, path } = await createTimelineEnv();
|
||||
|
||||
await expect(
|
||||
measureDiagnosticsTimelineSpan(
|
||||
"plugins.load",
|
||||
() => {
|
||||
throw new TypeError("bad plugin");
|
||||
},
|
||||
{ env, phase: "startup" },
|
||||
),
|
||||
).rejects.toThrow("bad plugin");
|
||||
|
||||
const events = await readTimeline(path);
|
||||
expect(events).toHaveLength(2);
|
||||
expect(events[1]).toMatchObject({
|
||||
type: "span.error",
|
||||
name: "plugins.load",
|
||||
phase: "startup",
|
||||
errorName: "TypeError",
|
||||
errorMessage: "bad plugin",
|
||||
});
|
||||
});
|
||||
|
||||
it("records synchronous spans", async () => {
|
||||
const { env, path } = await createTimelineEnv();
|
||||
|
||||
const result = measureDiagnosticsTimelineSpanSync("plugins.metadata.scan", () => 42, {
|
||||
env,
|
||||
phase: "startup",
|
||||
});
|
||||
|
||||
expect(result).toBe(42);
|
||||
const events = await readTimeline(path);
|
||||
expect(events).toHaveLength(2);
|
||||
expect(events[0]).toMatchObject({
|
||||
type: "span.start",
|
||||
name: "plugins.metadata.scan",
|
||||
});
|
||||
expect(events[1]).toMatchObject({
|
||||
type: "span.end",
|
||||
name: "plugins.metadata.scan",
|
||||
});
|
||||
});
|
||||
});
|
||||
272
src/infra/diagnostics-timeline.ts
Normal file
272
src/infra/diagnostics-timeline.ts
Normal file
@@ -0,0 +1,272 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { appendFileSync, mkdirSync } from "node:fs";
|
||||
import { dirname } from "node:path";
|
||||
import { performance } from "node:perf_hooks";
|
||||
import { isTruthyEnvValue } from "./env.js";
|
||||
|
||||
export const OPENCLAW_DIAGNOSTICS_TIMELINE_SCHEMA_VERSION = "openclaw.diagnostics.v1";
|
||||
|
||||
export type DiagnosticsTimelineEventType =
|
||||
| "span.start"
|
||||
| "span.end"
|
||||
| "span.error"
|
||||
| "mark"
|
||||
| "eventLoop.sample"
|
||||
| "provider.request"
|
||||
| "childProcess.exit";
|
||||
|
||||
export type DiagnosticsTimelineAttributes = Record<string, string | number | boolean | null>;
|
||||
|
||||
export type DiagnosticsTimelineEvent = {
|
||||
type: DiagnosticsTimelineEventType;
|
||||
name: string;
|
||||
timestamp?: string;
|
||||
runId?: string;
|
||||
envName?: string;
|
||||
pid?: number;
|
||||
phase?: string;
|
||||
spanId?: string;
|
||||
parentSpanId?: string;
|
||||
durationMs?: number;
|
||||
attributes?: DiagnosticsTimelineAttributes;
|
||||
errorName?: string;
|
||||
errorMessage?: string;
|
||||
p50Ms?: number;
|
||||
p95Ms?: number;
|
||||
p99Ms?: number;
|
||||
maxMs?: number;
|
||||
activeSpanName?: string;
|
||||
provider?: string;
|
||||
operation?: string;
|
||||
ok?: boolean;
|
||||
command?: string;
|
||||
exitCode?: number | null;
|
||||
signal?: string | null;
|
||||
};
|
||||
|
||||
type DiagnosticsTimelineSpanOptions = {
|
||||
phase?: string;
|
||||
parentSpanId?: string;
|
||||
attributes?: DiagnosticsTimelineAttributes;
|
||||
env?: NodeJS.ProcessEnv;
|
||||
};
|
||||
|
||||
let warnedAboutTimelineWrite = false;
|
||||
const createdTimelineDirs = new Set<string>();
|
||||
|
||||
export function isDiagnosticsTimelineEnabled(env: NodeJS.ProcessEnv = process.env): boolean {
|
||||
return (
|
||||
isTruthyEnvValue(env.OPENCLAW_DIAGNOSTICS) &&
|
||||
typeof env.OPENCLAW_DIAGNOSTICS_TIMELINE_PATH === "string" &&
|
||||
env.OPENCLAW_DIAGNOSTICS_TIMELINE_PATH.trim().length > 0
|
||||
);
|
||||
}
|
||||
|
||||
function normalizeNumber(value: number | undefined): number | undefined {
|
||||
if (typeof value !== "number" || !Number.isFinite(value)) {
|
||||
return undefined;
|
||||
}
|
||||
return Math.max(0, Math.round(value * 1000) / 1000);
|
||||
}
|
||||
|
||||
function normalizeAttributes(
|
||||
attributes: DiagnosticsTimelineAttributes | undefined,
|
||||
): DiagnosticsTimelineAttributes | undefined {
|
||||
if (!attributes) {
|
||||
return undefined;
|
||||
}
|
||||
const normalized: DiagnosticsTimelineAttributes = {};
|
||||
for (const [key, value] of Object.entries(attributes)) {
|
||||
if (typeof value === "number") {
|
||||
if (Number.isFinite(value)) {
|
||||
normalized[key] = normalizeNumber(value) ?? 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (typeof value === "string" || typeof value === "boolean" || value === null) {
|
||||
normalized[key] = value;
|
||||
}
|
||||
}
|
||||
return Object.keys(normalized).length > 0 ? normalized : undefined;
|
||||
}
|
||||
|
||||
function serializeTimelineEvent(event: DiagnosticsTimelineEvent, env: NodeJS.ProcessEnv): string {
|
||||
const normalized = {
|
||||
schemaVersion: OPENCLAW_DIAGNOSTICS_TIMELINE_SCHEMA_VERSION,
|
||||
type: event.type,
|
||||
timestamp: event.timestamp ?? new Date().toISOString(),
|
||||
name: event.name,
|
||||
...(env.OPENCLAW_DIAGNOSTICS_RUN_ID ? { runId: env.OPENCLAW_DIAGNOSTICS_RUN_ID } : {}),
|
||||
...(env.OPENCLAW_DIAGNOSTICS_ENV ? { envName: env.OPENCLAW_DIAGNOSTICS_ENV } : {}),
|
||||
pid: process.pid,
|
||||
...(event.runId ? { runId: event.runId } : {}),
|
||||
...(event.envName ? { envName: event.envName } : {}),
|
||||
...(typeof event.pid === "number" ? { pid: event.pid } : {}),
|
||||
...(event.phase ? { phase: event.phase } : {}),
|
||||
...(event.spanId ? { spanId: event.spanId } : {}),
|
||||
...(event.parentSpanId ? { parentSpanId: event.parentSpanId } : {}),
|
||||
...(typeof event.durationMs === "number"
|
||||
? { durationMs: normalizeNumber(event.durationMs) }
|
||||
: {}),
|
||||
...(event.errorName ? { errorName: event.errorName } : {}),
|
||||
...(event.errorMessage ? { errorMessage: event.errorMessage } : {}),
|
||||
...(typeof event.p50Ms === "number" ? { p50Ms: normalizeNumber(event.p50Ms) } : {}),
|
||||
...(typeof event.p95Ms === "number" ? { p95Ms: normalizeNumber(event.p95Ms) } : {}),
|
||||
...(typeof event.p99Ms === "number" ? { p99Ms: normalizeNumber(event.p99Ms) } : {}),
|
||||
...(typeof event.maxMs === "number" ? { maxMs: normalizeNumber(event.maxMs) } : {}),
|
||||
...(event.activeSpanName ? { activeSpanName: event.activeSpanName } : {}),
|
||||
...(event.provider ? { provider: event.provider } : {}),
|
||||
...(event.operation ? { operation: event.operation } : {}),
|
||||
...(typeof event.ok === "boolean" ? { ok: event.ok } : {}),
|
||||
...(event.command ? { command: event.command } : {}),
|
||||
...(event.exitCode !== undefined ? { exitCode: event.exitCode } : {}),
|
||||
...(event.signal !== undefined ? { signal: event.signal } : {}),
|
||||
...(normalizeAttributes(event.attributes)
|
||||
? { attributes: normalizeAttributes(event.attributes) }
|
||||
: {}),
|
||||
};
|
||||
return `${JSON.stringify(normalized)}\n`;
|
||||
}
|
||||
|
||||
export function emitDiagnosticsTimelineEvent(
|
||||
event: DiagnosticsTimelineEvent,
|
||||
env: NodeJS.ProcessEnv = process.env,
|
||||
): void {
|
||||
if (!isDiagnosticsTimelineEnabled(env)) {
|
||||
return;
|
||||
}
|
||||
const path = env.OPENCLAW_DIAGNOSTICS_TIMELINE_PATH?.trim();
|
||||
if (!path) {
|
||||
return;
|
||||
}
|
||||
const line = serializeTimelineEvent(event, env);
|
||||
try {
|
||||
const dir = dirname(path);
|
||||
if (!createdTimelineDirs.has(dir)) {
|
||||
mkdirSync(dir, { recursive: true });
|
||||
createdTimelineDirs.add(dir);
|
||||
}
|
||||
appendFileSync(path, line, "utf8");
|
||||
} catch (error) {
|
||||
if (!warnedAboutTimelineWrite) {
|
||||
warnedAboutTimelineWrite = true;
|
||||
process.stderr.write(`[diagnostics] failed to write timeline event: ${String(error)}\n`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export async function measureDiagnosticsTimelineSpan<T>(
|
||||
name: string,
|
||||
run: () => Promise<T> | T,
|
||||
options: DiagnosticsTimelineSpanOptions = {},
|
||||
): Promise<T> {
|
||||
const env = options.env ?? process.env;
|
||||
if (!isDiagnosticsTimelineEnabled(env)) {
|
||||
return await run();
|
||||
}
|
||||
const spanId = randomUUID();
|
||||
const startedAt = performance.now();
|
||||
emitDiagnosticsTimelineEvent(
|
||||
{
|
||||
type: "span.start",
|
||||
name,
|
||||
phase: options.phase,
|
||||
spanId,
|
||||
parentSpanId: options.parentSpanId,
|
||||
attributes: options.attributes,
|
||||
},
|
||||
env,
|
||||
);
|
||||
try {
|
||||
const result = await run();
|
||||
emitDiagnosticsTimelineEvent(
|
||||
{
|
||||
type: "span.end",
|
||||
name,
|
||||
phase: options.phase,
|
||||
spanId,
|
||||
parentSpanId: options.parentSpanId,
|
||||
durationMs: performance.now() - startedAt,
|
||||
attributes: options.attributes,
|
||||
},
|
||||
env,
|
||||
);
|
||||
return result;
|
||||
} catch (error) {
|
||||
emitDiagnosticsTimelineEvent(
|
||||
{
|
||||
type: "span.error",
|
||||
name,
|
||||
phase: options.phase,
|
||||
spanId,
|
||||
parentSpanId: options.parentSpanId,
|
||||
durationMs: performance.now() - startedAt,
|
||||
attributes: options.attributes,
|
||||
errorName: error instanceof Error ? error.name : typeof error,
|
||||
errorMessage: error instanceof Error ? error.message : String(error),
|
||||
},
|
||||
env,
|
||||
);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
export function measureDiagnosticsTimelineSpanSync<T>(
|
||||
name: string,
|
||||
run: () => T,
|
||||
options: DiagnosticsTimelineSpanOptions = {},
|
||||
): T {
|
||||
const env = options.env ?? process.env;
|
||||
if (!isDiagnosticsTimelineEnabled(env)) {
|
||||
return run();
|
||||
}
|
||||
const spanId = randomUUID();
|
||||
const startedAt = performance.now();
|
||||
emitDiagnosticsTimelineEvent(
|
||||
{
|
||||
type: "span.start",
|
||||
name,
|
||||
phase: options.phase,
|
||||
spanId,
|
||||
parentSpanId: options.parentSpanId,
|
||||
attributes: options.attributes,
|
||||
},
|
||||
env,
|
||||
);
|
||||
try {
|
||||
const result = run();
|
||||
emitDiagnosticsTimelineEvent(
|
||||
{
|
||||
type: "span.end",
|
||||
name,
|
||||
phase: options.phase,
|
||||
spanId,
|
||||
parentSpanId: options.parentSpanId,
|
||||
durationMs: performance.now() - startedAt,
|
||||
attributes: options.attributes,
|
||||
},
|
||||
env,
|
||||
);
|
||||
return result;
|
||||
} catch (error) {
|
||||
emitDiagnosticsTimelineEvent(
|
||||
{
|
||||
type: "span.error",
|
||||
name,
|
||||
phase: options.phase,
|
||||
spanId,
|
||||
parentSpanId: options.parentSpanId,
|
||||
durationMs: performance.now() - startedAt,
|
||||
attributes: options.attributes,
|
||||
errorName: error instanceof Error ? error.name : typeof error,
|
||||
errorMessage: error instanceof Error ? error.message : String(error),
|
||||
},
|
||||
env,
|
||||
);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
export async function flushDiagnosticsTimelineForTest(): Promise<void> {
|
||||
await Promise.resolve();
|
||||
}
|
||||
@@ -12,6 +12,7 @@ import type { OpenClawConfig } from "../config/types.openclaw.js";
|
||||
import type { PluginInstallRecord } from "../config/types.plugins.js";
|
||||
import type { GatewayRequestHandler } from "../gateway/server-methods/types.js";
|
||||
import { openBoundaryFileSync } from "../infra/boundary-file-read.js";
|
||||
import { measureDiagnosticsTimelineSpanSync } from "../infra/diagnostics-timeline.js";
|
||||
import { createSubsystemLogger } from "../logging/subsystem.js";
|
||||
import {
|
||||
DEFAULT_MEMORY_DREAMING_PLUGIN_ID,
|
||||
@@ -2136,7 +2137,18 @@ export function loadOpenClawPlugins(options: PluginLoadOptions = {}): PluginRegi
|
||||
env,
|
||||
warn: (message) => logger.warn(`[plugins] ${record.id}: ${message}`),
|
||||
}));
|
||||
installer(installParams);
|
||||
measureDiagnosticsTimelineSpanSync(
|
||||
"runtimeDeps.stage",
|
||||
() => installer(installParams),
|
||||
{
|
||||
phase: "startup",
|
||||
env,
|
||||
attributes: {
|
||||
pluginId: record.id,
|
||||
dependencyCount: installSpecs.length,
|
||||
},
|
||||
},
|
||||
);
|
||||
},
|
||||
logInstalled: (installedSpecs) => {
|
||||
if (shouldActivate) {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import type { OpenClawConfig } from "../config/types.openclaw.js";
|
||||
import { measureDiagnosticsTimelineSpanSync } from "../infra/diagnostics-timeline.js";
|
||||
import { resolveInstalledPluginIndexPolicyHash } from "./installed-plugin-index-policy.js";
|
||||
import type { InstalledPluginIndex } from "./installed-plugin-index.js";
|
||||
import {
|
||||
@@ -123,6 +124,23 @@ export function buildPluginMetadataOwnerMaps(
|
||||
|
||||
export function loadPluginMetadataSnapshot(
|
||||
params: LoadPluginMetadataSnapshotParams,
|
||||
): PluginMetadataSnapshot {
|
||||
return measureDiagnosticsTimelineSpanSync(
|
||||
"plugins.metadata.scan",
|
||||
() => loadPluginMetadataSnapshotImpl(params),
|
||||
{
|
||||
phase: "startup",
|
||||
env: params.env,
|
||||
attributes: {
|
||||
hasWorkspaceDir: params.workspaceDir !== undefined,
|
||||
hasInstalledIndex: params.index !== undefined,
|
||||
},
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
function loadPluginMetadataSnapshotImpl(
|
||||
params: LoadPluginMetadataSnapshotParams,
|
||||
): PluginMetadataSnapshot {
|
||||
const totalStartedAt = performance.now();
|
||||
const registryStartedAt = performance.now();
|
||||
|
||||
Reference in New Issue
Block a user