feat(diagnostics): emit exec process telemetry (#71451)

This commit is contained in:
Vincent Koc
2026-04-25 00:12:58 -07:00
committed by GitHub
parent 188bce424b
commit 3e3bba4f30
8 changed files with 294 additions and 1 deletions

View File

@@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai
### Changes
- Diagnostics/OTEL: emit bounded exec-process diagnostics and export them as `openclaw.exec` spans without exposing command text, working directories, or container identifiers. (#70424) Thanks @jlapenna.
- Diagnostics/OTEL: support `OPENCLAW_OTEL_PRELOADED=1` so the plugin can reuse an already-registered OpenTelemetry SDK while keeping OpenClaw diagnostic listeners wired. (#70424) Thanks @jlapenna.
- Control UI: refine the agent Tool Access panel with compact live-tool chips, collapsible tool groups, direct per-tool toggles, and clearer runtime/source provenance. (#71405) Thanks @BunsDev.
- Memory-core/hybrid search: expose raw `vectorScore` and `textScore` alongside the combined `score` on hybrid memory search results, so callers can inspect vector-versus-text retrieval contribution before temporal decay or MMR reordering. Fixes #68166. (#68286) Thanks @ajfonthemove.

View File

@@ -216,6 +216,12 @@ Queue + session:
- `run.attempt`: run retry/attempt metadata.
- `diagnostic.heartbeat`: aggregate counters (webhooks/queue/session).
Exec:
- `exec.process.completed`: terminal exec process outcome, duration, target, mode,
command length, exit code, exit signal, timed-out flag, and failure kind. Only the
command's length is recorded; command text and working directories are not included.
### Enable diagnostics (no exporter)
Use this if you want diagnostics events available to plugins or custom sinks:
@@ -352,6 +358,11 @@ Queues + sessions:
- `openclaw.session.stuck_age_ms` (histogram, attrs: `openclaw.state`)
- `openclaw.run.attempt` (counter, attrs: `openclaw.attempt`)
Exec:
- `openclaw.exec.duration_ms` (histogram, attrs: `openclaw.exec.target`,
`openclaw.exec.mode`, `openclaw.outcome`, `openclaw.failureKind`)
### Exported spans (names + key attributes)
- `openclaw.model.usage`
@@ -367,6 +378,10 @@ Queues + sessions:
- `openclaw.tool.execution`
- `gen_ai.tool.name`, `openclaw.toolName`, `openclaw.errorCategory`,
`openclaw.tool.params.*`
- `openclaw.exec`
- `openclaw.exec.target`, `openclaw.exec.mode`, `openclaw.outcome`,
`openclaw.failureKind`, `openclaw.exec.command_length`,
`openclaw.exec.exit_code`, `openclaw.exec.exit_signal`,
`openclaw.exec.timed_out`
- `openclaw.webhook.processed`
- `openclaw.channel`, `openclaw.webhook`, `openclaw.chatId`
- `openclaw.webhook.error`

View File

@@ -817,6 +817,67 @@ describe("diagnostics-otel service", () => {
await service.stop?.(ctx);
});
// Emits one failed `exec.process.completed` event and verifies that it is
// exported as a duration histogram sample plus an `openclaw.exec` span, and
// that none of the sensitive attribute keys end up on the span.
test("exports exec process spans without command text", async () => {
  const service = createDiagnosticsOtelService();
  const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
  await service.start(ctx);

  emitDiagnosticEvent({
    type: "exec.process.completed",
    target: "host",
    mode: "child",
    outcome: "failed",
    durationMs: 30,
    commandLength: 42,
    exitCode: 1,
    timedOut: false,
    failureKind: "runtime-error",
  });
  await flushDiagnosticEvents();

  // Metric side: duration is recorded with the low-cardinality attributes only.
  expect(telemetryState.histograms.get("openclaw.exec.duration_ms")?.record).toHaveBeenCalledWith(
    30,
    expect.objectContaining({
      "openclaw.exec.target": "host",
      "openclaw.exec.mode": "child",
      "openclaw.outcome": "failed",
      "openclaw.failureKind": "runtime-error",
    }),
  );

  // Trace side: locate the startSpan call for the exec span.
  const execCall = telemetryState.tracer.startSpan.mock.calls.find(
    (call) => call[0] === "openclaw.exec",
  );
  expect(execCall?.[1]).toMatchObject({
    attributes: {
      "openclaw.exec.target": "host",
      "openclaw.exec.mode": "child",
      "openclaw.outcome": "failed",
      "openclaw.exec.command_length": 42,
      "openclaw.exec.exit_code": 1,
      "openclaw.exec.timed_out": false,
      "openclaw.failureKind": "runtime-error",
    },
    startTime: expect.any(Number),
  });

  // `expect.not.objectContaining({...})` only rejects an object that contains
  // *every* listed property, so a single leaked key would slip through a
  // combined matcher. Check each forbidden key on its own instead.
  for (const forbiddenKey of [
    "openclaw.exec.command",
    "openclaw.exec.workdir",
    "openclaw.sessionKey",
  ]) {
    expect(execCall?.[1]).toEqual({
      attributes: expect.not.objectContaining({ [forbiddenKey]: expect.anything() }),
      startTime: expect.any(Number),
    });
  }

  // A failed outcome must mark the span as an error (status code 2) and
  // carry the failure kind as the status message.
  const execSpan = telemetryState.spans.find((span) => span.name === "openclaw.exec");
  expect(execSpan?.setStatus).toHaveBeenCalledWith({
    code: 2,
    message: "runtime-error",
  });
  expect(execSpan?.end).toHaveBeenCalledWith(expect.any(Number));

  await service.stop?.(ctx);
});
test("does not export model or tool content unless capture is explicitly enabled", async () => {
const service = createDiagnosticsOtelService();
const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });

View File

@@ -557,6 +557,10 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
description: "Tool execution duration",
},
);
// Histogram instrument `openclaw.exec.duration_ms` (unit: ms) for exec process durations.
const execProcessDurationHistogram = meter.createHistogram("openclaw.exec.duration_ms", {
unit: "ms",
description: "Exec process duration",
});
let recordLogRecord:
| ((evt: Extract<DiagnosticEventPayload, { type: "log.record" }>) => void)
@@ -1087,6 +1091,48 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
span.end(evt.ts);
};
/**
 * Exports one `exec.process.completed` diagnostic event.
 *
 * Always records the duration on the exec histogram with the target, mode,
 * outcome and (when present) failure kind. When traces are enabled it also
 * creates an `openclaw.exec` span that additionally carries the command
 * length, exit code, exit signal and timed-out flag. Failed outcomes set the
 * span status to ERROR, using the failure kind as the message when available.
 */
const recordExecProcessCompleted = (
  evt: Extract<DiagnosticEventPayload, { type: "exec.process.completed" }>,
) => {
  const metricAttrs: Record<string, string | number> = {
    "openclaw.exec.target": evt.target,
    "openclaw.exec.mode": evt.mode,
    "openclaw.outcome": evt.outcome,
    ...(evt.failureKind ? { "openclaw.failureKind": evt.failureKind } : {}),
  };
  execProcessDurationHistogram.record(evt.durationMs, metricAttrs);

  // Span export is optional; the metric above is recorded either way.
  if (!tracesEnabled) {
    return;
  }

  // Span attributes start from the metric attributes and add per-process detail.
  const spanAttrs: Record<string, string | number | boolean> = { ...metricAttrs };
  spanAttrs["openclaw.exec.command_length"] = evt.commandLength;
  if (typeof evt.exitCode === "number") {
    spanAttrs["openclaw.exec.exit_code"] = evt.exitCode;
  }
  if (evt.exitSignal) {
    // Passed through lowCardinalityAttr with "other" as its second argument
    // (presumably the fallback bucket; confirm against the helper).
    spanAttrs["openclaw.exec.exit_signal"] = lowCardinalityAttr(evt.exitSignal, "other");
  }
  if (evt.timedOut !== undefined) {
    spanAttrs["openclaw.exec.timed_out"] = evt.timedOut;
  }

  const execSpan = spanWithDuration("openclaw.exec", spanAttrs, evt.durationMs, {
    endTimeMs: evt.ts,
  });
  if (evt.outcome === "failed") {
    // Omit `message` entirely (rather than passing undefined) when no failure kind exists.
    const errorStatus = evt.failureKind
      ? { code: SpanStatusCode.ERROR, message: evt.failureKind }
      : { code: SpanStatusCode.ERROR };
    execSpan.setStatus(errorStatus);
  }
  execSpan.end(evt.ts);
};
const recordHeartbeat = (
evt: Extract<DiagnosticEventPayload, { type: "diagnostic.heartbeat" }>,
) => {
@@ -1147,6 +1193,9 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
case "tool.execution.error":
recordToolExecutionError(evt);
return;
case "exec.process.completed":
recordExecProcessCompleted(evt);
return;
case "log.record":
recordLogRecord?.(evt);
return;

View File

@@ -1,4 +1,9 @@
import { afterEach, beforeAll, beforeEach, expect, test, vi } from "vitest";
import {
onInternalDiagnosticEvent,
resetDiagnosticEventsForTest,
type DiagnosticEventPayload,
} from "../infra/diagnostic-events.js";
import type { ManagedRun, SpawnInput } from "../process/supervisor/index.js";
let listRunningSessions: typeof import("./bash-process-registry.js").listRunningSessions;
@@ -56,6 +61,7 @@ beforeEach(() => {
afterEach(() => {
resetProcessRegistryForTests();
resetDiagnosticEventsForTest();
vi.clearAllMocks();
});
@@ -101,3 +107,53 @@ test("exec cleans session state when PTY fallback spawn also fails", async () =>
expect(listRunningSessions()).toHaveLength(0);
});
/**
 * Returns a promise that resolves from a `setImmediate` callback, giving
 * already-queued work a chance to run before the test continues.
 */
function flushDiagnosticEvents() {
  return new Promise<void>((done) => {
    setImmediate(done);
  });
}
// Runs a successful exec through the mocked supervisor and checks that the
// emitted `exec.process.completed` event carries bounded metadata only: the
// command length is present, but neither the command text nor the working
// directory appears anywhere in the serialized event.
test("exec emits bounded process diagnostics without command text", async () => {
  supervisorSpawnMock.mockImplementationOnce(async (input: SpawnInput) =>
    createSuccessfulRun(input),
  );

  const captured: DiagnosticEventPayload[] = [];
  const stopListening = onInternalDiagnosticEvent((payload) => {
    captured.push(payload);
  });

  try {
    const command = "printf super-secret-value";
    const workdir = process.cwd();
    const handle = await runExecProcess({
      command,
      workdir,
      env: {},
      usePty: false,
      warnings: [],
      maxOutput: 20_000,
      pendingMaxOutput: 20_000,
      notifyOnExit: false,
      sessionKey: "session-1",
      timeoutSec: 5,
    });
    await handle.promise;
    await flushDiagnosticEvents();

    const completed = captured.find((item) => item.type === "exec.process.completed");
    expect(completed).toMatchObject({
      type: "exec.process.completed",
      target: "host",
      mode: "child",
      outcome: "completed",
      durationMs: expect.any(Number),
      commandLength: command.length,
      exitCode: 0,
      sessionKey: "session-1",
    });

    // None of these fragments may leak into the serialized event.
    const serialized = JSON.stringify(completed);
    for (const forbidden of ["printf", "super-secret-value", process.cwd()]) {
      expect(serialized).not.toContain(forbidden);
    }
  } finally {
    // Always detach the listener so later tests do not receive these events.
    stopListening();
  }
});

View File

@@ -1,5 +1,6 @@
import path from "node:path";
import type { AgentToolResult } from "@mariozechner/pi-agent-core";
import { emitDiagnosticEvent } from "../infra/diagnostic-events.js";
import {
DEFAULT_EXEC_APPROVAL_TIMEOUT_MS,
resolveExecApprovalAllowedDecisions,
@@ -165,6 +166,40 @@ export type ExecProcessHandle = {
disableUpdates: () => void;
};
/**
 * Converts an exit signal into its string form for diagnostics.
 *
 * @param signal - Signal name or number, or `null` when there is no signal.
 * @returns `undefined` for `null`; otherwise `String(signal)`.
 */
function normalizeExecExitSignal(signal: NodeJS.Signals | number | null): string | undefined {
  return signal === null ? undefined : String(signal);
}
/**
 * Emits the `exec.process.completed` diagnostic event for a finished exec.
 *
 * Only the command's length is forwarded; the command text itself is never
 * placed on the event. Optional fields are added only when they carry a value:
 * a non-blank (trimmed) session key, a numeric exit code, and a non-empty exit
 * signal. `timedOut` and `failureKind` are included only for failed outcomes.
 *
 * @param params.command - Command string; used solely for its length.
 * @param params.mode - Spawn mode reported on the event.
 * @param params.outcome - Final process outcome to summarize.
 * @param params.sessionKey - Optional session key; blank values are dropped.
 * @param params.target - Whether the exec ran on the host or in a sandbox.
 */
function emitExecProcessCompleted(params: {
  command: string;
  mode: "child" | "pty";
  outcome: ExecProcessOutcome;
  sessionKey?: string;
  target: "host" | "sandbox";
}): void {
  const { command, mode, outcome, target } = params;
  const exitSignal = normalizeExecExitSignal(outcome.exitSignal);
  const sessionKey = params.sessionKey?.trim();

  emitDiagnosticEvent({
    type: "exec.process.completed",
    target,
    mode,
    outcome: outcome.status,
    durationMs: outcome.durationMs,
    commandLength: command.length,
    ...(sessionKey ? { sessionKey } : {}),
    ...(typeof outcome.exitCode === "number" ? { exitCode: outcome.exitCode } : {}),
    ...(exitSignal ? { exitSignal } : {}),
    ...(outcome.status === "failed"
      ? { timedOut: outcome.timedOut, failureKind: outcome.failureKind }
      : {}),
  });
}
/**
 * Maps an exec host to its display label: `"sandbox"` and `"gateway"` map to
 * themselves, and every other host value is labelled `"node"`.
 */
export function renderExecHostLabel(host: ExecHost) {
  if (host === "sandbox") {
    return "sandbox";
  }
  if (host === "gateway") {
    return "gateway";
  }
  return "node";
}
@@ -523,6 +558,7 @@ export async function runExecProcess(opts: {
const startedAt = Date.now();
const sessionId = createSessionSlug();
const execCommand = opts.execCommand ?? opts.command;
const diagnosticTarget = opts.sandbox ? "sandbox" : "host";
const supervisor = getProcessSupervisor();
const shellRuntimeEnv: Record<string, string> = {
...opts.env,
@@ -759,11 +795,33 @@ export async function runExecProcess(opts: {
} catch (retryErr) {
markExited(session, null, null, "failed");
maybeNotifyOnExit(session, "failed");
emitExecProcessCompleted({
command: opts.command,
mode: "child",
outcome: buildExecRuntimeErrorOutcome({
error: retryErr,
aggregated: session.aggregated.trim(),
durationMs: Date.now() - startedAt,
}),
sessionKey: opts.sessionKey,
target: diagnosticTarget,
});
throw retryErr;
}
} else {
markExited(session, null, null, "failed");
maybeNotifyOnExit(session, "failed");
emitExecProcessCompleted({
command: opts.command,
mode: spawnSpec.mode,
outcome: buildExecRuntimeErrorOutcome({
error: err,
aggregated: session.aggregated.trim(),
durationMs: Date.now() - startedAt,
}),
sessionKey: opts.sessionKey,
target: diagnosticTarget,
});
throw err;
}
}
@@ -799,17 +857,32 @@ export async function runExecProcess(opts: {
token: sandboxFinalizeToken,
});
}
emitExecProcessCompleted({
command: opts.command,
mode: usingPty ? "pty" : "child",
outcome,
sessionKey: opts.sessionKey,
target: diagnosticTarget,
});
return outcome;
})
.catch((err): ExecProcessOutcome => {
updatesDisabled = true;
markExited(session, null, null, "failed");
maybeNotifyOnExit(session, "failed");
return buildExecRuntimeErrorOutcome({
const outcome = buildExecRuntimeErrorOutcome({
error: err,
aggregated: session.aggregated.trim(),
durationMs: Date.now() - startedAt,
});
emitExecProcessCompleted({
command: opts.command,
mode: usingPty ? "pty" : "child",
outcome,
sessionKey: opts.sessionKey,
target: diagnosticTarget,
});
return outcome;
});
return {

View File

@@ -185,6 +185,27 @@ export type DiagnosticToolExecutionErrorEvent = DiagnosticToolExecutionBaseEvent
errorCode?: string;
};
/**
 * Diagnostic event describing the terminal state of an exec process.
 *
 * The event carries bounded metadata only: the command is represented by its
 * length, not its text.
 */
export type DiagnosticExecProcessCompletedEvent = DiagnosticBaseEvent & {
  type: "exec.process.completed";

  /** Where the process ran. */
  target: "host" | "sandbox";
  /** How the process was spawned. */
  mode: "child" | "pty";
  /** Terminal status of the process. */
  outcome: "completed" | "failed";
  /** Process duration in milliseconds. */
  durationMs: number;
  /** Length of the command string. */
  commandLength: number;

  /** Session key associated with the exec, when one is available. */
  sessionKey?: string;
  /** Numeric exit code, when one is available. */
  exitCode?: number;
  /** Exit signal in string form, when one is available. */
  exitSignal?: string;
  /** Whether the process timed out. */
  timedOut?: boolean;
  /** Classification of the failure. */
  failureKind?:
    | "shell-command-not-found"
    | "shell-not-executable"
    | "overall-timeout"
    | "no-output-timeout"
    | "signal"
    | "aborted"
    | "runtime-error";
};
type DiagnosticRunBaseEvent = DiagnosticBaseEvent & {
runId: string;
sessionKey?: string;
@@ -299,6 +320,7 @@ export type DiagnosticEventPayload =
| DiagnosticToolExecutionStartedEvent
| DiagnosticToolExecutionCompletedEvent
| DiagnosticToolExecutionErrorEvent
| DiagnosticExecProcessCompletedEvent
| DiagnosticRunStartedEvent
| DiagnosticRunCompletedEvent
| DiagnosticModelCallStartedEvent
@@ -329,6 +351,7 @@ const ASYNC_DIAGNOSTIC_EVENT_TYPES = new Set<DiagnosticEventPayload["type"]>([
"tool.execution.started",
"tool.execution.completed",
"tool.execution.error",
"exec.process.completed",
"model.call.started",
"model.call.completed",
"model.call.error",

View File

@@ -17,10 +17,12 @@ export type DiagnosticStabilityEventRecord = {
channel?: string;
pluginId?: string;
source?: string;
target?: string;
surface?: string;
action?: string;
reason?: string;
outcome?: string;
mode?: string;
level?: string;
detector?: string;
toolName?: string;
@@ -28,6 +30,9 @@ export type DiagnosticStabilityEventRecord = {
provider?: string;
model?: string;
durationMs?: number;
commandLength?: number;
exitCode?: number;
timedOut?: boolean;
costUsd?: number;
count?: number;
bytes?: number;
@@ -247,6 +252,16 @@ function sanitizeDiagnosticEvent(event: DiagnosticEventPayload): DiagnosticStabi
record.durationMs = event.durationMs;
assignReasonCode(record, event.errorCategory);
break;
case "exec.process.completed":
record.target = event.target;
record.mode = event.mode;
record.outcome = event.outcome;
record.durationMs = event.durationMs;
record.commandLength = event.commandLength;
record.exitCode = event.exitCode;
record.timedOut = event.timedOut;
assignReasonCode(record, event.failureKind);
break;
case "run.started":
record.provider = event.provider;
record.model = event.model;