mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 16:50:43 +00:00
feat(diagnostics): emit exec process telemetry (#71451)
This commit is contained in:
@@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Changes
|
||||
|
||||
- Diagnostics/OTEL: emit bounded exec-process diagnostics and export them as `openclaw.exec` spans without exposing command text, working directories, or container identifiers. (#70424) Thanks @jlapenna.
|
||||
- Diagnostics/OTEL: support `OPENCLAW_OTEL_PRELOADED=1` so the plugin can reuse an already-registered OpenTelemetry SDK while keeping OpenClaw diagnostic listeners wired. (#70424) Thanks @jlapenna.
|
||||
- Control UI: refine the agent Tool Access panel with compact live-tool chips, collapsible tool groups, direct per-tool toggles, and clearer runtime/source provenance. (#71405) Thanks @BunsDev.
|
||||
- Memory-core/hybrid search: expose raw `vectorScore` and `textScore` alongside the combined `score` on hybrid memory search results, so callers can inspect vector-versus-text retrieval contribution before temporal decay or MMR reordering. Fixes #68166. (#68286) Thanks @ajfonthemove.
|
||||
|
||||
@@ -216,6 +216,12 @@ Queue + session:
|
||||
- `run.attempt`: run retry/attempt metadata.
|
||||
- `diagnostic.heartbeat`: aggregate counters (webhooks/queue/session).
|
||||
|
||||
Exec:
|
||||
|
||||
- `exec.process.completed`: terminal exec process outcome, duration, target, mode,
|
||||
exit code, and failure kind. Command text and working directories are not
|
||||
included.
|
||||
|
||||
### Enable diagnostics (no exporter)
|
||||
|
||||
Use this if you want diagnostics events available to plugins or custom sinks:
|
||||
@@ -352,6 +358,11 @@ Queues + sessions:
|
||||
- `openclaw.session.stuck_age_ms` (histogram, attrs: `openclaw.state`)
|
||||
- `openclaw.run.attempt` (counter, attrs: `openclaw.attempt`)
|
||||
|
||||
Exec:
|
||||
|
||||
- `openclaw.exec.duration_ms` (histogram, attrs: `openclaw.exec.target`,
|
||||
`openclaw.exec.mode`, `openclaw.outcome`, `openclaw.failureKind`)
|
||||
|
||||
### Exported spans (names + key attributes)
|
||||
|
||||
- `openclaw.model.usage`
|
||||
@@ -367,6 +378,10 @@ Queues + sessions:
|
||||
- `openclaw.tool.execution`
|
||||
- `gen_ai.tool.name`, `openclaw.toolName`, `openclaw.errorCategory`,
|
||||
`openclaw.tool.params.*`
|
||||
- `openclaw.exec`
|
||||
- `openclaw.exec.target`, `openclaw.exec.mode`, `openclaw.outcome`,
|
||||
`openclaw.failureKind`, `openclaw.exec.command_length`,
|
||||
`openclaw.exec.exit_code`, `openclaw.exec.timed_out`
|
||||
- `openclaw.webhook.processed`
|
||||
- `openclaw.channel`, `openclaw.webhook`, `openclaw.chatId`
|
||||
- `openclaw.webhook.error`
|
||||
|
||||
@@ -817,6 +817,67 @@ describe("diagnostics-otel service", () => {
|
||||
await service.stop?.(ctx);
|
||||
});
|
||||
|
||||
// Verifies that an `exec.process.completed` diagnostic event is exported as an
// `openclaw.exec.duration_ms` histogram sample and an `openclaw.exec` span, and
// that the span attributes never include command text, working directory, or
// session key.
test("exports exec process spans without command text", async () => {
  const service = createDiagnosticsOtelService();
  const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
  await service.start(ctx);

  emitDiagnosticEvent({
    type: "exec.process.completed",
    target: "host",
    mode: "child",
    outcome: "failed",
    durationMs: 30,
    commandLength: 42,
    exitCode: 1,
    timedOut: false,
    failureKind: "runtime-error",
  });
  await flushDiagnosticEvents();

  expect(telemetryState.histograms.get("openclaw.exec.duration_ms")?.record).toHaveBeenCalledWith(
    30,
    expect.objectContaining({
      "openclaw.exec.target": "host",
      "openclaw.exec.mode": "child",
      "openclaw.outcome": "failed",
      "openclaw.failureKind": "runtime-error",
    }),
  );

  const execCall = telemetryState.tracer.startSpan.mock.calls.find(
    (call) => call[0] === "openclaw.exec",
  );
  expect(execCall?.[1]).toMatchObject({
    attributes: {
      "openclaw.exec.target": "host",
      "openclaw.exec.mode": "child",
      "openclaw.outcome": "failed",
      "openclaw.exec.command_length": 42,
      "openclaw.exec.exit_code": 1,
      "openclaw.exec.timed_out": false,
      "openclaw.failureKind": "runtime-error",
    },
    startTime: expect.any(Number),
  });

  // `expect.not.objectContaining({ a, b, c })` only rejects an object that has
  // ALL of the listed keys at once, so a single leaked attribute would slip
  // through a combined matcher. Check every forbidden key on its own instead.
  const forbiddenAttributeKeys = [
    "openclaw.exec.command",
    "openclaw.exec.workdir",
    "openclaw.sessionKey",
  ];
  for (const forbiddenKey of forbiddenAttributeKeys) {
    expect(execCall?.[1]).toEqual({
      attributes: expect.not.objectContaining({
        [forbiddenKey]: expect.anything(),
      }),
      startTime: expect.any(Number),
    });
  }

  const execSpan = telemetryState.spans.find((span) => span.name === "openclaw.exec");
  expect(execSpan?.setStatus).toHaveBeenCalledWith({
    code: 2,
    message: "runtime-error",
  });
  expect(execSpan?.end).toHaveBeenCalledWith(expect.any(Number));
  await service.stop?.(ctx);
});
|
||||
|
||||
test("does not export model or tool content unless capture is explicitly enabled", async () => {
|
||||
const service = createDiagnosticsOtelService();
|
||||
const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
|
||||
|
||||
@@ -557,6 +557,10 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
description: "Tool execution duration",
|
||||
},
|
||||
);
|
||||
const execProcessDurationHistogram = meter.createHistogram("openclaw.exec.duration_ms", {
|
||||
unit: "ms",
|
||||
description: "Exec process duration",
|
||||
});
|
||||
|
||||
let recordLogRecord:
|
||||
| ((evt: Extract<DiagnosticEventPayload, { type: "log.record" }>) => void)
|
||||
@@ -1087,6 +1091,48 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
span.end(evt.ts);
|
||||
};
|
||||
|
||||
// Records one finished exec process: always a duration histogram sample, and
// (when traces are enabled) an `openclaw.exec` span ending at the event time.
const recordExecProcessCompleted = (
  evt: Extract<DiagnosticEventPayload, { type: "exec.process.completed" }>,
) => {
  const { failureKind, durationMs } = evt;

  // Attributes shared by the metric sample and the span.
  const metricAttrs: Record<string, string | number> = {
    "openclaw.exec.target": evt.target,
    "openclaw.exec.mode": evt.mode,
    "openclaw.outcome": evt.outcome,
    ...(failureKind ? { "openclaw.failureKind": failureKind } : {}),
  };
  execProcessDurationHistogram.record(durationMs, metricAttrs);

  if (tracesEnabled) {
    // Span-only attributes; optional fields are added only when present on the event.
    const spanAttrs: Record<string, string | number | boolean> = {
      ...metricAttrs,
      "openclaw.exec.command_length": evt.commandLength,
      ...(typeof evt.exitCode === "number" ? { "openclaw.exec.exit_code": evt.exitCode } : {}),
      ...(evt.exitSignal
        ? { "openclaw.exec.exit_signal": lowCardinalityAttr(evt.exitSignal, "other") }
        : {}),
      ...(evt.timedOut === undefined ? {} : { "openclaw.exec.timed_out": evt.timedOut }),
    };

    const execSpan = spanWithDuration("openclaw.exec", spanAttrs, durationMs, {
      endTimeMs: evt.ts,
    });
    if (evt.outcome === "failed") {
      // The status message is attached only when a failure kind is known.
      const errorStatus = failureKind
        ? { code: SpanStatusCode.ERROR, message: failureKind }
        : { code: SpanStatusCode.ERROR };
      execSpan.setStatus(errorStatus);
    }
    execSpan.end(evt.ts);
  }
};
|
||||
|
||||
const recordHeartbeat = (
|
||||
evt: Extract<DiagnosticEventPayload, { type: "diagnostic.heartbeat" }>,
|
||||
) => {
|
||||
@@ -1147,6 +1193,9 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
case "tool.execution.error":
|
||||
recordToolExecutionError(evt);
|
||||
return;
|
||||
case "exec.process.completed":
|
||||
recordExecProcessCompleted(evt);
|
||||
return;
|
||||
case "log.record":
|
||||
recordLogRecord?.(evt);
|
||||
return;
|
||||
|
||||
@@ -1,4 +1,9 @@
|
||||
import { afterEach, beforeAll, beforeEach, expect, test, vi } from "vitest";
|
||||
import {
|
||||
onInternalDiagnosticEvent,
|
||||
resetDiagnosticEventsForTest,
|
||||
type DiagnosticEventPayload,
|
||||
} from "../infra/diagnostic-events.js";
|
||||
import type { ManagedRun, SpawnInput } from "../process/supervisor/index.js";
|
||||
|
||||
let listRunningSessions: typeof import("./bash-process-registry.js").listRunningSessions;
|
||||
@@ -56,6 +61,7 @@ beforeEach(() => {
|
||||
|
||||
afterEach(() => {
|
||||
resetProcessRegistryForTests();
|
||||
resetDiagnosticEventsForTest();
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
@@ -101,3 +107,53 @@ test("exec cleans session state when PTY fallback spawn also fails", async () =>
|
||||
|
||||
expect(listRunningSessions()).toHaveLength(0);
|
||||
});
|
||||
|
||||
/** Returns a promise that resolves from a `setImmediate` callback. */
function flushDiagnosticEvents() {
  return new Promise<void>((done) => {
    setImmediate(done);
  });
}
|
||||
|
||||
// Runs a successful exec and checks that the resulting
// `exec.process.completed` event carries bounded metadata only: the serialized
// event must not contain the command text or the working directory.
test("exec emits bounded process diagnostics without command text", async () => {
  supervisorSpawnMock.mockImplementationOnce(async (input: SpawnInput) =>
    createSuccessfulRun(input),
  );

  const captured: DiagnosticEventPayload[] = [];
  const stopListening = onInternalDiagnosticEvent((payload) => {
    captured.push(payload);
  });

  try {
    const command = "printf super-secret-value";
    const workdir = process.cwd();
    const handle = await runExecProcess({
      command,
      workdir,
      env: {},
      usePty: false,
      warnings: [],
      maxOutput: 20_000,
      pendingMaxOutput: 20_000,
      notifyOnExit: false,
      sessionKey: "session-1",
      timeoutSec: 5,
    });
    await handle.promise;
    await flushDiagnosticEvents();

    const completed = captured.find((payload) => payload.type === "exec.process.completed");
    expect(completed).toMatchObject({
      type: "exec.process.completed",
      target: "host",
      mode: "child",
      outcome: "completed",
      durationMs: expect.any(Number),
      commandLength: command.length,
      exitCode: 0,
      sessionKey: "session-1",
    });

    // Only the command length may appear; none of the raw inputs may leak.
    const asJson = JSON.stringify(completed);
    expect(asJson).not.toContain("printf");
    expect(asJson).not.toContain("super-secret-value");
    expect(asJson).not.toContain(process.cwd());
  } finally {
    stopListening();
  }
});
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import path from "node:path";
|
||||
import type { AgentToolResult } from "@mariozechner/pi-agent-core";
|
||||
import { emitDiagnosticEvent } from "../infra/diagnostic-events.js";
|
||||
import {
|
||||
DEFAULT_EXEC_APPROVAL_TIMEOUT_MS,
|
||||
resolveExecApprovalAllowedDecisions,
|
||||
@@ -165,6 +166,40 @@ export type ExecProcessHandle = {
|
||||
disableUpdates: () => void;
|
||||
};
|
||||
|
||||
/**
 * Converts a process exit signal into its string form for diagnostics.
 *
 * @param signal - Signal name or number, or `null`/`undefined` when the
 *   process did not exit because of a signal.
 * @returns `String(signal)`, or `undefined` when no signal is present.
 */
function normalizeExecExitSignal(
  signal: NodeJS.Signals | number | null | undefined,
): string | undefined {
  // `== null` matches both null and undefined, so a missing signal can never
  // be rendered as the truthy literal string "undefined".
  if (signal == null) {
    return undefined;
  }
  return String(signal);
}
|
||||
|
||||
/**
 * Emits an `exec.process.completed` diagnostic event for a finished exec run.
 *
 * Only the command's length is forwarded, never the command text itself.
 * Optional fields (session key, exit code, exit signal) are included only when
 * they carry a value; `timedOut` and `failureKind` are included only for
 * failed outcomes.
 */
function emitExecProcessCompleted(params: {
  command: string;
  mode: "child" | "pty";
  outcome: ExecProcessOutcome;
  sessionKey?: string;
  target: "host" | "sandbox";
}): void {
  const { outcome } = params;
  const sessionKey = params.sessionKey?.trim();
  const exitSignal = normalizeExecExitSignal(outcome.exitSignal);

  const sessionFields = sessionKey ? { sessionKey } : {};
  const exitCodeFields = typeof outcome.exitCode === "number" ? { exitCode: outcome.exitCode } : {};
  const exitSignalFields = exitSignal ? { exitSignal } : {};
  const failureFields =
    outcome.status === "failed"
      ? { timedOut: outcome.timedOut, failureKind: outcome.failureKind }
      : {};

  emitDiagnosticEvent({
    type: "exec.process.completed",
    target: params.target,
    mode: params.mode,
    outcome: outcome.status,
    durationMs: outcome.durationMs,
    commandLength: params.command.length,
    ...sessionFields,
    ...exitCodeFields,
    ...exitSignalFields,
    ...failureFields,
  });
}
|
||||
|
||||
/**
 * Maps an exec host to its display label.
 *
 * @returns `"sandbox"` or `"gateway"` for those hosts, `"node"` for anything else.
 */
export function renderExecHostLabel(host: ExecHost) {
  switch (host) {
    case "sandbox":
      return "sandbox";
    case "gateway":
      return "gateway";
    default:
      return "node";
  }
}
|
||||
@@ -523,6 +558,7 @@ export async function runExecProcess(opts: {
|
||||
const startedAt = Date.now();
|
||||
const sessionId = createSessionSlug();
|
||||
const execCommand = opts.execCommand ?? opts.command;
|
||||
const diagnosticTarget = opts.sandbox ? "sandbox" : "host";
|
||||
const supervisor = getProcessSupervisor();
|
||||
const shellRuntimeEnv: Record<string, string> = {
|
||||
...opts.env,
|
||||
@@ -759,11 +795,33 @@ export async function runExecProcess(opts: {
|
||||
} catch (retryErr) {
|
||||
markExited(session, null, null, "failed");
|
||||
maybeNotifyOnExit(session, "failed");
|
||||
emitExecProcessCompleted({
|
||||
command: opts.command,
|
||||
mode: "child",
|
||||
outcome: buildExecRuntimeErrorOutcome({
|
||||
error: retryErr,
|
||||
aggregated: session.aggregated.trim(),
|
||||
durationMs: Date.now() - startedAt,
|
||||
}),
|
||||
sessionKey: opts.sessionKey,
|
||||
target: diagnosticTarget,
|
||||
});
|
||||
throw retryErr;
|
||||
}
|
||||
} else {
|
||||
markExited(session, null, null, "failed");
|
||||
maybeNotifyOnExit(session, "failed");
|
||||
emitExecProcessCompleted({
|
||||
command: opts.command,
|
||||
mode: spawnSpec.mode,
|
||||
outcome: buildExecRuntimeErrorOutcome({
|
||||
error: err,
|
||||
aggregated: session.aggregated.trim(),
|
||||
durationMs: Date.now() - startedAt,
|
||||
}),
|
||||
sessionKey: opts.sessionKey,
|
||||
target: diagnosticTarget,
|
||||
});
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
@@ -799,17 +857,32 @@ export async function runExecProcess(opts: {
|
||||
token: sandboxFinalizeToken,
|
||||
});
|
||||
}
|
||||
emitExecProcessCompleted({
|
||||
command: opts.command,
|
||||
mode: usingPty ? "pty" : "child",
|
||||
outcome,
|
||||
sessionKey: opts.sessionKey,
|
||||
target: diagnosticTarget,
|
||||
});
|
||||
return outcome;
|
||||
})
|
||||
.catch((err): ExecProcessOutcome => {
|
||||
updatesDisabled = true;
|
||||
markExited(session, null, null, "failed");
|
||||
maybeNotifyOnExit(session, "failed");
|
||||
return buildExecRuntimeErrorOutcome({
|
||||
const outcome = buildExecRuntimeErrorOutcome({
|
||||
error: err,
|
||||
aggregated: session.aggregated.trim(),
|
||||
durationMs: Date.now() - startedAt,
|
||||
});
|
||||
emitExecProcessCompleted({
|
||||
command: opts.command,
|
||||
mode: usingPty ? "pty" : "child",
|
||||
outcome,
|
||||
sessionKey: opts.sessionKey,
|
||||
target: diagnosticTarget,
|
||||
});
|
||||
return outcome;
|
||||
});
|
||||
|
||||
return {
|
||||
|
||||
@@ -185,6 +185,27 @@ export type DiagnosticToolExecutionErrorEvent = DiagnosticToolExecutionBaseEvent
|
||||
errorCode?: string;
|
||||
};
|
||||
|
||||
/**
 * Terminal outcome of an exec process. Carries bounded metadata only: the
 * command is represented by its length, not its text.
 */
export type DiagnosticExecProcessCompletedEvent = DiagnosticBaseEvent & {
  type: "exec.process.completed";

  /** Where the process ran. */
  target: "host" | "sandbox";
  /** How the process was spawned. */
  mode: "child" | "pty";
  /** Final status of the process. */
  outcome: "completed" | "failed";

  /** Duration of the run in milliseconds. */
  durationMs: number;
  /** Length of the command string. */
  commandLength: number;

  /** Session key, when one is provided. */
  sessionKey?: string;
  /** Numeric exit code, when one is available. */
  exitCode?: number;
  /** Exit signal rendered as a string, when one is available. */
  exitSignal?: string;
  /** Whether the process timed out. */
  timedOut?: boolean;
  /** Classification of the failure. */
  failureKind?:
    | "shell-command-not-found"
    | "shell-not-executable"
    | "overall-timeout"
    | "no-output-timeout"
    | "signal"
    | "aborted"
    | "runtime-error";
};
|
||||
|
||||
type DiagnosticRunBaseEvent = DiagnosticBaseEvent & {
|
||||
runId: string;
|
||||
sessionKey?: string;
|
||||
@@ -299,6 +320,7 @@ export type DiagnosticEventPayload =
|
||||
| DiagnosticToolExecutionStartedEvent
|
||||
| DiagnosticToolExecutionCompletedEvent
|
||||
| DiagnosticToolExecutionErrorEvent
|
||||
| DiagnosticExecProcessCompletedEvent
|
||||
| DiagnosticRunStartedEvent
|
||||
| DiagnosticRunCompletedEvent
|
||||
| DiagnosticModelCallStartedEvent
|
||||
@@ -329,6 +351,7 @@ const ASYNC_DIAGNOSTIC_EVENT_TYPES = new Set<DiagnosticEventPayload["type"]>([
|
||||
"tool.execution.started",
|
||||
"tool.execution.completed",
|
||||
"tool.execution.error",
|
||||
"exec.process.completed",
|
||||
"model.call.started",
|
||||
"model.call.completed",
|
||||
"model.call.error",
|
||||
|
||||
@@ -17,10 +17,12 @@ export type DiagnosticStabilityEventRecord = {
|
||||
channel?: string;
|
||||
pluginId?: string;
|
||||
source?: string;
|
||||
target?: string;
|
||||
surface?: string;
|
||||
action?: string;
|
||||
reason?: string;
|
||||
outcome?: string;
|
||||
mode?: string;
|
||||
level?: string;
|
||||
detector?: string;
|
||||
toolName?: string;
|
||||
@@ -28,6 +30,9 @@ export type DiagnosticStabilityEventRecord = {
|
||||
provider?: string;
|
||||
model?: string;
|
||||
durationMs?: number;
|
||||
commandLength?: number;
|
||||
exitCode?: number;
|
||||
timedOut?: boolean;
|
||||
costUsd?: number;
|
||||
count?: number;
|
||||
bytes?: number;
|
||||
@@ -247,6 +252,16 @@ function sanitizeDiagnosticEvent(event: DiagnosticEventPayload): DiagnosticStabi
|
||||
record.durationMs = event.durationMs;
|
||||
assignReasonCode(record, event.errorCategory);
|
||||
break;
|
||||
case "exec.process.completed":
|
||||
record.target = event.target;
|
||||
record.mode = event.mode;
|
||||
record.outcome = event.outcome;
|
||||
record.durationMs = event.durationMs;
|
||||
record.commandLength = event.commandLength;
|
||||
record.exitCode = event.exitCode;
|
||||
record.timedOut = event.timedOut;
|
||||
assignReasonCode(record, event.failureKind);
|
||||
break;
|
||||
case "run.started":
|
||||
record.provider = event.provider;
|
||||
record.model = event.model;
|
||||
|
||||
Reference in New Issue
Block a user