diff --git a/CHANGELOG.md b/CHANGELOG.md index 563d3039271..779c4dbbd67 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai ### Changes +- Diagnostics/OTEL: emit bounded exec-process diagnostics and export them as `openclaw.exec` spans without exposing command text, working directories, or container identifiers. (#70424) Thanks @jlapenna. - Diagnostics/OTEL: support `OPENCLAW_OTEL_PRELOADED=1` so the plugin can reuse an already-registered OpenTelemetry SDK while keeping OpenClaw diagnostic listeners wired. (#70424) Thanks @jlapenna. - Control UI: refine the agent Tool Access panel with compact live-tool chips, collapsible tool groups, direct per-tool toggles, and clearer runtime/source provenance. (#71405) Thanks @BunsDev. - Memory-core/hybrid search: expose raw `vectorScore` and `textScore` alongside the combined `score` on hybrid memory search results, so callers can inspect vector-versus-text retrieval contribution before temporal decay or MMR reordering. Fixes #68166. (#68286) Thanks @ajfonthemove. diff --git a/docs/logging.md b/docs/logging.md index 210b3054137..3680eb9c3ee 100644 --- a/docs/logging.md +++ b/docs/logging.md @@ -216,6 +216,12 @@ Queue + session: - `run.attempt`: run retry/attempt metadata. - `diagnostic.heartbeat`: aggregate counters (webhooks/queue/session). +Exec: + +- `exec.process.completed`: terminal exec process outcome, duration, target, mode, + exit code, and failure kind. Command text and working directories are not + included. + ### Enable diagnostics (no exporter) Use this if you want diagnostics events available to plugins or custom sinks: @@ -352,6 +358,11 @@ Queues + sessions: - `openclaw.session.stuck_age_ms` (histogram, attrs: `openclaw.state`) - `openclaw.run.attempt` (counter, attrs: `openclaw.attempt`) +Exec: + +- `openclaw.exec.duration_ms` (histogram, attrs: `openclaw.exec.target`, + `openclaw.exec.mode`, `openclaw.outcome`, `openclaw.failureKind`) + ### Exported spans (names + key attributes) - `openclaw.model.usage` @@ -367,6 +378,10 @@ Queues + sessions: - `openclaw.tool.execution` - `gen_ai.tool.name`, `openclaw.toolName`, `openclaw.errorCategory`, `openclaw.tool.params.*` +- `openclaw.exec` + - `openclaw.exec.target`, `openclaw.exec.mode`, `openclaw.outcome`, + `openclaw.failureKind`, `openclaw.exec.command_length`, + `openclaw.exec.exit_code`, `openclaw.exec.timed_out` - `openclaw.webhook.processed` - `openclaw.channel`, `openclaw.webhook`, `openclaw.chatId` - `openclaw.webhook.error` diff --git a/extensions/diagnostics-otel/src/service.test.ts b/extensions/diagnostics-otel/src/service.test.ts index a11f969ca0c..976f64b23bb 100644 --- a/extensions/diagnostics-otel/src/service.test.ts +++ b/extensions/diagnostics-otel/src/service.test.ts @@ -817,6 +817,67 @@ describe("diagnostics-otel service", () => { await service.stop?.(ctx); }); + test("exports exec process spans without command text", async () => { + const service = createDiagnosticsOtelService(); + const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true }); + await service.start(ctx); + + emitDiagnosticEvent({ + type: "exec.process.completed", + target: "host", + mode: "child", + outcome: "failed", + durationMs: 30, + commandLength: 42, + exitCode: 1, + timedOut: false, + failureKind: "runtime-error", + }); + await flushDiagnosticEvents(); + + expect(telemetryState.histograms.get("openclaw.exec.duration_ms")?.record).toHaveBeenCalledWith( + 30, + expect.objectContaining({ + "openclaw.exec.target": "host", + "openclaw.exec.mode": "child", + "openclaw.outcome": "failed", + "openclaw.failureKind": "runtime-error", + }), + ); + + const execCall = telemetryState.tracer.startSpan.mock.calls.find( + (call) => call[0] === "openclaw.exec", + ); + expect(execCall?.[1]).toMatchObject({ + attributes: { + "openclaw.exec.target": "host", + "openclaw.exec.mode": "child", + "openclaw.outcome": "failed", + "openclaw.exec.command_length": 42, + "openclaw.exec.exit_code": 1, + "openclaw.exec.timed_out": false, + "openclaw.failureKind": "runtime-error", + }, + startTime: expect.any(Number), + }); + expect(execCall?.[1]).toEqual({ + attributes: expect.not.objectContaining({ + "openclaw.exec.command": expect.anything(), + "openclaw.exec.workdir": expect.anything(), + "openclaw.sessionKey": expect.anything(), + }), + startTime: expect.any(Number), + }); + + const execSpan = telemetryState.spans.find((span) => span.name === "openclaw.exec"); + expect(execSpan?.setStatus).toHaveBeenCalledWith({ + code: 2, + message: "runtime-error", + }); + expect(execSpan?.end).toHaveBeenCalledWith(expect.any(Number)); + await service.stop?.(ctx); + }); + test("does not export model or tool content unless capture is explicitly enabled", async () => { const service = createDiagnosticsOtelService(); const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true }); diff --git a/extensions/diagnostics-otel/src/service.ts b/extensions/diagnostics-otel/src/service.ts index 8f08472b07d..4a63637f1e2 100644 --- a/extensions/diagnostics-otel/src/service.ts +++ b/extensions/diagnostics-otel/src/service.ts @@ -557,6 +557,10 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { description: "Tool execution duration", }, ); + const execProcessDurationHistogram = meter.createHistogram("openclaw.exec.duration_ms", { + unit: "ms", + description: "Exec process duration", + }); let recordLogRecord: | ((evt: Extract) => void) @@ -1087,6 +1091,48 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { span.end(evt.ts); }; + const recordExecProcessCompleted = ( + evt: Extract, + ) => { + const attrs: Record = { + "openclaw.exec.target": evt.target, + "openclaw.exec.mode": evt.mode, + "openclaw.outcome": evt.outcome, + }; + if (evt.failureKind) { + attrs["openclaw.failureKind"] = evt.failureKind; + } + execProcessDurationHistogram.record(evt.durationMs, attrs); + if (!tracesEnabled) { + return; + } + + const spanAttrs: Record = { + ...attrs, + "openclaw.exec.command_length": evt.commandLength, + }; + if (typeof evt.exitCode === "number") { + spanAttrs["openclaw.exec.exit_code"] = evt.exitCode; + } + if (evt.exitSignal) { + spanAttrs["openclaw.exec.exit_signal"] = lowCardinalityAttr(evt.exitSignal, "other"); + } + if (evt.timedOut !== undefined) { + spanAttrs["openclaw.exec.timed_out"] = evt.timedOut; + } + + const span = spanWithDuration("openclaw.exec", spanAttrs, evt.durationMs, { + endTimeMs: evt.ts, + }); + if (evt.outcome === "failed") { + span.setStatus({ + code: SpanStatusCode.ERROR, + ...(evt.failureKind ? { message: evt.failureKind } : {}), + }); + } + span.end(evt.ts); + }; + const recordHeartbeat = ( evt: Extract, ) => { @@ -1147,6 +1193,9 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { case "tool.execution.error": recordToolExecutionError(evt); return; + case "exec.process.completed": + recordExecProcessCompleted(evt); + return; case "log.record": recordLogRecord?.(evt); return; diff --git a/src/agents/bash-tools.exec-runtime.pty-fallback.test.ts b/src/agents/bash-tools.exec-runtime.pty-fallback.test.ts index c59570f2f46..eef7d50b059 100644 --- a/src/agents/bash-tools.exec-runtime.pty-fallback.test.ts +++ b/src/agents/bash-tools.exec-runtime.pty-fallback.test.ts @@ -1,4 +1,9 @@ import { afterEach, beforeAll, beforeEach, expect, test, vi } from "vitest"; +import { + onInternalDiagnosticEvent, + resetDiagnosticEventsForTest, + type DiagnosticEventPayload, +} from "../infra/diagnostic-events.js"; import type { ManagedRun, SpawnInput } from "../process/supervisor/index.js"; let listRunningSessions: typeof import("./bash-process-registry.js").listRunningSessions; @@ -56,6 +61,7 @@ beforeEach(() => { afterEach(() => { resetProcessRegistryForTests(); + resetDiagnosticEventsForTest(); vi.clearAllMocks(); }); @@ -101,3 +107,53 @@ test("exec cleans session state when PTY fallback spawn also fails", async () => expect(listRunningSessions()).toHaveLength(0); }); + +function flushDiagnosticEvents() { + return new Promise((resolve) => setImmediate(resolve)); +} + +test("exec emits bounded process diagnostics without command text", async () => { + supervisorSpawnMock.mockImplementationOnce(async (input: SpawnInput) => + createSuccessfulRun(input), + ); + const events: DiagnosticEventPayload[] = []; + const unsubscribe = onInternalDiagnosticEvent((event) => { + events.push(event); + }); + try { + const command = "printf super-secret-value"; + const handle = await runExecProcess({ + command, + workdir: process.cwd(), + env: {}, + usePty: false, + warnings: [], + maxOutput: 20_000, + pendingMaxOutput: 20_000, + notifyOnExit: false, + sessionKey: "session-1", + timeoutSec: 5, + }); + + await handle.promise; + await flushDiagnosticEvents(); + + const event = events.find((item) => item.type === "exec.process.completed"); + expect(event).toMatchObject({ + type: "exec.process.completed", + target: "host", + mode: "child", + outcome: "completed", + durationMs: expect.any(Number), + commandLength: command.length, + exitCode: 0, + sessionKey: "session-1", + }); + const serialized = JSON.stringify(event); + expect(serialized).not.toContain("printf"); + expect(serialized).not.toContain("super-secret-value"); + expect(serialized).not.toContain(process.cwd()); + } finally { + unsubscribe(); + } +}); diff --git a/src/agents/bash-tools.exec-runtime.ts b/src/agents/bash-tools.exec-runtime.ts index bc9529a454a..deb20bced03 100644 --- a/src/agents/bash-tools.exec-runtime.ts +++ b/src/agents/bash-tools.exec-runtime.ts @@ -1,5 +1,6 @@ import path from "node:path"; import type { AgentToolResult } from "@mariozechner/pi-agent-core"; +import { emitDiagnosticEvent } from "../infra/diagnostic-events.js"; import { DEFAULT_EXEC_APPROVAL_TIMEOUT_MS, resolveExecApprovalAllowedDecisions, @@ -165,6 +166,40 @@ export type ExecProcessHandle = { disableUpdates: () => void; }; +function normalizeExecExitSignal(signal: NodeJS.Signals | number | null): string | undefined { + if (signal === null) { + return undefined; + } + return String(signal); +} + +function emitExecProcessCompleted(params: { + command: string; + mode: "child" | "pty"; + outcome: ExecProcessOutcome; + sessionKey?: string; + target: "host" | "sandbox"; +}): void { + const exitSignal = normalizeExecExitSignal(params.outcome.exitSignal); + emitDiagnosticEvent({ + type: "exec.process.completed", + target: params.target, + mode: params.mode, + outcome: params.outcome.status, + durationMs: params.outcome.durationMs, + commandLength: params.command.length, + ...(params.sessionKey?.trim() ? { sessionKey: params.sessionKey.trim() } : {}), + ...(typeof params.outcome.exitCode === "number" ? { exitCode: params.outcome.exitCode } : {}), + ...(exitSignal ? { exitSignal } : {}), + ...(params.outcome.status === "failed" + ? { + timedOut: params.outcome.timedOut, + failureKind: params.outcome.failureKind, + } + : {}), + }); +} + export function renderExecHostLabel(host: ExecHost) { return host === "sandbox" ? "sandbox" : host === "gateway" ? "gateway" : "node"; } @@ -523,6 +558,7 @@ export async function runExecProcess(opts: { const startedAt = Date.now(); const sessionId = createSessionSlug(); const execCommand = opts.execCommand ?? opts.command; + const diagnosticTarget = opts.sandbox ? "sandbox" : "host"; const supervisor = getProcessSupervisor(); const shellRuntimeEnv: Record = { ...opts.env, @@ -759,11 +795,33 @@ export async function runExecProcess(opts: { } catch (retryErr) { markExited(session, null, null, "failed"); maybeNotifyOnExit(session, "failed"); + emitExecProcessCompleted({ + command: opts.command, + mode: "child", + outcome: buildExecRuntimeErrorOutcome({ + error: retryErr, + aggregated: session.aggregated.trim(), + durationMs: Date.now() - startedAt, + }), + sessionKey: opts.sessionKey, + target: diagnosticTarget, + }); throw retryErr; } } else { markExited(session, null, null, "failed"); maybeNotifyOnExit(session, "failed"); + emitExecProcessCompleted({ + command: opts.command, + mode: spawnSpec.mode, + outcome: buildExecRuntimeErrorOutcome({ + error: err, + aggregated: session.aggregated.trim(), + durationMs: Date.now() - startedAt, + }), + sessionKey: opts.sessionKey, + target: diagnosticTarget, + }); throw err; } } @@ -799,17 +857,32 @@ export async function runExecProcess(opts: { token: sandboxFinalizeToken, }); } + emitExecProcessCompleted({ + command: opts.command, + mode: usingPty ? "pty" : "child", + outcome, + sessionKey: opts.sessionKey, + target: diagnosticTarget, + }); return outcome; }) .catch((err): ExecProcessOutcome => { updatesDisabled = true; markExited(session, null, null, "failed"); maybeNotifyOnExit(session, "failed"); - return buildExecRuntimeErrorOutcome({ + const outcome = buildExecRuntimeErrorOutcome({ error: err, aggregated: session.aggregated.trim(), durationMs: Date.now() - startedAt, }); + emitExecProcessCompleted({ + command: opts.command, + mode: usingPty ? "pty" : "child", + outcome, + sessionKey: opts.sessionKey, + target: diagnosticTarget, + }); + return outcome; }); return { diff --git a/src/infra/diagnostic-events.ts b/src/infra/diagnostic-events.ts index d1a29993487..ba0635199ca 100644 --- a/src/infra/diagnostic-events.ts +++ b/src/infra/diagnostic-events.ts @@ -185,6 +185,27 @@ export type DiagnosticToolExecutionErrorEvent = DiagnosticToolExecutionBaseEvent errorCode?: string; }; +export type DiagnosticExecProcessCompletedEvent = DiagnosticBaseEvent & { + type: "exec.process.completed"; + sessionKey?: string; + target: "host" | "sandbox"; + mode: "child" | "pty"; + outcome: "completed" | "failed"; + durationMs: number; + commandLength: number; + exitCode?: number; + exitSignal?: string; + timedOut?: boolean; + failureKind?: + | "shell-command-not-found" + | "shell-not-executable" + | "overall-timeout" + | "no-output-timeout" + | "signal" + | "aborted" + | "runtime-error"; +}; + type DiagnosticRunBaseEvent = DiagnosticBaseEvent & { runId: string; sessionKey?: string; @@ -299,6 +320,7 @@ export type DiagnosticEventPayload = | DiagnosticToolExecutionStartedEvent | DiagnosticToolExecutionCompletedEvent | DiagnosticToolExecutionErrorEvent + | DiagnosticExecProcessCompletedEvent | DiagnosticRunStartedEvent | DiagnosticRunCompletedEvent | DiagnosticModelCallStartedEvent @@ -329,6 +351,7 @@ const ASYNC_DIAGNOSTIC_EVENT_TYPES = new Set([ "tool.execution.started", "tool.execution.completed", "tool.execution.error", + "exec.process.completed", "model.call.started", "model.call.completed", "model.call.error", diff --git a/src/logging/diagnostic-stability.ts b/src/logging/diagnostic-stability.ts index 40051a6ec9c..c3f3f1e840d 100644 --- a/src/logging/diagnostic-stability.ts +++ b/src/logging/diagnostic-stability.ts @@ -17,10 +17,12 @@ export type DiagnosticStabilityEventRecord = { channel?: string; pluginId?: string; source?: string; + target?: string; surface?: string; action?: string; reason?: string; outcome?: string; + mode?: string; level?: string; detector?: string; toolName?: string; @@ -28,6 +30,9 @@ export type DiagnosticStabilityEventRecord = { provider?: string; model?: string; durationMs?: number; + commandLength?: number; + exitCode?: number; + timedOut?: boolean; costUsd?: number; count?: number; bytes?: number; @@ -247,6 +252,16 @@ function sanitizeDiagnosticEvent(event: DiagnosticEventPayload): DiagnosticStabi record.durationMs = event.durationMs; assignReasonCode(record, event.errorCategory); break; + case "exec.process.completed": + record.target = event.target; + record.mode = event.mode; + record.outcome = event.outcome; + record.durationMs = event.durationMs; + record.commandLength = event.commandLength; + record.exitCode = event.exitCode; + record.timedOut = event.timedOut; + assignReasonCode(record, event.failureKind); + break; case "run.started": record.provider = event.provider; record.model = event.model;