feat(diagnostics): emit exec process telemetry (#71451)

2026-05-06 16:50:43 +00:00 · 2026-04-25 00:12:58 -07:00
parent 188bce424b
commit 3e3bba4f30
8 changed files with 294 additions and 1 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai

 ### Changes

+- Diagnostics/OTEL: emit bounded exec-process diagnostics and export them as `openclaw.exec` spans without exposing command text, working directories, or container identifiers. (#70424) Thanks @jlapenna.
 - Diagnostics/OTEL: support `OPENCLAW_OTEL_PRELOADED=1` so the plugin can reuse an already-registered OpenTelemetry SDK while keeping OpenClaw diagnostic listeners wired. (#70424) Thanks @jlapenna.
 - Control UI: refine the agent Tool Access panel with compact live-tool chips, collapsible tool groups, direct per-tool toggles, and clearer runtime/source provenance. (#71405) Thanks @BunsDev.
 - Memory-core/hybrid search: expose raw `vectorScore` and `textScore` alongside the combined `score` on hybrid memory search results, so callers can inspect vector-versus-text retrieval contribution before temporal decay or MMR reordering. Fixes #68166. (#68286) Thanks @ajfonthemove.
--- a/docs/logging.md
+++ b/docs/logging.md
@@ -216,6 +216,12 @@ Queue + session:
 - `run.attempt`: run retry/attempt metadata.
 - `diagnostic.heartbeat`: aggregate counters (webhooks/queue/session).

+Exec:
+
+- `exec.process.completed`: terminal exec process outcome, duration, target, mode,
+  command length, exit code/signal, timed-out flag, and failure kind. Command text
+  and working directories are not included.
+
 ### Enable diagnostics (no exporter)

 Use this if you want diagnostics events available to plugins or custom sinks:
@@ -352,6 +358,11 @@ Queues + sessions:
 - `openclaw.session.stuck_age_ms` (histogram, attrs: `openclaw.state`)
 - `openclaw.run.attempt` (counter, attrs: `openclaw.attempt`)

+Exec:
+
+- `openclaw.exec.duration_ms` (histogram, attrs: `openclaw.exec.target`,
+  `openclaw.exec.mode`, `openclaw.outcome`, `openclaw.failureKind`)
+
 ### Exported spans (names + key attributes)

 - `openclaw.model.usage`
@@ -367,6 +378,10 @@ Queues + sessions:
 - `openclaw.tool.execution`
  - `gen_ai.tool.name`, `openclaw.toolName`, `openclaw.errorCategory`,
    `openclaw.tool.params.*`
+- `openclaw.exec`
+  - `openclaw.exec.target`, `openclaw.exec.mode`, `openclaw.outcome`,
+    `openclaw.failureKind`, `openclaw.exec.command_length`, `openclaw.exec.exit_code`,
+    `openclaw.exec.exit_signal`, `openclaw.exec.timed_out`
 - `openclaw.webhook.processed`
  - `openclaw.channel`, `openclaw.webhook`, `openclaw.chatId`
 - `openclaw.webhook.error`
--- a/extensions/diagnostics-otel/src/service.test.ts
+++ b/extensions/diagnostics-otel/src/service.test.ts
@@ -817,6 +817,67 @@ describe("diagnostics-otel service", () => {
    await service.stop?.(ctx);
  });

+  test("exports exec process spans without command text", async () => {
+    const service = createDiagnosticsOtelService();
+    const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
+    await service.start(ctx);
+
+    emitDiagnosticEvent({
+      type: "exec.process.completed",
+      target: "host",
+      mode: "child",
+      outcome: "failed",
+      durationMs: 30,
+      commandLength: 42,
+      exitCode: 1,
+      timedOut: false,
+      failureKind: "runtime-error",
+    });
+    await flushDiagnosticEvents();
+
+    expect(telemetryState.histograms.get("openclaw.exec.duration_ms")?.record).toHaveBeenCalledWith(
+      30,
+      expect.objectContaining({
+        "openclaw.exec.target": "host",
+        "openclaw.exec.mode": "child",
+        "openclaw.outcome": "failed",
+        "openclaw.failureKind": "runtime-error",
+      }),
+    );
+
+    const execCall = telemetryState.tracer.startSpan.mock.calls.find(
+      (call) => call[0] === "openclaw.exec",
+    );
+    expect(execCall?.[1]).toMatchObject({
+      attributes: {
+        "openclaw.exec.target": "host",
+        "openclaw.exec.mode": "child",
+        "openclaw.outcome": "failed",
+        "openclaw.exec.command_length": 42,
+        "openclaw.exec.exit_code": 1,
+        "openclaw.exec.timed_out": false,
+        "openclaw.failureKind": "runtime-error",
+      },
+      startTime: expect.any(Number),
+    });
+    expect(execCall?.[1]).toEqual({
+      attributes: expect.not.objectContaining({
+        "openclaw.exec.command": expect.anything(),
+        "openclaw.exec.workdir": expect.anything(),
+        "openclaw.sessionKey": expect.anything(),
+      }),
+      startTime: expect.any(Number),
+    });
+
+    const execSpan = telemetryState.spans.find((span) => span.name === "openclaw.exec");
+    expect(execSpan?.setStatus).toHaveBeenCalledWith({
+      code: 2,
+      message: "runtime-error",
+    });
+    expect(execSpan?.end).toHaveBeenCalledWith(expect.any(Number));
+    await service.stop?.(ctx);
+  });
+
  test("does not export model or tool content unless capture is explicitly enabled", async () => {
    const service = createDiagnosticsOtelService();
    const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
--- a/extensions/diagnostics-otel/src/service.ts
+++ b/extensions/diagnostics-otel/src/service.ts
@@ -557,6 +557,10 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
          description: "Tool execution duration",
        },
      );
+      const execProcessDurationHistogram = meter.createHistogram("openclaw.exec.duration_ms", {
+        unit: "ms",
+        description: "Exec process duration",
+      });

      let recordLogRecord:
        | ((evt: Extract<DiagnosticEventPayload, { type: "log.record" }>) => void)
@@ -1087,6 +1091,48 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
        span.end(evt.ts);
      };

+      const recordExecProcessCompleted = (
+        evt: Extract<DiagnosticEventPayload, { type: "exec.process.completed" }>,
+      ) => {
+        const attrs: Record<string, string | number> = {
+          "openclaw.exec.target": evt.target,
+          "openclaw.exec.mode": evt.mode,
+          "openclaw.outcome": evt.outcome,
+        };
+        if (evt.failureKind) {
+          attrs["openclaw.failureKind"] = evt.failureKind;
+        }
+        execProcessDurationHistogram.record(evt.durationMs, attrs);
+        if (!tracesEnabled) {
+          return;
+        }
+
+        const spanAttrs: Record<string, string | number | boolean> = {
+          ...attrs,
+          "openclaw.exec.command_length": evt.commandLength,
+        };
+        if (typeof evt.exitCode === "number") {
+          spanAttrs["openclaw.exec.exit_code"] = evt.exitCode;
+        }
+        if (evt.exitSignal) {
+          spanAttrs["openclaw.exec.exit_signal"] = lowCardinalityAttr(evt.exitSignal, "other");
+        }
+        if (evt.timedOut !== undefined) {
+          spanAttrs["openclaw.exec.timed_out"] = evt.timedOut;
+        }
+
+        const span = spanWithDuration("openclaw.exec", spanAttrs, evt.durationMs, {
+          endTimeMs: evt.ts,
+        });
+        if (evt.outcome === "failed") {
+          span.setStatus({
+            code: SpanStatusCode.ERROR,
+            ...(evt.failureKind ? { message: evt.failureKind } : {}),
+          });
+        }
+        span.end(evt.ts);
+      };
+
      const recordHeartbeat = (
        evt: Extract<DiagnosticEventPayload, { type: "diagnostic.heartbeat" }>,
      ) => {
@@ -1147,6 +1193,9 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
            case "tool.execution.error":
              recordToolExecutionError(evt);
              return;
+            case "exec.process.completed":
+              recordExecProcessCompleted(evt);
+              return;
            case "log.record":
              recordLogRecord?.(evt);
              return;
--- a/src/agents/bash-tools.exec-runtime.pty-fallback.test.ts
+++ b/src/agents/bash-tools.exec-runtime.pty-fallback.test.ts
@@ -1,4 +1,9 @@
 import { afterEach, beforeAll, beforeEach, expect, test, vi } from "vitest";
+import {
+  onInternalDiagnosticEvent,
+  resetDiagnosticEventsForTest,
+  type DiagnosticEventPayload,
+} from "../infra/diagnostic-events.js";
 import type { ManagedRun, SpawnInput } from "../process/supervisor/index.js";

 let listRunningSessions: typeof import("./bash-process-registry.js").listRunningSessions;
@@ -56,6 +61,7 @@ beforeEach(() => {

 afterEach(() => {
  resetProcessRegistryForTests();
+  resetDiagnosticEventsForTest();
  vi.clearAllMocks();
 });

@@ -101,3 +107,53 @@ test("exec cleans session state when PTY fallback spawn also fails", async () =>

  expect(listRunningSessions()).toHaveLength(0);
 });
+
+function flushDiagnosticEvents() {
+  return new Promise<void>((resolve) => setImmediate(resolve));
+}
+
+test("exec emits bounded process diagnostics without command text", async () => {
+  supervisorSpawnMock.mockImplementationOnce(async (input: SpawnInput) =>
+    createSuccessfulRun(input),
+  );
+  const events: DiagnosticEventPayload[] = [];
+  const unsubscribe = onInternalDiagnosticEvent((event) => {
+    events.push(event);
+  });
+  try {
+    const command = "printf super-secret-value";
+    const handle = await runExecProcess({
+      command,
+      workdir: process.cwd(),
+      env: {},
+      usePty: false,
+      warnings: [],
+      maxOutput: 20_000,
+      pendingMaxOutput: 20_000,
+      notifyOnExit: false,
+      sessionKey: "session-1",
+      timeoutSec: 5,
+    });
+
+    await handle.promise;
+    await flushDiagnosticEvents();
+
+    const event = events.find((item) => item.type === "exec.process.completed");
+    expect(event).toMatchObject({
+      type: "exec.process.completed",
+      target: "host",
+      mode: "child",
+      outcome: "completed",
+      durationMs: expect.any(Number),
+      commandLength: command.length,
+      exitCode: 0,
+      sessionKey: "session-1",
+    });
+    const serialized = JSON.stringify(event);
+    expect(serialized).not.toContain("printf");
+    expect(serialized).not.toContain("super-secret-value");
+    expect(serialized).not.toContain(process.cwd());
+  } finally {
+    unsubscribe();
+  }
+});
--- a/src/agents/bash-tools.exec-runtime.ts
+++ b/src/agents/bash-tools.exec-runtime.ts
@@ -1,5 +1,6 @@
 import path from "node:path";
 import type { AgentToolResult } from "@mariozechner/pi-agent-core";
+import { emitDiagnosticEvent } from "../infra/diagnostic-events.js";
 import {
  DEFAULT_EXEC_APPROVAL_TIMEOUT_MS,
  resolveExecApprovalAllowedDecisions,
@@ -165,6 +166,40 @@ export type ExecProcessHandle = {
  disableUpdates: () => void;
 };

+function normalizeExecExitSignal(signal: NodeJS.Signals | number | null): string | undefined {
+  if (signal === null) {
+    return undefined;
+  }
+  return String(signal);
+}
+
+function emitExecProcessCompleted(params: {
+  command: string;
+  mode: "child" | "pty";
+  outcome: ExecProcessOutcome;
+  sessionKey?: string;
+  target: "host" | "sandbox";
+}): void {
+  const exitSignal = normalizeExecExitSignal(params.outcome.exitSignal);
+  emitDiagnosticEvent({
+    type: "exec.process.completed",
+    target: params.target,
+    mode: params.mode,
+    outcome: params.outcome.status,
+    durationMs: params.outcome.durationMs,
+    commandLength: params.command.length,
+    ...(params.sessionKey?.trim() ? { sessionKey: params.sessionKey.trim() } : {}),
+    ...(typeof params.outcome.exitCode === "number" ? { exitCode: params.outcome.exitCode } : {}),
+    ...(exitSignal ? { exitSignal } : {}),
+    ...(params.outcome.status === "failed"
+      ? {
+          timedOut: params.outcome.timedOut,
+          failureKind: params.outcome.failureKind,
+        }
+      : {}),
+  });
+}
+
 export function renderExecHostLabel(host: ExecHost) {
  return host === "sandbox" ? "sandbox" : host === "gateway" ? "gateway" : "node";
 }
@@ -523,6 +558,7 @@ export async function runExecProcess(opts: {
  const startedAt = Date.now();
  const sessionId = createSessionSlug();
  const execCommand = opts.execCommand ?? opts.command;
+  const diagnosticTarget = opts.sandbox ? "sandbox" : "host";
  const supervisor = getProcessSupervisor();
  const shellRuntimeEnv: Record<string, string> = {
    ...opts.env,
@@ -759,11 +795,33 @@ export async function runExecProcess(opts: {
      } catch (retryErr) {
        markExited(session, null, null, "failed");
        maybeNotifyOnExit(session, "failed");
+        emitExecProcessCompleted({
+          command: opts.command,
+          mode: "child",
+          outcome: buildExecRuntimeErrorOutcome({
+            error: retryErr,
+            aggregated: session.aggregated.trim(),
+            durationMs: Date.now() - startedAt,
+          }),
+          sessionKey: opts.sessionKey,
+          target: diagnosticTarget,
+        });
        throw retryErr;
      }
    } else {
      markExited(session, null, null, "failed");
      maybeNotifyOnExit(session, "failed");
+      emitExecProcessCompleted({
+        command: opts.command,
+        mode: spawnSpec.mode,
+        outcome: buildExecRuntimeErrorOutcome({
+          error: err,
+          aggregated: session.aggregated.trim(),
+          durationMs: Date.now() - startedAt,
+        }),
+        sessionKey: opts.sessionKey,
+        target: diagnosticTarget,
+      });
      throw err;
    }
  }
@@ -799,17 +857,32 @@ export async function runExecProcess(opts: {
          token: sandboxFinalizeToken,
        });
      }
+      emitExecProcessCompleted({
+        command: opts.command,
+        mode: usingPty ? "pty" : "child",
+        outcome,
+        sessionKey: opts.sessionKey,
+        target: diagnosticTarget,
+      });
      return outcome;
    })
    .catch((err): ExecProcessOutcome => {
      updatesDisabled = true;
      markExited(session, null, null, "failed");
      maybeNotifyOnExit(session, "failed");
-      return buildExecRuntimeErrorOutcome({
+      const outcome = buildExecRuntimeErrorOutcome({
        error: err,
        aggregated: session.aggregated.trim(),
        durationMs: Date.now() - startedAt,
      });
+      emitExecProcessCompleted({
+        command: opts.command,
+        mode: usingPty ? "pty" : "child",
+        outcome,
+        sessionKey: opts.sessionKey,
+        target: diagnosticTarget,
+      });
+      return outcome;
    });

  return {
--- a/src/infra/diagnostic-events.ts
+++ b/src/infra/diagnostic-events.ts
@@ -185,6 +185,27 @@ export type DiagnosticToolExecutionErrorEvent = DiagnosticToolExecutionBaseEvent
  errorCode?: string;
 };

+export type DiagnosticExecProcessCompletedEvent = DiagnosticBaseEvent & {
+  type: "exec.process.completed";
+  sessionKey?: string;
+  target: "host" | "sandbox";
+  mode: "child" | "pty";
+  outcome: "completed" | "failed";
+  durationMs: number;
+  commandLength: number;
+  exitCode?: number;
+  exitSignal?: string;
+  timedOut?: boolean;
+  failureKind?:
+    | "shell-command-not-found"
+    | "shell-not-executable"
+    | "overall-timeout"
+    | "no-output-timeout"
+    | "signal"
+    | "aborted"
+    | "runtime-error";
+};
+
 type DiagnosticRunBaseEvent = DiagnosticBaseEvent & {
  runId: string;
  sessionKey?: string;
@@ -299,6 +320,7 @@ export type DiagnosticEventPayload =
  | DiagnosticToolExecutionStartedEvent
  | DiagnosticToolExecutionCompletedEvent
  | DiagnosticToolExecutionErrorEvent
+  | DiagnosticExecProcessCompletedEvent
  | DiagnosticRunStartedEvent
  | DiagnosticRunCompletedEvent
  | DiagnosticModelCallStartedEvent
@@ -329,6 +351,7 @@ const ASYNC_DIAGNOSTIC_EVENT_TYPES = new Set<DiagnosticEventPayload["type"]>([
  "tool.execution.started",
  "tool.execution.completed",
  "tool.execution.error",
+  "exec.process.completed",
  "model.call.started",
  "model.call.completed",
  "model.call.error",
--- a/src/logging/diagnostic-stability.ts
+++ b/src/logging/diagnostic-stability.ts
@@ -17,10 +17,12 @@ export type DiagnosticStabilityEventRecord = {
  channel?: string;
  pluginId?: string;
  source?: string;
+  target?: string;
  surface?: string;
  action?: string;
  reason?: string;
  outcome?: string;
+  mode?: string;
  level?: string;
  detector?: string;
  toolName?: string;
@@ -28,6 +30,9 @@ export type DiagnosticStabilityEventRecord = {
  provider?: string;
  model?: string;
  durationMs?: number;
+  commandLength?: number;
+  exitCode?: number;
+  timedOut?: boolean;
  costUsd?: number;
  count?: number;
  bytes?: number;
@@ -247,6 +252,16 @@ function sanitizeDiagnosticEvent(event: DiagnosticEventPayload): DiagnosticStabi
      record.durationMs = event.durationMs;
      assignReasonCode(record, event.errorCategory);
      break;
+    case "exec.process.completed":
+      record.target = event.target;
+      record.mode = event.mode;
+      record.outcome = event.outcome;
+      record.durationMs = event.durationMs;
+      record.commandLength = event.commandLength;
+      record.exitCode = event.exitCode;
+      record.timedOut = event.timedOut;
+      assignReasonCode(record, event.failureKind);
+      break;
    case "run.started":
      record.provider = event.provider;
      record.model = event.model;