From cead2ea4b10688250068bbbe41cb5da19ffcc0b6 Mon Sep 17 00:00:00 2001
From: Vincent Koc <vincentkoc@ieee.org>
Date: Fri, 24 Apr 2026 01:16:13 -0700
Subject: [PATCH] feat(diagnostics): emit tool execution events

Emit structured diagnostic events for tool execution lifecycle, with trace context, safe parameter summaries, and non-message error metadata.
---
 CHANGELOG.md                                  |   1 +
 .../.generated/plugin-sdk-api-baseline.sha256 |   4 +-
 extensions/diagnostics-otel/src/service.ts    |   3 +
 .../pi-tools.before-tool-call.e2e.test.ts     | 126 ++++++++++++++++++
 src/agents/pi-tools.before-tool-call.ts       |  83 ++++++++++++
 src/infra/diagnostic-events.ts                |  34 +++++
 src/logging/diagnostic-stability.ts           |  12 ++
 7 files changed, 261 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5e71f7e8657..434e8791ea8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@ Docs: https://docs.openclaw.ai
 - Diagnostics/OTEL: attach diagnostic trace context to exported OTEL logs so log records can correlate with future spans without adding retained process state. Thanks @vincentkoc.
 - Diagnostics/OTEL: pass immutable per-run diagnostic trace context through agent and tool hook contexts, and parent exported diagnostic spans from validated context without retaining global trace state. Thanks @vincentkoc.
 - Diagnostics/OTEL: make exporter startup restart-safe so config reloads do not retain stale SDKs, log transports, or diagnostic event listeners. Thanks @vincentkoc.
+- Diagnostics: emit structured tool execution diagnostic events with trace context, timing, and redacted error metadata. Thanks @vincentkoc.
 - Control UI/chat: add a Steer action on queued messages so a browser follow-up can be injected into the active run without retyping it.
 - Control UI/Talk: add browser WebRTC realtime voice sessions backed by OpenAI Realtime, with Gateway-minted ephemeral client secrets and `openclaw_agent_consult` handoff to the full OpenClaw agent.
 - Agents/tools: add optional per-call `timeoutMs` support for image, video, music, and TTS generation tools so agents can extend provider request timeouts only when a specific generation needs it.
diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256
index ed46e206784..6d1cc133f51 100644
--- a/docs/.generated/plugin-sdk-api-baseline.sha256
+++ b/docs/.generated/plugin-sdk-api-baseline.sha256
@@ -1,2 +1,2 @@
-c0f788d1895ced2ffdad9f82e6afc592171e6651c61c0fc5083f0040437cda6d  plugin-sdk-api-baseline.json
-70e320157331080b98f9c2acae58e89ad1dc70b48adad265225a7eb76b6ac29f  plugin-sdk-api-baseline.jsonl
+c57d43f93ec2930b099dd5c5777f201f1bdd1ab432eeb4049b6e62ff23fe8112  plugin-sdk-api-baseline.json
+ece1ea689914c4070b587551e86c6bed6598feba90457ab489222e168b2d9298  plugin-sdk-api-baseline.jsonl
diff --git a/extensions/diagnostics-otel/src/service.ts b/extensions/diagnostics-otel/src/service.ts
index acef6fdd02a..c61f7dcc424 100644
--- a/extensions/diagnostics-otel/src/service.ts
+++ b/extensions/diagnostics-otel/src/service.ts
@@ -812,6 +812,9 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
               recordHeartbeat(evt);
               return;
             case "tool.loop":
+            case "tool.execution.started":
+            case "tool.execution.completed":
+            case "tool.execution.error":
             case "diagnostic.memory.sample":
             case "diagnostic.memory.pressure":
             case "payload.large":
diff --git a/src/agents/pi-tools.before-tool-call.e2e.test.ts b/src/agents/pi-tools.before-tool-call.e2e.test.ts
index da5a2696668..bed1266fcd5 100644
--- a/src/agents/pi-tools.before-tool-call.e2e.test.ts
+++ b/src/agents/pi-tools.before-tool-call.e2e.test.ts
@@ -2,6 +2,7 @@ import { beforeEach, describe, expect, it, vi } from "vitest";
 import {
   onDiagnosticEvent,
   resetDiagnosticEventsForTest,
+  type DiagnosticEventPayload,
   type DiagnosticToolLoopEvent,
 } from "../infra/diagnostic-events.js";
 import { resetDiagnosticSessionStateForTest } from "../logging/diagnostic-session-state.js";
@@ -85,6 +86,22 @@ describe("before_tool_call loop detection behavior", () => {
     }
   }
 
+  async function withToolExecutionEvents(
+    run: (emitted: DiagnosticEventPayload[]) => Promise<void>,
+  ) {
+    const emitted: DiagnosticEventPayload[] = [];
+    const stop = onDiagnosticEvent((evt) => {
+      if (evt.type.startsWith("tool.execution.")) {
+        emitted.push(evt);
+      }
+    });
+    try {
+      await run(emitted);
+    } finally {
+      stop();
+    }
+  }
+
   function createPingPongTools(options?: { withProgress?: boolean }) {
     const readExecute = options?.withProgress
       ? vi.fn().mockImplementation(async (toolCallId: string) => ({
@@ -331,6 +348,115 @@ describe("before_tool_call loop detection behavior", () => {
       });
     });
   });
+
+  it("emits diagnostic tool execution events without parameter values", async () => {
+    const trace = {
+      traceId: "4bf92f3577b34da6a3ce929d0e0e4736",
+      spanId: "00f067aa0ba902b7",
+      traceFlags: "01",
+    };
+    const execute = vi.fn().mockResolvedValue({
+      content: [{ type: "text", text: "ok" }],
+    });
+    const tool = wrapToolWithBeforeToolCallHook({ name: "bash", execute } as any, {
+      agentId: "main",
+      sessionKey: "session-key",
+      sessionId: "session-id",
+      runId: "run-1",
+      trace,
+      loopDetection: { enabled: false },
+    });
+
+    await withToolExecutionEvents(async (emitted) => {
+      await tool.execute(
+        "tool-call-1",
+        { command: "pwd", token: "sk-1234567890abcdef1234567890abcdef" },
+        undefined,
+        undefined,
+      );
+
+      expect(emitted.map((evt) => evt.type)).toEqual([
+        "tool.execution.started",
+        "tool.execution.completed",
+      ]);
+      expect(emitted[0]).toMatchObject({
+        type: "tool.execution.started",
+        runId: "run-1",
+        sessionKey: "session-key",
+        sessionId: "session-id",
+        toolName: "exec",
+        toolCallId: "tool-call-1",
+        paramsSummary: {
+          kind: "object",
+        },
+        trace,
+      });
+      expect(emitted[0]?.trace).not.toBe(trace);
+      expect(Object.isFrozen(emitted[0]?.trace)).toBe(true);
+      expect(emitted[1]).toMatchObject({
+        type: "tool.execution.completed",
+        durationMs: expect.any(Number),
+      });
+      expect(JSON.stringify(emitted)).not.toContain("sk-1234567890abcdef1234567890abcdef");
+      expect(JSON.stringify(emitted)).not.toContain("pwd");
+    });
+  });
+
+  it("emits diagnostic tool execution error events with redacted errors", async () => {
+    const execute = vi
+      .fn()
+      .mockRejectedValue(new Error("failed with key sk-1234567890abcdef1234567890abcdef"));
+    const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
+      agentId: "main",
+      sessionKey: "session-key",
+      loopDetection: { enabled: false },
+    });
+
+    await withToolExecutionEvents(async (emitted) => {
+      await expect(
+        tool.execute("tool-call-error", { path: "/tmp/file" }, undefined, undefined),
+      ).rejects.toThrow("failed with key");
+
+      expect(emitted.map((evt) => evt.type)).toEqual([
+        "tool.execution.started",
+        "tool.execution.error",
+      ]);
+      expect(emitted[1]).toMatchObject({
+        type: "tool.execution.error",
+        toolName: "read",
+        toolCallId: "tool-call-error",
+        durationMs: expect.any(Number),
+        errorCategory: "Error",
+      });
+      expect(JSON.stringify(emitted[1])).not.toContain("sk-1234567890abcdef1234567890abcdef");
+    });
+  });
+
+  it("summarizes hostile object params without enumerating keys", async () => {
+    const execute = vi.fn().mockResolvedValue({ content: [{ type: "text", text: "ok" }] });
+    const tool = wrapToolWithBeforeToolCallHook({ name: "bash", execute } as any, {
+      agentId: "main",
+      sessionKey: "session-key",
+      loopDetection: { enabled: false },
+    });
+    const params = new Proxy(
+      {},
+      {
+        ownKeys() {
+          throw new Error("should not enumerate params");
+        },
+      },
+    );
+
+    await withToolExecutionEvents(async (emitted) => {
+      await tool.execute("tool-call-proxy", params, undefined, undefined);
+
+      expect(emitted[0]).toMatchObject({
+        type: "tool.execution.started",
+        paramsSummary: { kind: "object" },
+      });
+    });
+  });
 });
 
 describe("before_tool_call requireApproval handling", () => {
diff --git a/src/agents/pi-tools.before-tool-call.ts b/src/agents/pi-tools.before-tool-call.ts
index 8dac3bb50e6..34b128c4d61 100644
--- a/src/agents/pi-tools.before-tool-call.ts
+++ b/src/agents/pi-tools.before-tool-call.ts
@@ -1,4 +1,8 @@
 import type { ToolLoopDetectionConfig } from "../config/types.tools.js";
+import {
+  emitDiagnosticEvent,
+  type DiagnosticToolParamsSummary,
+} from "../infra/diagnostic-events.js";
 import {
   freezeDiagnosticTraceContext,
   type DiagnosticTraceContext,
@@ -81,6 +85,57 @@ function unwrapErrorCause(err: unknown): unknown {
   return err;
 }
 
+function summarizeToolParams(params: unknown): DiagnosticToolParamsSummary {
+  if (params === null) {
+    return { kind: "null" };
+  }
+  if (params === undefined) {
+    return { kind: "undefined" };
+  }
+  if (Array.isArray(params)) {
+    return { kind: "array", length: params.length };
+  }
+  if (typeof params === "object") {
+    return { kind: "object" };
+  }
+  if (typeof params === "string") {
+    return { kind: "string", length: params.length };
+  }
+  if (typeof params === "number") {
+    return { kind: "number" };
+  }
+  if (typeof params === "boolean") {
+    return { kind: "boolean" };
+  }
+  return { kind: "other" };
+}
+
+function errorCategory(err: unknown): string {
+  if (err instanceof Error && err.name.trim()) {
+    return err.name;
+  }
+  return typeof err;
+}
+
+function diagnosticErrorCode(err: unknown): string | undefined {
+  if (!err || typeof err !== "object") {
+    return undefined;
+  }
+  const candidate = err as { code?: unknown; status?: unknown; statusCode?: unknown };
+  const code = candidate.code ?? candidate.status ?? candidate.statusCode;
+  if (typeof code === "number" && Number.isFinite(code)) {
+    return String(code);
+  }
+  if (typeof code !== "string") {
+    return undefined;
+  }
+  const trimmed = code.trim();
+  if (!trimmed) {
+    return undefined;
+  }
+  return trimmed.slice(0, 64);
+}
+
 function shouldEmitLoopWarning(state: SessionState, warningKey: string, count: number): boolean {
   if (!state.toolLoopWarningBuckets) {
     state.toolLoopWarningBuckets = new Map();
@@ -415,8 +470,27 @@ export function wrapToolWithBeforeToolCallHook(
         }
       }
       const normalizedToolName = normalizeToolName(toolName || "tool");
+      const eventBase = {
+        ...(ctx?.runId && { runId: ctx.runId }),
+        ...(ctx?.sessionKey && { sessionKey: ctx.sessionKey }),
+        ...(ctx?.sessionId && { sessionId: ctx.sessionId }),
+        ...(ctx?.trace && { trace: freezeDiagnosticTraceContext(ctx.trace) }),
+        toolName: normalizedToolName,
+        ...(toolCallId && { toolCallId }),
+        paramsSummary: summarizeToolParams(outcome.params),
+      };
+      emitDiagnosticEvent({
+        type: "tool.execution.started",
+        ...eventBase,
+      });
+      const startedAt = Date.now();
       try {
         const result = await execute(toolCallId, outcome.params, signal, onUpdate);
+        emitDiagnosticEvent({
+          type: "tool.execution.completed",
+          ...eventBase,
+          durationMs: Date.now() - startedAt,
+        });
         await recordLoopOutcome({
           ctx,
           toolName: normalizedToolName,
@@ -426,6 +500,15 @@ export function wrapToolWithBeforeToolCallHook(
         });
         return result;
       } catch (err) {
+        const cause = unwrapErrorCause(err);
+        const errorCode = diagnosticErrorCode(cause);
+        emitDiagnosticEvent({
+          type: "tool.execution.error",
+          ...eventBase,
+          durationMs: Date.now() - startedAt,
+          errorCategory: errorCategory(cause),
+          ...(errorCode ? { errorCode } : {}),
+        });
         await recordLoopOutcome({
           ctx,
           toolName: normalizedToolName,
diff --git a/src/infra/diagnostic-events.ts b/src/infra/diagnostic-events.ts
index 77ba1b7f51b..c046935c6a9 100644
--- a/src/infra/diagnostic-events.ts
+++ b/src/infra/diagnostic-events.ts
@@ -154,6 +154,37 @@ export type DiagnosticToolLoopEvent = DiagnosticBaseEvent & {
   pairedToolName?: string;
 };
 
+export type DiagnosticToolParamsSummary =
+  | { kind: "object" }
+  | { kind: "array"; length: number }
+  | { kind: "string"; length: number }
+  | { kind: "number" | "boolean" | "null" | "undefined" | "other" };
+
+type DiagnosticToolExecutionBaseEvent = DiagnosticBaseEvent & {
+  runId?: string;
+  sessionKey?: string;
+  sessionId?: string;
+  toolName: string;
+  toolCallId?: string;
+  paramsSummary?: DiagnosticToolParamsSummary;
+};
+
+export type DiagnosticToolExecutionStartedEvent = DiagnosticToolExecutionBaseEvent & {
+  type: "tool.execution.started";
+};
+
+export type DiagnosticToolExecutionCompletedEvent = DiagnosticToolExecutionBaseEvent & {
+  type: "tool.execution.completed";
+  durationMs: number;
+};
+
+export type DiagnosticToolExecutionErrorEvent = DiagnosticToolExecutionBaseEvent & {
+  type: "tool.execution.error";
+  durationMs: number;
+  errorCategory: string;
+  errorCode?: string;
+};
+
 export type DiagnosticMemoryUsage = {
   rssBytes: number;
   heapTotalBytes: number;
@@ -204,6 +235,9 @@ export type DiagnosticEventPayload =
   | DiagnosticRunAttemptEvent
   | DiagnosticHeartbeatEvent
   | DiagnosticToolLoopEvent
+  | DiagnosticToolExecutionStartedEvent
+  | DiagnosticToolExecutionCompletedEvent
+  | DiagnosticToolExecutionErrorEvent
   | DiagnosticMemorySampleEvent
   | DiagnosticMemoryPressureEvent
   | DiagnosticPayloadLargeEvent;
diff --git a/src/logging/diagnostic-stability.ts b/src/logging/diagnostic-stability.ts
index b0a1ca59fad..959819a00ec 100644
--- a/src/logging/diagnostic-stability.ts
+++ b/src/logging/diagnostic-stability.ts
@@ -235,6 +235,18 @@ function sanitizeDiagnosticEvent(event: DiagnosticEventPayload): DiagnosticStabi
       record.count = event.count;
       record.pairedToolName = event.pairedToolName;
       break;
+    case "tool.execution.started":
+      record.toolName = event.toolName;
+      break;
+    case "tool.execution.completed":
+      record.toolName = event.toolName;
+      record.durationMs = event.durationMs;
+      break;
+    case "tool.execution.error":
+      record.toolName = event.toolName;
+      record.durationMs = event.durationMs;
+      record.reason = event.errorCategory;
+      break;
     case "diagnostic.memory.sample":
       record.memory = copyMemory(event.memory);
       break;