From cead2ea4b10688250068bbbe41cb5da19ffcc0b6 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Fri, 24 Apr 2026 01:16:13 -0700 Subject: [PATCH] feat(diagnostics): emit tool execution events Emit structured diagnostic events for tool execution lifecycle, with trace context, safe parameter summaries, and non-message error metadata. --- CHANGELOG.md | 1 + .../.generated/plugin-sdk-api-baseline.sha256 | 4 +- extensions/diagnostics-otel/src/service.ts | 3 + .../pi-tools.before-tool-call.e2e.test.ts | 126 ++++++++++++++++++ src/agents/pi-tools.before-tool-call.ts | 83 ++++++++++++ src/infra/diagnostic-events.ts | 34 +++++ src/logging/diagnostic-stability.ts | 12 ++ 7 files changed, 261 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e71f7e8657..434e8791ea8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ Docs: https://docs.openclaw.ai - Diagnostics/OTEL: attach diagnostic trace context to exported OTEL logs so log records can correlate with future spans without adding retained process state. Thanks @vincentkoc. - Diagnostics/OTEL: pass immutable per-run diagnostic trace context through agent and tool hook contexts, and parent exported diagnostic spans from validated context without retaining global trace state. Thanks @vincentkoc. - Diagnostics/OTEL: make exporter startup restart-safe so config reloads do not retain stale SDKs, log transports, or diagnostic event listeners. Thanks @vincentkoc. +- Diagnostics: emit structured tool execution diagnostic events with trace context, timing, and redacted error metadata. Thanks @vincentkoc. - Control UI/chat: add a Steer action on queued messages so a browser follow-up can be injected into the active run without retyping it. - Control UI/Talk: add browser WebRTC realtime voice sessions backed by OpenAI Realtime, with Gateway-minted ephemeral client secrets and `openclaw_agent_consult` handoff to the full OpenClaw agent. - Agents/tools: add optional per-call `timeoutMs` support for image, video, music, and TTS generation tools so agents can extend provider request timeouts only when a specific generation needs it. diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256 index ed46e206784..6d1cc133f51 100644 --- a/docs/.generated/plugin-sdk-api-baseline.sha256 +++ b/docs/.generated/plugin-sdk-api-baseline.sha256 @@ -1,2 +1,2 @@ -c0f788d1895ced2ffdad9f82e6afc592171e6651c61c0fc5083f0040437cda6d plugin-sdk-api-baseline.json -70e320157331080b98f9c2acae58e89ad1dc70b48adad265225a7eb76b6ac29f plugin-sdk-api-baseline.jsonl +c57d43f93ec2930b099dd5c5777f201f1bdd1ab432eeb4049b6e62ff23fe8112 plugin-sdk-api-baseline.json +ece1ea689914c4070b587551e86c6bed6598feba90457ab489222e168b2d9298 plugin-sdk-api-baseline.jsonl diff --git a/extensions/diagnostics-otel/src/service.ts b/extensions/diagnostics-otel/src/service.ts index acef6fdd02a..c61f7dcc424 100644 --- a/extensions/diagnostics-otel/src/service.ts +++ b/extensions/diagnostics-otel/src/service.ts @@ -812,6 +812,9 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { recordHeartbeat(evt); return; case "tool.loop": + case "tool.execution.started": + case "tool.execution.completed": + case "tool.execution.error": case "diagnostic.memory.sample": case "diagnostic.memory.pressure": case "payload.large": diff --git a/src/agents/pi-tools.before-tool-call.e2e.test.ts b/src/agents/pi-tools.before-tool-call.e2e.test.ts index da5a2696668..bed1266fcd5 100644 --- a/src/agents/pi-tools.before-tool-call.e2e.test.ts +++ b/src/agents/pi-tools.before-tool-call.e2e.test.ts @@ -2,6 +2,7 @@ import { beforeEach, describe, expect, it, vi } from "vitest"; import { onDiagnosticEvent, resetDiagnosticEventsForTest, + type DiagnosticEventPayload, type DiagnosticToolLoopEvent, } from "../infra/diagnostic-events.js"; import { resetDiagnosticSessionStateForTest } from "../logging/diagnostic-session-state.js"; @@ -85,6 +86,22 @@ describe("before_tool_call loop detection behavior", () => { } } + async function withToolExecutionEvents( + run: (emitted: DiagnosticEventPayload[]) => Promise, + ) { + const emitted: DiagnosticEventPayload[] = []; + const stop = onDiagnosticEvent((evt) => { + if (evt.type.startsWith("tool.execution.")) { + emitted.push(evt); + } + }); + try { + await run(emitted); + } finally { + stop(); + } + } + function createPingPongTools(options?: { withProgress?: boolean }) { const readExecute = options?.withProgress ? vi.fn().mockImplementation(async (toolCallId: string) => ({ @@ -331,6 +348,115 @@ describe("before_tool_call loop detection behavior", () => { }); }); }); + + it("emits diagnostic tool execution events without parameter values", async () => { + const trace = { + traceId: "4bf92f3577b34da6a3ce929d0e0e4736", + spanId: "00f067aa0ba902b7", + traceFlags: "01", + }; + const execute = vi.fn().mockResolvedValue({ + content: [{ type: "text", text: "ok" }], + }); + const tool = wrapToolWithBeforeToolCallHook({ name: "bash", execute } as any, { + agentId: "main", + sessionKey: "session-key", + sessionId: "session-id", + runId: "run-1", + trace, + loopDetection: { enabled: false }, + }); + + await withToolExecutionEvents(async (emitted) => { + await tool.execute( + "tool-call-1", + { command: "pwd", token: "sk-1234567890abcdef1234567890abcdef" }, + undefined, + undefined, + ); + + expect(emitted.map((evt) => evt.type)).toEqual([ + "tool.execution.started", + "tool.execution.completed", + ]); + expect(emitted[0]).toMatchObject({ + type: "tool.execution.started", + runId: "run-1", + sessionKey: "session-key", + sessionId: "session-id", + toolName: "exec", + toolCallId: "tool-call-1", + paramsSummary: { + kind: "object", + }, + trace, + }); + expect(emitted[0]?.trace).not.toBe(trace); + expect(Object.isFrozen(emitted[0]?.trace)).toBe(true); + expect(emitted[1]).toMatchObject({ + type: "tool.execution.completed", + durationMs: expect.any(Number), + }); + expect(JSON.stringify(emitted)).not.toContain("sk-1234567890abcdef1234567890abcdef"); + expect(JSON.stringify(emitted)).not.toContain("pwd"); + }); + }); + + it("emits diagnostic tool execution error events with redacted errors", async () => { + const execute = vi + .fn() + .mockRejectedValue(new Error("failed with key sk-1234567890abcdef1234567890abcdef")); + const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, { + agentId: "main", + sessionKey: "session-key", + loopDetection: { enabled: false }, + }); + + await withToolExecutionEvents(async (emitted) => { + await expect( + tool.execute("tool-call-error", { path: "/tmp/file" }, undefined, undefined), + ).rejects.toThrow("failed with key"); + + expect(emitted.map((evt) => evt.type)).toEqual([ + "tool.execution.started", + "tool.execution.error", + ]); + expect(emitted[1]).toMatchObject({ + type: "tool.execution.error", + toolName: "read", + toolCallId: "tool-call-error", + durationMs: expect.any(Number), + errorCategory: "Error", + }); + expect(JSON.stringify(emitted[1])).not.toContain("sk-1234567890abcdef1234567890abcdef"); + }); + }); + + it("summarizes hostile object params without enumerating keys", async () => { + const execute = vi.fn().mockResolvedValue({ content: [{ type: "text", text: "ok" }] }); + const tool = wrapToolWithBeforeToolCallHook({ name: "bash", execute } as any, { + agentId: "main", + sessionKey: "session-key", + loopDetection: { enabled: false }, + }); + const params = new Proxy( + {}, + { + ownKeys() { + throw new Error("should not enumerate params"); + }, + }, + ); + + await withToolExecutionEvents(async (emitted) => { + await tool.execute("tool-call-proxy", params, undefined, undefined); + + expect(emitted[0]).toMatchObject({ + type: "tool.execution.started", + paramsSummary: { kind: "object" }, + }); + }); + }); }); describe("before_tool_call requireApproval handling", () => { diff --git a/src/agents/pi-tools.before-tool-call.ts b/src/agents/pi-tools.before-tool-call.ts index 8dac3bb50e6..34b128c4d61 100644 --- a/src/agents/pi-tools.before-tool-call.ts +++ b/src/agents/pi-tools.before-tool-call.ts @@ -1,4 +1,8 @@ import type { ToolLoopDetectionConfig } from "../config/types.tools.js"; +import { + emitDiagnosticEvent, + type DiagnosticToolParamsSummary, +} from "../infra/diagnostic-events.js"; import { freezeDiagnosticTraceContext, type DiagnosticTraceContext, @@ -81,6 +85,57 @@ function unwrapErrorCause(err: unknown): unknown { return err; } +function summarizeToolParams(params: unknown): DiagnosticToolParamsSummary { + if (params === null) { + return { kind: "null" }; + } + if (params === undefined) { + return { kind: "undefined" }; + } + if (Array.isArray(params)) { + return { kind: "array", length: params.length }; + } + if (typeof params === "object") { + return { kind: "object" }; + } + if (typeof params === "string") { + return { kind: "string", length: params.length }; + } + if (typeof params === "number") { + return { kind: "number" }; + } + if (typeof params === "boolean") { + return { kind: "boolean" }; + } + return { kind: "other" }; +} + +function errorCategory(err: unknown): string { + if (err instanceof Error && err.name.trim()) { + return err.name; + } + return typeof err; +} + +function diagnosticErrorCode(err: unknown): string | undefined { + if (!err || typeof err !== "object") { + return undefined; + } + const candidate = err as { code?: unknown; status?: unknown; statusCode?: unknown }; + const code = candidate.code ?? candidate.status ?? candidate.statusCode; + if (typeof code === "number" && Number.isFinite(code)) { + return String(code); + } + if (typeof code !== "string") { + return undefined; + } + const trimmed = code.trim(); + if (!trimmed) { + return undefined; + } + return trimmed.slice(0, 64); +} + function shouldEmitLoopWarning(state: SessionState, warningKey: string, count: number): boolean { if (!state.toolLoopWarningBuckets) { state.toolLoopWarningBuckets = new Map(); @@ -415,8 +470,27 @@ export function wrapToolWithBeforeToolCallHook( } } const normalizedToolName = normalizeToolName(toolName || "tool"); + const eventBase = { + ...(ctx?.runId && { runId: ctx.runId }), + ...(ctx?.sessionKey && { sessionKey: ctx.sessionKey }), + ...(ctx?.sessionId && { sessionId: ctx.sessionId }), + ...(ctx?.trace && { trace: freezeDiagnosticTraceContext(ctx.trace) }), + toolName: normalizedToolName, + ...(toolCallId && { toolCallId }), + paramsSummary: summarizeToolParams(outcome.params), + }; + emitDiagnosticEvent({ + type: "tool.execution.started", + ...eventBase, + }); + const startedAt = Date.now(); try { const result = await execute(toolCallId, outcome.params, signal, onUpdate); + emitDiagnosticEvent({ + type: "tool.execution.completed", + ...eventBase, + durationMs: Date.now() - startedAt, + }); await recordLoopOutcome({ ctx, toolName: normalizedToolName, @@ -426,6 +500,15 @@ export function wrapToolWithBeforeToolCallHook( }); return result; } catch (err) { + const cause = unwrapErrorCause(err); + const errorCode = diagnosticErrorCode(cause); + emitDiagnosticEvent({ + type: "tool.execution.error", + ...eventBase, + durationMs: Date.now() - startedAt, + errorCategory: errorCategory(cause), + ...(errorCode ? { errorCode } : {}), + }); await recordLoopOutcome({ ctx, toolName: normalizedToolName, diff --git a/src/infra/diagnostic-events.ts b/src/infra/diagnostic-events.ts index 77ba1b7f51b..c046935c6a9 100644 --- a/src/infra/diagnostic-events.ts +++ b/src/infra/diagnostic-events.ts @@ -154,6 +154,37 @@ export type DiagnosticToolLoopEvent = DiagnosticBaseEvent & { pairedToolName?: string; }; +export type DiagnosticToolParamsSummary = + | { kind: "object" } + | { kind: "array"; length: number } + | { kind: "string"; length: number } + | { kind: "number" | "boolean" | "null" | "undefined" | "other" }; + +type DiagnosticToolExecutionBaseEvent = DiagnosticBaseEvent & { + runId?: string; + sessionKey?: string; + sessionId?: string; + toolName: string; + toolCallId?: string; + paramsSummary?: DiagnosticToolParamsSummary; +}; + +export type DiagnosticToolExecutionStartedEvent = DiagnosticToolExecutionBaseEvent & { + type: "tool.execution.started"; +}; + +export type DiagnosticToolExecutionCompletedEvent = DiagnosticToolExecutionBaseEvent & { + type: "tool.execution.completed"; + durationMs: number; +}; + +export type DiagnosticToolExecutionErrorEvent = DiagnosticToolExecutionBaseEvent & { + type: "tool.execution.error"; + durationMs: number; + errorCategory: string; + errorCode?: string; +}; + export type DiagnosticMemoryUsage = { rssBytes: number; heapTotalBytes: number; @@ -204,6 +235,9 @@ export type DiagnosticEventPayload = | DiagnosticRunAttemptEvent | DiagnosticHeartbeatEvent | DiagnosticToolLoopEvent + | DiagnosticToolExecutionStartedEvent + | DiagnosticToolExecutionCompletedEvent + | DiagnosticToolExecutionErrorEvent | DiagnosticMemorySampleEvent | DiagnosticMemoryPressureEvent | DiagnosticPayloadLargeEvent; diff --git a/src/logging/diagnostic-stability.ts b/src/logging/diagnostic-stability.ts index b0a1ca59fad..959819a00ec 100644 --- a/src/logging/diagnostic-stability.ts +++ b/src/logging/diagnostic-stability.ts @@ -235,6 +235,18 @@ function sanitizeDiagnosticEvent(event: DiagnosticEventPayload): DiagnosticStabi record.count = event.count; record.pairedToolName = event.pairedToolName; break; + case "tool.execution.started": + record.toolName = event.toolName; + break; + case "tool.execution.completed": + record.toolName = event.toolName; + record.durationMs = event.durationMs; + break; + case "tool.execution.error": + record.toolName = event.toolName; + record.durationMs = event.durationMs; + record.reason = event.errorCategory; + break; case "diagnostic.memory.sample": record.memory = copyMemory(event.memory); break;