feat(diagnostics): emit tool execution events

Emit structured diagnostic events for tool execution lifecycle, with trace context, safe parameter summaries, and non-message error metadata.
This commit is contained in:
Vincent Koc
2026-04-24 01:16:13 -07:00
committed by GitHub
parent 447105a278
commit cead2ea4b1
7 changed files with 261 additions and 2 deletions

View File

@@ -13,6 +13,7 @@ Docs: https://docs.openclaw.ai
- Diagnostics/OTEL: attach diagnostic trace context to exported OTEL logs so log records can correlate with future spans without adding retained process state. Thanks @vincentkoc.
- Diagnostics/OTEL: pass immutable per-run diagnostic trace context through agent and tool hook contexts, and parent exported diagnostic spans from validated context without retaining global trace state. Thanks @vincentkoc.
- Diagnostics/OTEL: make exporter startup restart-safe so config reloads do not retain stale SDKs, log transports, or diagnostic event listeners. Thanks @vincentkoc.
- Diagnostics: emit structured tool execution diagnostic events with trace context, timing, and redacted error metadata. Thanks @vincentkoc.
- Control UI/chat: add a Steer action on queued messages so a browser follow-up can be injected into the active run without retyping it.
- Control UI/Talk: add browser WebRTC realtime voice sessions backed by OpenAI Realtime, with Gateway-minted ephemeral client secrets and `openclaw_agent_consult` handoff to the full OpenClaw agent.
- Agents/tools: add optional per-call `timeoutMs` support for image, video, music, and TTS generation tools so agents can extend provider request timeouts only when a specific generation needs it.

View File

@@ -1,2 +1,2 @@
c0f788d1895ced2ffdad9f82e6afc592171e6651c61c0fc5083f0040437cda6d plugin-sdk-api-baseline.json
70e320157331080b98f9c2acae58e89ad1dc70b48adad265225a7eb76b6ac29f plugin-sdk-api-baseline.jsonl
c57d43f93ec2930b099dd5c5777f201f1bdd1ab432eeb4049b6e62ff23fe8112 plugin-sdk-api-baseline.json
ece1ea689914c4070b587551e86c6bed6598feba90457ab489222e168b2d9298 plugin-sdk-api-baseline.jsonl

View File

@@ -812,6 +812,9 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
recordHeartbeat(evt);
return;
case "tool.loop":
case "tool.execution.started":
case "tool.execution.completed":
case "tool.execution.error":
case "diagnostic.memory.sample":
case "diagnostic.memory.pressure":
case "payload.large":

View File

@@ -2,6 +2,7 @@ import { beforeEach, describe, expect, it, vi } from "vitest";
import {
onDiagnosticEvent,
resetDiagnosticEventsForTest,
type DiagnosticEventPayload,
type DiagnosticToolLoopEvent,
} from "../infra/diagnostic-events.js";
import { resetDiagnosticSessionStateForTest } from "../logging/diagnostic-session-state.js";
@@ -85,6 +86,22 @@ describe("before_tool_call loop detection behavior", () => {
}
}
async function withToolExecutionEvents(
run: (emitted: DiagnosticEventPayload[]) => Promise<void>,
) {
const emitted: DiagnosticEventPayload[] = [];
const stop = onDiagnosticEvent((evt) => {
if (evt.type.startsWith("tool.execution.")) {
emitted.push(evt);
}
});
try {
await run(emitted);
} finally {
stop();
}
}
function createPingPongTools(options?: { withProgress?: boolean }) {
const readExecute = options?.withProgress
? vi.fn().mockImplementation(async (toolCallId: string) => ({
@@ -331,6 +348,115 @@ describe("before_tool_call loop detection behavior", () => {
});
});
});
it("emits diagnostic tool execution events without parameter values", async () => {
const trace = {
traceId: "4bf92f3577b34da6a3ce929d0e0e4736",
spanId: "00f067aa0ba902b7",
traceFlags: "01",
};
const execute = vi.fn().mockResolvedValue({
content: [{ type: "text", text: "ok" }],
});
const tool = wrapToolWithBeforeToolCallHook({ name: "bash", execute } as any, {
agentId: "main",
sessionKey: "session-key",
sessionId: "session-id",
runId: "run-1",
trace,
loopDetection: { enabled: false },
});
await withToolExecutionEvents(async (emitted) => {
await tool.execute(
"tool-call-1",
{ command: "pwd", token: "sk-1234567890abcdef1234567890abcdef" },
undefined,
undefined,
);
expect(emitted.map((evt) => evt.type)).toEqual([
"tool.execution.started",
"tool.execution.completed",
]);
expect(emitted[0]).toMatchObject({
type: "tool.execution.started",
runId: "run-1",
sessionKey: "session-key",
sessionId: "session-id",
toolName: "exec",
toolCallId: "tool-call-1",
paramsSummary: {
kind: "object",
},
trace,
});
expect(emitted[0]?.trace).not.toBe(trace);
expect(Object.isFrozen(emitted[0]?.trace)).toBe(true);
expect(emitted[1]).toMatchObject({
type: "tool.execution.completed",
durationMs: expect.any(Number),
});
expect(JSON.stringify(emitted)).not.toContain("sk-1234567890abcdef1234567890abcdef");
expect(JSON.stringify(emitted)).not.toContain("pwd");
});
});
it("emits diagnostic tool execution error events with redacted errors", async () => {
const execute = vi
.fn()
.mockRejectedValue(new Error("failed with key sk-1234567890abcdef1234567890abcdef"));
const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
agentId: "main",
sessionKey: "session-key",
loopDetection: { enabled: false },
});
await withToolExecutionEvents(async (emitted) => {
await expect(
tool.execute("tool-call-error", { path: "/tmp/file" }, undefined, undefined),
).rejects.toThrow("failed with key");
expect(emitted.map((evt) => evt.type)).toEqual([
"tool.execution.started",
"tool.execution.error",
]);
expect(emitted[1]).toMatchObject({
type: "tool.execution.error",
toolName: "read",
toolCallId: "tool-call-error",
durationMs: expect.any(Number),
errorCategory: "Error",
});
expect(JSON.stringify(emitted[1])).not.toContain("sk-1234567890abcdef1234567890abcdef");
});
});
it("summarizes hostile object params without enumerating keys", async () => {
const execute = vi.fn().mockResolvedValue({ content: [{ type: "text", text: "ok" }] });
const tool = wrapToolWithBeforeToolCallHook({ name: "bash", execute } as any, {
agentId: "main",
sessionKey: "session-key",
loopDetection: { enabled: false },
});
const params = new Proxy(
{},
{
ownKeys() {
throw new Error("should not enumerate params");
},
},
);
await withToolExecutionEvents(async (emitted) => {
await tool.execute("tool-call-proxy", params, undefined, undefined);
expect(emitted[0]).toMatchObject({
type: "tool.execution.started",
paramsSummary: { kind: "object" },
});
});
});
});
describe("before_tool_call requireApproval handling", () => {

View File

@@ -1,4 +1,8 @@
import type { ToolLoopDetectionConfig } from "../config/types.tools.js";
import {
emitDiagnosticEvent,
type DiagnosticToolParamsSummary,
} from "../infra/diagnostic-events.js";
import {
freezeDiagnosticTraceContext,
type DiagnosticTraceContext,
@@ -81,6 +85,57 @@ function unwrapErrorCause(err: unknown): unknown {
return err;
}
function summarizeToolParams(params: unknown): DiagnosticToolParamsSummary {
if (params === null) {
return { kind: "null" };
}
if (params === undefined) {
return { kind: "undefined" };
}
if (Array.isArray(params)) {
return { kind: "array", length: params.length };
}
if (typeof params === "object") {
return { kind: "object" };
}
if (typeof params === "string") {
return { kind: "string", length: params.length };
}
if (typeof params === "number") {
return { kind: "number" };
}
if (typeof params === "boolean") {
return { kind: "boolean" };
}
return { kind: "other" };
}
function errorCategory(err: unknown): string {
if (err instanceof Error && err.name.trim()) {
return err.name;
}
return typeof err;
}
function diagnosticErrorCode(err: unknown): string | undefined {
if (!err || typeof err !== "object") {
return undefined;
}
const candidate = err as { code?: unknown; status?: unknown; statusCode?: unknown };
const code = candidate.code ?? candidate.status ?? candidate.statusCode;
if (typeof code === "number" && Number.isFinite(code)) {
return String(code);
}
if (typeof code !== "string") {
return undefined;
}
const trimmed = code.trim();
if (!trimmed) {
return undefined;
}
return trimmed.slice(0, 64);
}
function shouldEmitLoopWarning(state: SessionState, warningKey: string, count: number): boolean {
if (!state.toolLoopWarningBuckets) {
state.toolLoopWarningBuckets = new Map();
@@ -415,8 +470,27 @@ export function wrapToolWithBeforeToolCallHook(
}
}
const normalizedToolName = normalizeToolName(toolName || "tool");
const eventBase = {
...(ctx?.runId && { runId: ctx.runId }),
...(ctx?.sessionKey && { sessionKey: ctx.sessionKey }),
...(ctx?.sessionId && { sessionId: ctx.sessionId }),
...(ctx?.trace && { trace: freezeDiagnosticTraceContext(ctx.trace) }),
toolName: normalizedToolName,
...(toolCallId && { toolCallId }),
paramsSummary: summarizeToolParams(outcome.params),
};
emitDiagnosticEvent({
type: "tool.execution.started",
...eventBase,
});
const startedAt = Date.now();
try {
const result = await execute(toolCallId, outcome.params, signal, onUpdate);
emitDiagnosticEvent({
type: "tool.execution.completed",
...eventBase,
durationMs: Date.now() - startedAt,
});
await recordLoopOutcome({
ctx,
toolName: normalizedToolName,
@@ -426,6 +500,15 @@ export function wrapToolWithBeforeToolCallHook(
});
return result;
} catch (err) {
const cause = unwrapErrorCause(err);
const errorCode = diagnosticErrorCode(cause);
emitDiagnosticEvent({
type: "tool.execution.error",
...eventBase,
durationMs: Date.now() - startedAt,
errorCategory: errorCategory(cause),
...(errorCode ? { errorCode } : {}),
});
await recordLoopOutcome({
ctx,
toolName: normalizedToolName,

View File

@@ -154,6 +154,37 @@ export type DiagnosticToolLoopEvent = DiagnosticBaseEvent & {
pairedToolName?: string;
};
export type DiagnosticToolParamsSummary =
| { kind: "object" }
| { kind: "array"; length: number }
| { kind: "string"; length: number }
| { kind: "number" | "boolean" | "null" | "undefined" | "other" };
type DiagnosticToolExecutionBaseEvent = DiagnosticBaseEvent & {
runId?: string;
sessionKey?: string;
sessionId?: string;
toolName: string;
toolCallId?: string;
paramsSummary?: DiagnosticToolParamsSummary;
};
export type DiagnosticToolExecutionStartedEvent = DiagnosticToolExecutionBaseEvent & {
type: "tool.execution.started";
};
export type DiagnosticToolExecutionCompletedEvent = DiagnosticToolExecutionBaseEvent & {
type: "tool.execution.completed";
durationMs: number;
};
export type DiagnosticToolExecutionErrorEvent = DiagnosticToolExecutionBaseEvent & {
type: "tool.execution.error";
durationMs: number;
errorCategory: string;
errorCode?: string;
};
export type DiagnosticMemoryUsage = {
rssBytes: number;
heapTotalBytes: number;
@@ -204,6 +235,9 @@ export type DiagnosticEventPayload =
| DiagnosticRunAttemptEvent
| DiagnosticHeartbeatEvent
| DiagnosticToolLoopEvent
| DiagnosticToolExecutionStartedEvent
| DiagnosticToolExecutionCompletedEvent
| DiagnosticToolExecutionErrorEvent
| DiagnosticMemorySampleEvent
| DiagnosticMemoryPressureEvent
| DiagnosticPayloadLargeEvent;

View File

@@ -235,6 +235,18 @@ function sanitizeDiagnosticEvent(event: DiagnosticEventPayload): DiagnosticStabi
record.count = event.count;
record.pairedToolName = event.pairedToolName;
break;
case "tool.execution.started":
record.toolName = event.toolName;
break;
case "tool.execution.completed":
record.toolName = event.toolName;
record.durationMs = event.durationMs;
break;
case "tool.execution.error":
record.toolName = event.toolName;
record.durationMs = event.durationMs;
record.reason = event.errorCategory;
break;
case "diagnostic.memory.sample":
record.memory = copyMemory(event.memory);
break;