mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 08:10:44 +00:00
feat(diagnostics): emit tool execution events
Emit structured diagnostic events for tool execution lifecycle, with trace context, safe parameter summaries, and non-message error metadata.
This commit is contained in:
@@ -13,6 +13,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Diagnostics/OTEL: attach diagnostic trace context to exported OTEL logs so log records can correlate with future spans without adding retained process state. Thanks @vincentkoc.
|
||||
- Diagnostics/OTEL: pass immutable per-run diagnostic trace context through agent and tool hook contexts, and parent exported diagnostic spans from validated context without retaining global trace state. Thanks @vincentkoc.
|
||||
- Diagnostics/OTEL: make exporter startup restart-safe so config reloads do not retain stale SDKs, log transports, or diagnostic event listeners. Thanks @vincentkoc.
|
||||
- Diagnostics: emit structured tool execution diagnostic events with trace context, timing, and redacted error metadata. Thanks @vincentkoc.
|
||||
- Control UI/chat: add a Steer action on queued messages so a browser follow-up can be injected into the active run without retyping it.
|
||||
- Control UI/Talk: add browser WebRTC realtime voice sessions backed by OpenAI Realtime, with Gateway-minted ephemeral client secrets and `openclaw_agent_consult` handoff to the full OpenClaw agent.
|
||||
- Agents/tools: add optional per-call `timeoutMs` support for image, video, music, and TTS generation tools so agents can extend provider request timeouts only when a specific generation needs it.
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
c0f788d1895ced2ffdad9f82e6afc592171e6651c61c0fc5083f0040437cda6d plugin-sdk-api-baseline.json
|
||||
70e320157331080b98f9c2acae58e89ad1dc70b48adad265225a7eb76b6ac29f plugin-sdk-api-baseline.jsonl
|
||||
c57d43f93ec2930b099dd5c5777f201f1bdd1ab432eeb4049b6e62ff23fe8112 plugin-sdk-api-baseline.json
|
||||
ece1ea689914c4070b587551e86c6bed6598feba90457ab489222e168b2d9298 plugin-sdk-api-baseline.jsonl
|
||||
|
||||
@@ -812,6 +812,9 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
recordHeartbeat(evt);
|
||||
return;
|
||||
case "tool.loop":
|
||||
case "tool.execution.started":
|
||||
case "tool.execution.completed":
|
||||
case "tool.execution.error":
|
||||
case "diagnostic.memory.sample":
|
||||
case "diagnostic.memory.pressure":
|
||||
case "payload.large":
|
||||
|
||||
@@ -2,6 +2,7 @@ import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
onDiagnosticEvent,
|
||||
resetDiagnosticEventsForTest,
|
||||
type DiagnosticEventPayload,
|
||||
type DiagnosticToolLoopEvent,
|
||||
} from "../infra/diagnostic-events.js";
|
||||
import { resetDiagnosticSessionStateForTest } from "../logging/diagnostic-session-state.js";
|
||||
@@ -85,6 +86,22 @@ describe("before_tool_call loop detection behavior", () => {
|
||||
}
|
||||
}
|
||||
|
||||
async function withToolExecutionEvents(
|
||||
run: (emitted: DiagnosticEventPayload[]) => Promise<void>,
|
||||
) {
|
||||
const emitted: DiagnosticEventPayload[] = [];
|
||||
const stop = onDiagnosticEvent((evt) => {
|
||||
if (evt.type.startsWith("tool.execution.")) {
|
||||
emitted.push(evt);
|
||||
}
|
||||
});
|
||||
try {
|
||||
await run(emitted);
|
||||
} finally {
|
||||
stop();
|
||||
}
|
||||
}
|
||||
|
||||
function createPingPongTools(options?: { withProgress?: boolean }) {
|
||||
const readExecute = options?.withProgress
|
||||
? vi.fn().mockImplementation(async (toolCallId: string) => ({
|
||||
@@ -331,6 +348,115 @@ describe("before_tool_call loop detection behavior", () => {
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it("emits diagnostic tool execution events without parameter values", async () => {
|
||||
const trace = {
|
||||
traceId: "4bf92f3577b34da6a3ce929d0e0e4736",
|
||||
spanId: "00f067aa0ba902b7",
|
||||
traceFlags: "01",
|
||||
};
|
||||
const execute = vi.fn().mockResolvedValue({
|
||||
content: [{ type: "text", text: "ok" }],
|
||||
});
|
||||
const tool = wrapToolWithBeforeToolCallHook({ name: "bash", execute } as any, {
|
||||
agentId: "main",
|
||||
sessionKey: "session-key",
|
||||
sessionId: "session-id",
|
||||
runId: "run-1",
|
||||
trace,
|
||||
loopDetection: { enabled: false },
|
||||
});
|
||||
|
||||
await withToolExecutionEvents(async (emitted) => {
|
||||
await tool.execute(
|
||||
"tool-call-1",
|
||||
{ command: "pwd", token: "sk-1234567890abcdef1234567890abcdef" },
|
||||
undefined,
|
||||
undefined,
|
||||
);
|
||||
|
||||
expect(emitted.map((evt) => evt.type)).toEqual([
|
||||
"tool.execution.started",
|
||||
"tool.execution.completed",
|
||||
]);
|
||||
expect(emitted[0]).toMatchObject({
|
||||
type: "tool.execution.started",
|
||||
runId: "run-1",
|
||||
sessionKey: "session-key",
|
||||
sessionId: "session-id",
|
||||
toolName: "exec",
|
||||
toolCallId: "tool-call-1",
|
||||
paramsSummary: {
|
||||
kind: "object",
|
||||
},
|
||||
trace,
|
||||
});
|
||||
expect(emitted[0]?.trace).not.toBe(trace);
|
||||
expect(Object.isFrozen(emitted[0]?.trace)).toBe(true);
|
||||
expect(emitted[1]).toMatchObject({
|
||||
type: "tool.execution.completed",
|
||||
durationMs: expect.any(Number),
|
||||
});
|
||||
expect(JSON.stringify(emitted)).not.toContain("sk-1234567890abcdef1234567890abcdef");
|
||||
expect(JSON.stringify(emitted)).not.toContain("pwd");
|
||||
});
|
||||
});
|
||||
|
||||
it("emits diagnostic tool execution error events with redacted errors", async () => {
|
||||
const execute = vi
|
||||
.fn()
|
||||
.mockRejectedValue(new Error("failed with key sk-1234567890abcdef1234567890abcdef"));
|
||||
const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
|
||||
agentId: "main",
|
||||
sessionKey: "session-key",
|
||||
loopDetection: { enabled: false },
|
||||
});
|
||||
|
||||
await withToolExecutionEvents(async (emitted) => {
|
||||
await expect(
|
||||
tool.execute("tool-call-error", { path: "/tmp/file" }, undefined, undefined),
|
||||
).rejects.toThrow("failed with key");
|
||||
|
||||
expect(emitted.map((evt) => evt.type)).toEqual([
|
||||
"tool.execution.started",
|
||||
"tool.execution.error",
|
||||
]);
|
||||
expect(emitted[1]).toMatchObject({
|
||||
type: "tool.execution.error",
|
||||
toolName: "read",
|
||||
toolCallId: "tool-call-error",
|
||||
durationMs: expect.any(Number),
|
||||
errorCategory: "Error",
|
||||
});
|
||||
expect(JSON.stringify(emitted[1])).not.toContain("sk-1234567890abcdef1234567890abcdef");
|
||||
});
|
||||
});
|
||||
|
||||
it("summarizes hostile object params without enumerating keys", async () => {
|
||||
const execute = vi.fn().mockResolvedValue({ content: [{ type: "text", text: "ok" }] });
|
||||
const tool = wrapToolWithBeforeToolCallHook({ name: "bash", execute } as any, {
|
||||
agentId: "main",
|
||||
sessionKey: "session-key",
|
||||
loopDetection: { enabled: false },
|
||||
});
|
||||
const params = new Proxy(
|
||||
{},
|
||||
{
|
||||
ownKeys() {
|
||||
throw new Error("should not enumerate params");
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
await withToolExecutionEvents(async (emitted) => {
|
||||
await tool.execute("tool-call-proxy", params, undefined, undefined);
|
||||
|
||||
expect(emitted[0]).toMatchObject({
|
||||
type: "tool.execution.started",
|
||||
paramsSummary: { kind: "object" },
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("before_tool_call requireApproval handling", () => {
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
import type { ToolLoopDetectionConfig } from "../config/types.tools.js";
|
||||
import {
|
||||
emitDiagnosticEvent,
|
||||
type DiagnosticToolParamsSummary,
|
||||
} from "../infra/diagnostic-events.js";
|
||||
import {
|
||||
freezeDiagnosticTraceContext,
|
||||
type DiagnosticTraceContext,
|
||||
@@ -81,6 +85,57 @@ function unwrapErrorCause(err: unknown): unknown {
|
||||
return err;
|
||||
}
|
||||
|
||||
function summarizeToolParams(params: unknown): DiagnosticToolParamsSummary {
|
||||
if (params === null) {
|
||||
return { kind: "null" };
|
||||
}
|
||||
if (params === undefined) {
|
||||
return { kind: "undefined" };
|
||||
}
|
||||
if (Array.isArray(params)) {
|
||||
return { kind: "array", length: params.length };
|
||||
}
|
||||
if (typeof params === "object") {
|
||||
return { kind: "object" };
|
||||
}
|
||||
if (typeof params === "string") {
|
||||
return { kind: "string", length: params.length };
|
||||
}
|
||||
if (typeof params === "number") {
|
||||
return { kind: "number" };
|
||||
}
|
||||
if (typeof params === "boolean") {
|
||||
return { kind: "boolean" };
|
||||
}
|
||||
return { kind: "other" };
|
||||
}
|
||||
|
||||
function errorCategory(err: unknown): string {
|
||||
if (err instanceof Error && err.name.trim()) {
|
||||
return err.name;
|
||||
}
|
||||
return typeof err;
|
||||
}
|
||||
|
||||
function diagnosticErrorCode(err: unknown): string | undefined {
|
||||
if (!err || typeof err !== "object") {
|
||||
return undefined;
|
||||
}
|
||||
const candidate = err as { code?: unknown; status?: unknown; statusCode?: unknown };
|
||||
const code = candidate.code ?? candidate.status ?? candidate.statusCode;
|
||||
if (typeof code === "number" && Number.isFinite(code)) {
|
||||
return String(code);
|
||||
}
|
||||
if (typeof code !== "string") {
|
||||
return undefined;
|
||||
}
|
||||
const trimmed = code.trim();
|
||||
if (!trimmed) {
|
||||
return undefined;
|
||||
}
|
||||
return trimmed.slice(0, 64);
|
||||
}
|
||||
|
||||
function shouldEmitLoopWarning(state: SessionState, warningKey: string, count: number): boolean {
|
||||
if (!state.toolLoopWarningBuckets) {
|
||||
state.toolLoopWarningBuckets = new Map();
|
||||
@@ -415,8 +470,27 @@ export function wrapToolWithBeforeToolCallHook(
|
||||
}
|
||||
}
|
||||
const normalizedToolName = normalizeToolName(toolName || "tool");
|
||||
const eventBase = {
|
||||
...(ctx?.runId && { runId: ctx.runId }),
|
||||
...(ctx?.sessionKey && { sessionKey: ctx.sessionKey }),
|
||||
...(ctx?.sessionId && { sessionId: ctx.sessionId }),
|
||||
...(ctx?.trace && { trace: freezeDiagnosticTraceContext(ctx.trace) }),
|
||||
toolName: normalizedToolName,
|
||||
...(toolCallId && { toolCallId }),
|
||||
paramsSummary: summarizeToolParams(outcome.params),
|
||||
};
|
||||
emitDiagnosticEvent({
|
||||
type: "tool.execution.started",
|
||||
...eventBase,
|
||||
});
|
||||
const startedAt = Date.now();
|
||||
try {
|
||||
const result = await execute(toolCallId, outcome.params, signal, onUpdate);
|
||||
emitDiagnosticEvent({
|
||||
type: "tool.execution.completed",
|
||||
...eventBase,
|
||||
durationMs: Date.now() - startedAt,
|
||||
});
|
||||
await recordLoopOutcome({
|
||||
ctx,
|
||||
toolName: normalizedToolName,
|
||||
@@ -426,6 +500,15 @@ export function wrapToolWithBeforeToolCallHook(
|
||||
});
|
||||
return result;
|
||||
} catch (err) {
|
||||
const cause = unwrapErrorCause(err);
|
||||
const errorCode = diagnosticErrorCode(cause);
|
||||
emitDiagnosticEvent({
|
||||
type: "tool.execution.error",
|
||||
...eventBase,
|
||||
durationMs: Date.now() - startedAt,
|
||||
errorCategory: errorCategory(cause),
|
||||
...(errorCode ? { errorCode } : {}),
|
||||
});
|
||||
await recordLoopOutcome({
|
||||
ctx,
|
||||
toolName: normalizedToolName,
|
||||
|
||||
@@ -154,6 +154,37 @@ export type DiagnosticToolLoopEvent = DiagnosticBaseEvent & {
|
||||
pairedToolName?: string;
|
||||
};
|
||||
|
||||
export type DiagnosticToolParamsSummary =
|
||||
| { kind: "object" }
|
||||
| { kind: "array"; length: number }
|
||||
| { kind: "string"; length: number }
|
||||
| { kind: "number" | "boolean" | "null" | "undefined" | "other" };
|
||||
|
||||
type DiagnosticToolExecutionBaseEvent = DiagnosticBaseEvent & {
|
||||
runId?: string;
|
||||
sessionKey?: string;
|
||||
sessionId?: string;
|
||||
toolName: string;
|
||||
toolCallId?: string;
|
||||
paramsSummary?: DiagnosticToolParamsSummary;
|
||||
};
|
||||
|
||||
export type DiagnosticToolExecutionStartedEvent = DiagnosticToolExecutionBaseEvent & {
|
||||
type: "tool.execution.started";
|
||||
};
|
||||
|
||||
export type DiagnosticToolExecutionCompletedEvent = DiagnosticToolExecutionBaseEvent & {
|
||||
type: "tool.execution.completed";
|
||||
durationMs: number;
|
||||
};
|
||||
|
||||
export type DiagnosticToolExecutionErrorEvent = DiagnosticToolExecutionBaseEvent & {
|
||||
type: "tool.execution.error";
|
||||
durationMs: number;
|
||||
errorCategory: string;
|
||||
errorCode?: string;
|
||||
};
|
||||
|
||||
export type DiagnosticMemoryUsage = {
|
||||
rssBytes: number;
|
||||
heapTotalBytes: number;
|
||||
@@ -204,6 +235,9 @@ export type DiagnosticEventPayload =
|
||||
| DiagnosticRunAttemptEvent
|
||||
| DiagnosticHeartbeatEvent
|
||||
| DiagnosticToolLoopEvent
|
||||
| DiagnosticToolExecutionStartedEvent
|
||||
| DiagnosticToolExecutionCompletedEvent
|
||||
| DiagnosticToolExecutionErrorEvent
|
||||
| DiagnosticMemorySampleEvent
|
||||
| DiagnosticMemoryPressureEvent
|
||||
| DiagnosticPayloadLargeEvent;
|
||||
|
||||
@@ -235,6 +235,18 @@ function sanitizeDiagnosticEvent(event: DiagnosticEventPayload): DiagnosticStabi
|
||||
record.count = event.count;
|
||||
record.pairedToolName = event.pairedToolName;
|
||||
break;
|
||||
case "tool.execution.started":
|
||||
record.toolName = event.toolName;
|
||||
break;
|
||||
case "tool.execution.completed":
|
||||
record.toolName = event.toolName;
|
||||
record.durationMs = event.durationMs;
|
||||
break;
|
||||
case "tool.execution.error":
|
||||
record.toolName = event.toolName;
|
||||
record.durationMs = event.durationMs;
|
||||
record.reason = event.errorCategory;
|
||||
break;
|
||||
case "diagnostic.memory.sample":
|
||||
record.memory = copyMemory(event.memory);
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user