From d6ef1fcf24eea3a5985d653a3baf400e32aa1b75 Mon Sep 17 00:00:00 2001
From: Vincent Koc
Date: Sat, 25 Apr 2026 11:11:42 -0700
Subject: [PATCH] feat(diagnostics-otel): export tool loop events

---
Review notes (free text between the three-dash line and the first
"diff --git" header; it is not part of any hunk):

* toolLoopAttrs reads toolName, level, action, detector, count and the
  optional pairedToolName from the event. It never reads message,
  sessionKey or sessionId. Only the two tool names are passed through
  lowCardinalityAttr; level, action and detector are exported as-is.
* recordToolLoop adds 1 to the openclaw.tool.loop counter with those
  attributes and returns early when tracesEnabled is falsy. Otherwise it
  opens an openclaw.tool.loop span (duration argument 0, endTimeMs
  evt.ts), sets status ERROR with message "<detector>:<action>" when
  level is "critical" or action is "block", and ends the span at evt.ts.
* NOTE(review): openclaw.loop.count is the raw evt.count and is attached
  to the counter as well as to the span, so every distinct count value is
  a distinct attribute set on the metric. Confirm that this cardinality
  is intended before merging.
* NOTE(review): this copy was re-laid-out from a whitespace-collapsed
  version. Leading indentation of the diff lines and the type arguments
  on Extract and Record are reconstructed, and the From: header carries
  no address. Confirm against the target tree before applying.

 CHANGELOG.md                                  |  1 +
 .../diagnostics-otel/src/service.test.ts      | 50 +++++++++++++++++++
 extensions/diagnostics-otel/src/service.ts    | 35 +++++++++++++
 3 files changed, 86 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f7d026c773b..c7f8a1a2738 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
 - Diagnostics/OTEL: surface provider request identifiers as bounded hashes on model-call diagnostics and span events, without exporting raw request IDs or metric labels. Thanks @Lidang-Jiang and @vincentkoc.
 - Plugins/diagnostics: add metadata-only `model_call_started` and `model_call_ended` hooks for provider/model call telemetry without exposing prompts, responses, headers, request bodies, or raw provider request IDs. Thanks @vincentkoc.
 - Diagnostics/OTEL: emit bounded context assembly diagnostics and export `openclaw.context.assembled` spans with prompt/history sizes but no prompt, history, response, or session-key content. Thanks @vincentkoc.
+- Diagnostics/OTEL: export existing tool-loop diagnostics as `openclaw.tool.loop` counters and spans without loop messages, session identifiers, params, or tool output. Thanks @vincentkoc.
 - Diagnostics/OTEL: add bounded outbound message delivery lifecycle diagnostics and export them as low-cardinality delivery spans/metrics without message body, recipient, room, or media-path data. (#71471) Thanks @vincentkoc and @jlapenna.
 - Diagnostics/OTEL: emit bounded exec-process diagnostics and export them as `openclaw.exec` spans without exposing command text, working directories, or container identifiers. (#71451) Thanks @vincentkoc and @jlapenna.
 - Diagnostics/OTEL: support `OPENCLAW_OTEL_PRELOADED=1` so the plugin can reuse an already-registered OpenTelemetry SDK while keeping OpenClaw diagnostic listeners wired. (#71450) Thanks @vincentkoc and @jlapenna.
diff --git a/extensions/diagnostics-otel/src/service.test.ts b/extensions/diagnostics-otel/src/service.test.ts
index 782378d680b..bfc1a63b1d5 100644
--- a/extensions/diagnostics-otel/src/service.test.ts
+++ b/extensions/diagnostics-otel/src/service.test.ts
@@ -1050,6 +1050,56 @@ describe("diagnostics-otel service", () => {
     await service.stop?.(ctx);
   });
 
+  test("exports tool loop diagnostics without loop messages or session identifiers", async () => {
+    const service = createDiagnosticsOtelService();
+    const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
+    await service.start(ctx);
+
+    emitDiagnosticEvent({
+      type: "tool.loop",
+      sessionKey: "session-key",
+      sessionId: "session-id",
+      toolName: "process",
+      level: "critical",
+      action: "block",
+      detector: "known_poll_no_progress",
+      count: 20,
+      message: "CRITICAL: repeated secret-bearing tool output",
+      pairedToolName: "read",
+    });
+    await flushDiagnosticEvents();
+
+    expect(telemetryState.counters.get("openclaw.tool.loop")?.add).toHaveBeenCalledWith(1, {
+      "openclaw.toolName": "process",
+      "openclaw.loop.level": "critical",
+      "openclaw.loop.action": "block",
+      "openclaw.loop.detector": "known_poll_no_progress",
+      "openclaw.loop.count": 20,
+      "openclaw.loop.paired_tool": "read",
+    });
+    const loopSpanCall = telemetryState.tracer.startSpan.mock.calls.find(
+      (call) => call[0] === "openclaw.tool.loop",
+    );
+    expect(loopSpanCall?.[1]).toMatchObject({
+      attributes: {
+        "openclaw.toolName": "process",
+        "openclaw.loop.level": "critical",
+        "openclaw.loop.action": "block",
+        "openclaw.loop.detector": "known_poll_no_progress",
+        "openclaw.loop.count": 20,
+        "openclaw.loop.paired_tool": "read",
+      },
+    });
+    const loopSpan = telemetryState.spans.find((span) => span.name === "openclaw.tool.loop");
+    expect(loopSpan?.setStatus).toHaveBeenCalledWith({
+      code: 2,
+      message: "known_poll_no_progress:block",
+    });
+    expect(JSON.stringify(loopSpanCall)).not.toContain("session-key");
+    expect(JSON.stringify(loopSpanCall)).not.toContain("secret-bearing");
+    await service.stop?.(ctx);
+  });
+
   test("parents trusted diagnostic lifecycle spans from explicit parent ids", async () => {
     const service = createDiagnosticsOtelService();
     const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
diff --git a/extensions/diagnostics-otel/src/service.ts b/extensions/diagnostics-otel/src/service.ts
index 0352bf9cfaf..2c6a7509778 100644
--- a/extensions/diagnostics-otel/src/service.ts
+++ b/extensions/diagnostics-otel/src/service.ts
@@ -657,6 +657,10 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
         unit: "1",
         description: "Run attempts",
       });
+      const toolLoopCounter = meter.createCounter("openclaw.tool.loop", {
+        unit: "1",
+        description: "Detected repetitive tool-call loop events",
+      });
       const modelCallDurationHistogram = meter.createHistogram("openclaw.model_call.duration_ms", {
         unit: "ms",
         description: "Model call duration",
@@ -1093,6 +1097,35 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
         runAttemptCounter.add(1, { "openclaw.attempt": evt.attempt });
       };
 
+      const toolLoopAttrs = (
+        evt: Extract<DiagnosticEventPayload, { type: "tool.loop" }>,
+      ): Record<string, string | number> => ({
+        "openclaw.toolName": lowCardinalityAttr(evt.toolName, "tool"),
+        "openclaw.loop.level": evt.level,
+        "openclaw.loop.action": evt.action,
+        "openclaw.loop.detector": evt.detector,
+        "openclaw.loop.count": evt.count,
+        ...(evt.pairedToolName
+          ? { "openclaw.loop.paired_tool": lowCardinalityAttr(evt.pairedToolName, "tool") }
+          : {}),
+      });
+
+      const recordToolLoop = (evt: Extract<DiagnosticEventPayload, { type: "tool.loop" }>) => {
+        const attrs = toolLoopAttrs(evt);
+        toolLoopCounter.add(1, attrs);
+        if (!tracesEnabled) {
+          return;
+        }
+        const span = spanWithDuration("openclaw.tool.loop", attrs, 0, { endTimeMs: evt.ts });
+        if (evt.level === "critical" || evt.action === "block") {
+          span.setStatus({
+            code: SpanStatusCode.ERROR,
+            message: `${evt.detector}:${evt.action}`,
+          });
+        }
+        span.end(evt.ts);
+      };
+
       const recordRunCompleted = (
         evt: Extract<DiagnosticEventPayload, { type: "run.completed" }>,
         metadata: DiagnosticEventMetadata,
@@ -1435,6 +1468,8 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
             recordLogRecord?.(evt, metadata);
             return;
           case "tool.loop":
+            recordToolLoop(evt);
+            return;
           case "tool.execution.started":
           case "run.started":
           case "model.call.started":