From 7bbd47349ec37f01a8f2b1dc219bf950615f361b Mon Sep 17 00:00:00 2001
From: Vincent Koc
Date: Sat, 25 Apr 2026 11:31:30 -0700
Subject: [PATCH] feat(diagnostics-otel): add genai token usage metric

---
 CHANGELOG.md                               |  1 +
 .../diagnostics-otel/src/service.test.ts   | 49 +++++++++++++++++++
 extensions/diagnostics-otel/src/service.ts | 23 +++++++++
 3 files changed, 73 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3dac30c8452..8325adf2894 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -28,6 +28,7 @@ Docs: https://docs.openclaw.ai
 - Diagnostics/OTEL: emit bounded context assembly diagnostics and export `openclaw.context.assembled` spans with prompt/history sizes but no prompt, history, response, or session-key content. Thanks @vincentkoc.
 - Diagnostics/OTEL: export existing tool-loop diagnostics as `openclaw.tool.loop` counters and spans without loop messages, session identifiers, params, or tool output. Thanks @vincentkoc.
 - Diagnostics/OTEL: export diagnostic memory samples and pressure as bounded memory histograms, counters, and pressure spans to help spot leak regressions without session or payload data. Thanks @vincentkoc.
+- Diagnostics/OTEL: add the GenAI `gen_ai.client.token.usage` histogram for input/output token usage while keeping session identifiers and aggregate cache counters out of the semantic metric. Thanks @vincentkoc.
 - Diagnostics/OTEL: add bounded outbound message delivery lifecycle diagnostics and export them as low-cardinality delivery spans/metrics without message body, recipient, room, or media-path data. (#71471) Thanks @vincentkoc and @jlapenna.
 - Diagnostics/OTEL: emit bounded exec-process diagnostics and export them as `openclaw.exec` spans without exposing command text, working directories, or container identifiers. (#71451) Thanks @vincentkoc and @jlapenna.
 - Diagnostics/OTEL: support `OPENCLAW_OTEL_PRELOADED=1` so the plugin can reuse an already-registered OpenTelemetry SDK while keeping OpenClaw diagnostic listeners wired. (#71450) Thanks @vincentkoc and @jlapenna.
diff --git a/extensions/diagnostics-otel/src/service.test.ts b/extensions/diagnostics-otel/src/service.test.ts
index 28b7525df95..80de301e823 100644
--- a/extensions/diagnostics-otel/src/service.test.ts
+++ b/extensions/diagnostics-otel/src/service.test.ts
@@ -691,6 +691,55 @@ describe("diagnostics-otel service", () => {
     await service.stop?.(ctx);
   });
 
+  test("exports GenAI client token usage histogram for input and output only", async () => {
+    const service = createDiagnosticsOtelService();
+    const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { metrics: true });
+    await service.start(ctx);
+
+    emitDiagnosticEvent({
+      type: "model.usage",
+      sessionKey: "session-key",
+      channel: "webchat",
+      provider: "openai",
+      model: "gpt-5.4",
+      usage: {
+        input: 12,
+        output: 7,
+        cacheRead: 3,
+        cacheWrite: 2,
+        promptTokens: 17,
+        total: 24,
+      },
+    });
+    await flushDiagnosticEvents();
+
+    expect(telemetryState.meter.createHistogram).toHaveBeenCalledWith(
+      "gen_ai.client.token.usage",
+      expect.objectContaining({
+        unit: "{token}",
+        advice: {
+          explicitBucketBoundaries: expect.arrayContaining([1, 4, 16, 1024, 67108864]),
+        },
+      }),
+    );
+    const genAiTokenUsage = telemetryState.histograms.get("gen_ai.client.token.usage");
+    expect(genAiTokenUsage?.record).toHaveBeenCalledTimes(2);
+    expect(genAiTokenUsage?.record).toHaveBeenCalledWith(12, {
+      "gen_ai.operation.name": "chat",
+      "gen_ai.provider.name": "openai",
+      "gen_ai.request.model": "gpt-5.4",
+      "gen_ai.token.type": "input",
+    });
+    expect(genAiTokenUsage?.record).toHaveBeenCalledWith(7, {
+      "gen_ai.operation.name": "chat",
+      "gen_ai.provider.name": "openai",
+      "gen_ai.request.model": "gpt-5.4",
+      "gen_ai.token.type": "output",
+    });
+    expect(JSON.stringify(genAiTokenUsage?.record.mock.calls)).not.toContain("session-key");
+    await service.stop?.(ctx);
+  });
+
   test("exports run, model call, and tool execution lifecycle spans", async () => {
     const service = createDiagnosticsOtelService();
     const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
diff --git a/extensions/diagnostics-otel/src/service.ts b/extensions/diagnostics-otel/src/service.ts
index 6e2a805ab74..f9470b15cdb 100644
--- a/extensions/diagnostics-otel/src/service.ts
+++ b/extensions/diagnostics-otel/src/service.ts
@@ -52,6 +52,9 @@ const BLOCKED_OTEL_LOG_ATTRIBUTE_KEYS = new Set(["__proto__", "prototype", "cons
 const PRELOADED_OTEL_SDK_ENV = "OPENCLAW_OTEL_PRELOADED";
 const OTEL_SEMCONV_STABILITY_OPT_IN_ENV = "OTEL_SEMCONV_STABILITY_OPT_IN";
 const GEN_AI_LATEST_EXPERIMENTAL_OPT_IN = "gen_ai_latest_experimental";
+const GEN_AI_TOKEN_USAGE_BUCKETS = [
+  1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864,
+];
 
 type OtelContentCapturePolicy = {
   inputMessages: boolean;
@@ -575,6 +578,13 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
       unit: "1",
       description: "Token usage by type",
     });
+    const genAiTokenUsageHistogram = meter.createHistogram("gen_ai.client.token.usage", {
+      unit: "{token}",
+      description: "Number of input and output tokens used by GenAI client operations",
+      advice: {
+        explicitBucketBoundaries: GEN_AI_TOKEN_USAGE_BUCKETS,
+      },
+    });
     const costCounter = meter.createCounter("openclaw.cost.usd", {
       unit: "1",
       description: "Estimated model cost (USD)",
@@ -854,13 +864,26 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
         "openclaw.provider": evt.provider ?? "unknown",
         "openclaw.model": evt.model ?? "unknown",
"unknown", }; + const genAiAttrs: Record = { + "gen_ai.operation.name": "chat", + "gen_ai.provider.name": lowCardinalityAttr(evt.provider), + ...(evt.model ? { "gen_ai.request.model": lowCardinalityAttr(evt.model) } : {}), + }; const usage = evt.usage; if (usage.input) { tokensCounter.add(usage.input, { ...attrs, "openclaw.token": "input" }); + genAiTokenUsageHistogram.record(usage.input, { + ...genAiAttrs, + "gen_ai.token.type": "input", + }); } if (usage.output) { tokensCounter.add(usage.output, { ...attrs, "openclaw.token": "output" }); + genAiTokenUsageHistogram.record(usage.output, { + ...genAiAttrs, + "gen_ai.token.type": "output", + }); } if (usage.cacheRead) { tokensCounter.add(usage.cacheRead, { ...attrs, "openclaw.token": "cache_read" });