diff --git a/CHANGELOG.md b/CHANGELOG.md index a4f2783021f..ed20b4369ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Diagnostics: keep webhook/message OTEL attributes and Prometheus delivery labels low-cardinality and omit raw chat/message IDs from spans, so progress-draft and message-tool modes do not leak high-cardinality messaging identifiers. - Telegram: render shared interactive reply buttons in reply delivery so plugin approval messages show inline keyboards. (#76238) Thanks @keshavbotagent. - Release validation: install the cross-OS TypeScript harness through Windows-safe Node/npm shims so native Windows package checks reach the OpenClaw smoke suites instead of exiting before artifact capture. Thanks @vincentkoc. - Release validation: let Windows packaged-upgrade checks continue after the shipped 2026.5.2 updater hits its native-module swap cleanup fallback, verifying the fallback-installed candidate through package metadata and downstream smoke instead of crashing on the immediate update-status probe. Thanks @vincentkoc. diff --git a/docs/gateway/opentelemetry.md b/docs/gateway/opentelemetry.md index 758cbf2cb38..31e0b82f455 100644 --- a/docs/gateway/opentelemetry.md +++ b/docs/gateway/opentelemetry.md @@ -268,11 +268,11 @@ heartbeat tick. For the config knob and defaults, see - `openclaw.exec` - `openclaw.exec.target`, `openclaw.exec.mode`, `openclaw.outcome`, `openclaw.failureKind`, `openclaw.exec.command_length`, `openclaw.exec.exit_code`, `openclaw.exec.timed_out` - `openclaw.webhook.processed` - - `openclaw.channel`, `openclaw.webhook`, `openclaw.chatId` + - `openclaw.channel`, `openclaw.webhook` - `openclaw.webhook.error` - - `openclaw.channel`, `openclaw.webhook`, `openclaw.chatId`, `openclaw.error` + - `openclaw.channel`, `openclaw.webhook`, `openclaw.error` - `openclaw.message.processed` - - `openclaw.channel`, `openclaw.outcome`, `openclaw.chatId`, `openclaw.messageId`, `openclaw.reason` + - `openclaw.channel`, `openclaw.outcome`, `openclaw.reason` - `openclaw.message.delivery` - `openclaw.channel`, `openclaw.delivery.kind`, `openclaw.outcome`, `openclaw.errorCategory`, `openclaw.delivery.result_count` - `openclaw.session.stuck` diff --git a/extensions/diagnostics-otel/src/service.test.ts b/extensions/diagnostics-otel/src/service.test.ts index 4b637adf5ed..7b486a06886 100644 --- a/extensions/diagnostics-otel/src/service.test.ts +++ b/extensions/diagnostics-otel/src/service.test.ts @@ -296,6 +296,7 @@ describe("diagnostics-otel service", () => { type: "webhook.processed", channel: "telegram", updateType: "telegram-post", + chatId: "chat-should-not-export", durationMs: 120, }); emitDiagnosticEvent({ @@ -307,7 +308,10 @@ describe("diagnostics-otel service", () => { emitDiagnosticEvent({ type: "message.processed", channel: "telegram", + chatId: "chat-should-not-export", + messageId: "message-should-not-export", outcome: "completed", + reason: "progress draft / message tool 123", durationMs: 55, }); emitDiagnosticEvent({ @@ -348,6 +352,33 @@ describe("diagnostics-otel service", () => { expect(spanNames).toContain("openclaw.webhook.processed"); expect(spanNames).toContain("openclaw.message.processed"); expect(spanNames).toContain("openclaw.session.stuck"); + const webhookSpanCall = telemetryState.tracer.startSpan.mock.calls.find( + (call) => call[0] === "openclaw.webhook.processed", + ); + expect(webhookSpanCall?.[1]).toEqual({ + attributes: expect.not.objectContaining({ + "openclaw.chatId": expect.anything(), + }), + startTime: expect.any(Number), + }); + const messageSpanCall = telemetryState.tracer.startSpan.mock.calls.find( + (call) => call[0] === "openclaw.message.processed", + ); + expect(messageSpanCall?.[1]).toEqual({ + attributes: expect.objectContaining({ + "openclaw.channel": "telegram", + "openclaw.outcome": "completed", + "openclaw.reason": "unknown", + }), + startTime: expect.any(Number), + }); + expect(messageSpanCall?.[1]).toEqual({ + attributes: expect.not.objectContaining({ + "openclaw.chatId": expect.anything(), + "openclaw.messageId": expect.anything(), + }), + startTime: expect.any(Number), + }); emitDiagnosticEvent({ type: "log.record", @@ -2387,6 +2418,7 @@ describe("diagnostics-otel service", () => { for (const call of deliverySpanCalls) { expect(call[1]).toEqual({ attributes: expect.not.objectContaining({ + "openclaw.chatId": expect.anything(), "openclaw.sessionKey": expect.anything(), "openclaw.messageId": expect.anything(), "openclaw.conversationId": expect.anything(), @@ -2406,6 +2438,46 @@ describe("diagnostics-otel service", () => { await service.stop?.(ctx); }); + test("bounds unsafe message delivery attributes before export", async () => { + const service = createDiagnosticsOtelService(); + const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true }); + await service.start(ctx); + + emitDiagnosticEvent({ + type: "message.delivery.completed", + channel: "discord/custom", + deliveryKind: "progress draft" as never, + durationMs: 20, + resultCount: 1, + sessionKey: "session-secret", + }); + await flushDiagnosticEvents(); + + expect( + telemetryState.histograms.get("openclaw.message.delivery.duration_ms")?.record, + ).toHaveBeenCalledWith( + 20, + expect.objectContaining({ + "openclaw.channel": "unknown", + "openclaw.delivery.kind": "other", + "openclaw.outcome": "completed", + }), + ); + const deliverySpanCall = telemetryState.tracer.startSpan.mock.calls.find( + (call) => call[0] === "openclaw.message.delivery", + ); + expect(deliverySpanCall?.[1]).toMatchObject({ + attributes: { + "openclaw.channel": "unknown", + "openclaw.delivery.kind": "other", + "openclaw.outcome": "completed", + "openclaw.delivery.result_count": 1, + }, + startTime: expect.any(Number), + }); + await service.stop?.(ctx); + }); + test("does not export model or tool content unless capture is explicitly enabled", async () => { const service = createDiagnosticsOtelService(); const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true }); diff --git a/extensions/diagnostics-otel/src/service.ts b/extensions/diagnostics-otel/src/service.ts index 755d465019a..8c3f607fd66 100644 --- a/extensions/diagnostics-otel/src/service.ts +++ b/extensions/diagnostics-otel/src/service.ts @@ -31,6 +31,8 @@ import { const DEFAULT_SERVICE_NAME = "openclaw"; const DROPPED_OTEL_ATTRIBUTE_KEYS = new Set([ "openclaw.callId", + "openclaw.chatId", + "openclaw.messageId", "openclaw.parentSpanId", "openclaw.runId", "openclaw.sessionId", @@ -1262,8 +1264,8 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { evt: Extract, ) => { const attrs = { - "openclaw.channel": evt.channel ?? "unknown", - "openclaw.webhook": evt.updateType ?? "unknown", + "openclaw.channel": lowCardinalityAttr(evt.channel), + "openclaw.webhook": lowCardinalityAttr(evt.updateType), }; if (typeof evt.durationMs === "number") { webhookDurationHistogram.record(evt.durationMs, attrs); @@ -1272,9 +1274,6 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { return; } const spanAttrs: Record = { ...attrs }; - if (evt.chatId !== undefined) { - spanAttrs["openclaw.chatId"] = String(evt.chatId); - } const span = spanWithDuration("openclaw.webhook.processed", spanAttrs, evt.durationMs); span.end(); }; @@ -1283,8 +1282,8 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { evt: Extract, ) => { const attrs = { - "openclaw.channel": evt.channel ?? "unknown", - "openclaw.webhook": evt.updateType ?? "unknown", + "openclaw.channel": lowCardinalityAttr(evt.channel), + "openclaw.webhook": lowCardinalityAttr(evt.updateType), }; webhookErrorCounter.add(1, attrs); if (!tracesEnabled) { @@ -1295,9 +1294,6 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { ...attrs, "openclaw.error": redactedError, }; - if (evt.chatId !== undefined) { - spanAttrs["openclaw.chatId"] = String(evt.chatId); - } const span = tracer.startSpan("openclaw.webhook.error", { attributes: spanAttrs, }); @@ -1309,8 +1305,8 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { evt: Extract, ) => { const attrs = { - "openclaw.channel": evt.channel ?? "unknown", - "openclaw.source": evt.source ?? "unknown", + "openclaw.channel": lowCardinalityAttr(evt.channel), + "openclaw.source": lowCardinalityAttr(evt.source), }; messageQueuedCounter.add(1, attrs); if (typeof evt.queueDepth === "number") { @@ -1322,7 +1318,7 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { evt: Extract, ) => { const attrs = { - "openclaw.channel": evt.channel ?? "unknown", + "openclaw.channel": lowCardinalityAttr(evt.channel), "openclaw.outcome": evt.outcome ?? "unknown", }; messageProcessedCounter.add(1, attrs); @@ -1333,14 +1329,8 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { return; } const spanAttrs: Record = { ...attrs }; - if (evt.chatId !== undefined) { - spanAttrs["openclaw.chatId"] = String(evt.chatId); - } - if (evt.messageId !== undefined) { - spanAttrs["openclaw.messageId"] = String(evt.messageId); - } if (evt.reason) { - spanAttrs["openclaw.reason"] = redactSensitiveText(evt.reason); + spanAttrs["openclaw.reason"] = lowCardinalityAttr(evt.reason, "unknown"); } const span = spanWithDuration("openclaw.message.processed", spanAttrs, evt.durationMs); if (evt.outcome === "error" && evt.error) { @@ -1352,8 +1342,8 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { const messageDeliveryAttrs = ( evt: MessageDeliveryDiagnosticEvent, ): Record => ({ - "openclaw.channel": evt.channel, - "openclaw.delivery.kind": evt.deliveryKind, + "openclaw.channel": lowCardinalityAttr(evt.channel), + "openclaw.delivery.kind": lowCardinalityAttr(evt.deliveryKind, "other"), }); const recordMessageDeliveryStarted = ( diff --git a/extensions/diagnostics-prometheus/src/service.test.ts b/extensions/diagnostics-prometheus/src/service.test.ts index f3bfba0f4c6..f1530a95ead 100644 --- a/extensions/diagnostics-prometheus/src/service.test.ts +++ b/extensions/diagnostics-prometheus/src/service.test.ts @@ -87,6 +87,49 @@ describe("diagnostics-prometheus service", () => { expect(rendered).not.toContain("sk-secret"); }); + it("bounds messaging labels without exporting raw chat identifiers", () => { + const store = __test__.createPrometheusMetricStore(); + + __test__.recordDiagnosticEvent( + store, + { + ...baseEvent(), + type: "message.processed", + channel: "telegram/custom", + chatId: "chat-should-not-export", + messageId: "message-should-not-export", + outcome: "completed", + reason: "progress draft / message tool 123", + durationMs: 25, + }, + trusted, + ); + __test__.recordDiagnosticEvent( + store, + { + ...baseEvent(), + type: "message.delivery.error", + channel: "discord/custom", + deliveryKind: "progress draft" as never, + durationMs: 50, + errorCategory: "TimeoutError", + }, + trusted, + ); + + const rendered = __test__.renderPrometheusMetrics(store); + + expect(rendered).toContain( + 'openclaw_message_processed_total{channel="unknown",outcome="completed",reason="none"} 1', + ); + expect(rendered).toContain( + 'openclaw_message_delivery_total{channel="unknown",delivery_kind="other",error_category="TimeoutError",outcome="error"} 1', + ); + expect(rendered).not.toContain("chat-should-not-export"); + expect(rendered).not.toContain("message-should-not-export"); + expect(rendered).not.toContain("progress draft"); + }); + it("caps metric series growth and reports dropped series", () => { const store = __test__.createPrometheusMetricStore(); diff --git a/extensions/diagnostics-prometheus/src/service.ts b/extensions/diagnostics-prometheus/src/service.ts index 3605a4a3e4c..fea4dd6e1fd 100644 --- a/extensions/diagnostics-prometheus/src/service.ts +++ b/extensions/diagnostics-prometheus/src/service.ts @@ -504,7 +504,7 @@ function recordDiagnosticEvent( "Outbound message delivery attempts by outcome.", { channel: lowCardinalityLabel(evt.channel), - delivery_kind: evt.deliveryKind, + delivery_kind: lowCardinalityLabel(evt.deliveryKind, "other"), error_category: evt.type === "message.delivery.error" ? lowCardinalityLabel(evt.errorCategory, "other") @@ -517,7 +517,7 @@ function recordDiagnosticEvent( "Outbound message delivery duration in seconds.", { channel: lowCardinalityLabel(evt.channel), - delivery_kind: evt.deliveryKind, + delivery_kind: lowCardinalityLabel(evt.deliveryKind, "other"), error_category: evt.type === "message.delivery.error" ? lowCardinalityLabel(evt.errorCategory, "other") diff --git a/scripts/qa-otel-smoke.ts b/scripts/qa-otel-smoke.ts index 3455f6d7b54..2bc7bf2594a 100644 --- a/scripts/qa-otel-smoke.ts +++ b/scripts/qa-otel-smoke.ts @@ -88,6 +88,8 @@ const REQUIRED_SPAN_NAMES = [ ] as const; const DISALLOWED_ATTRIBUTE_KEYS = new Set([ "openclaw.runId", + "openclaw.chatId", + "openclaw.messageId", "openclaw.sessionKey", "openclaw.sessionId", "openclaw.callId",