mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:50:43 +00:00
fix(telemetry): bound message diagnostics labels
This commit is contained in:
@@ -41,6 +41,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Fixes
|
||||
|
||||
- Diagnostics: keep webhook/message OTEL attributes and Prometheus delivery labels low-cardinality and omit raw chat/message IDs from spans, so progress-draft and message-tool modes do not leak high-cardinality messaging identifiers.
|
||||
- Telegram: render shared interactive reply buttons in reply delivery so plugin approval messages show inline keyboards. (#76238) Thanks @keshavbotagent.
|
||||
- Release validation: install the cross-OS TypeScript harness through Windows-safe Node/npm shims so native Windows package checks reach the OpenClaw smoke suites instead of exiting before artifact capture. Thanks @vincentkoc.
|
||||
- Release validation: let Windows packaged-upgrade checks continue after the shipped 2026.5.2 updater hits its native-module swap cleanup fallback, verifying the fallback-installed candidate through package metadata and downstream smoke instead of crashing on the immediate update-status probe. Thanks @vincentkoc.
|
||||
|
||||
@@ -268,11 +268,11 @@ heartbeat tick. For the config knob and defaults, see
|
||||
- `openclaw.exec`
|
||||
- `openclaw.exec.target`, `openclaw.exec.mode`, `openclaw.outcome`, `openclaw.failureKind`, `openclaw.exec.command_length`, `openclaw.exec.exit_code`, `openclaw.exec.timed_out`
|
||||
- `openclaw.webhook.processed`
|
||||
- `openclaw.channel`, `openclaw.webhook`, `openclaw.chatId`
|
||||
- `openclaw.channel`, `openclaw.webhook`
|
||||
- `openclaw.webhook.error`
|
||||
- `openclaw.channel`, `openclaw.webhook`, `openclaw.chatId`, `openclaw.error`
|
||||
- `openclaw.channel`, `openclaw.webhook`, `openclaw.error`
|
||||
- `openclaw.message.processed`
|
||||
- `openclaw.channel`, `openclaw.outcome`, `openclaw.chatId`, `openclaw.messageId`, `openclaw.reason`
|
||||
- `openclaw.channel`, `openclaw.outcome`, `openclaw.reason`
|
||||
- `openclaw.message.delivery`
|
||||
- `openclaw.channel`, `openclaw.delivery.kind`, `openclaw.outcome`, `openclaw.errorCategory`, `openclaw.delivery.result_count`
|
||||
- `openclaw.session.stuck`
|
||||
|
||||
@@ -296,6 +296,7 @@ describe("diagnostics-otel service", () => {
|
||||
type: "webhook.processed",
|
||||
channel: "telegram",
|
||||
updateType: "telegram-post",
|
||||
chatId: "chat-should-not-export",
|
||||
durationMs: 120,
|
||||
});
|
||||
emitDiagnosticEvent({
|
||||
@@ -307,7 +308,10 @@ describe("diagnostics-otel service", () => {
|
||||
emitDiagnosticEvent({
|
||||
type: "message.processed",
|
||||
channel: "telegram",
|
||||
chatId: "chat-should-not-export",
|
||||
messageId: "message-should-not-export",
|
||||
outcome: "completed",
|
||||
reason: "progress draft / message tool 123",
|
||||
durationMs: 55,
|
||||
});
|
||||
emitDiagnosticEvent({
|
||||
@@ -348,6 +352,33 @@ describe("diagnostics-otel service", () => {
|
||||
expect(spanNames).toContain("openclaw.webhook.processed");
|
||||
expect(spanNames).toContain("openclaw.message.processed");
|
||||
expect(spanNames).toContain("openclaw.session.stuck");
|
||||
const webhookSpanCall = telemetryState.tracer.startSpan.mock.calls.find(
|
||||
(call) => call[0] === "openclaw.webhook.processed",
|
||||
);
|
||||
expect(webhookSpanCall?.[1]).toEqual({
|
||||
attributes: expect.not.objectContaining({
|
||||
"openclaw.chatId": expect.anything(),
|
||||
}),
|
||||
startTime: expect.any(Number),
|
||||
});
|
||||
const messageSpanCall = telemetryState.tracer.startSpan.mock.calls.find(
|
||||
(call) => call[0] === "openclaw.message.processed",
|
||||
);
|
||||
expect(messageSpanCall?.[1]).toEqual({
|
||||
attributes: expect.objectContaining({
|
||||
"openclaw.channel": "telegram",
|
||||
"openclaw.outcome": "completed",
|
||||
"openclaw.reason": "unknown",
|
||||
}),
|
||||
startTime: expect.any(Number),
|
||||
});
|
||||
expect(messageSpanCall?.[1]).toEqual({
|
||||
attributes: expect.not.objectContaining({
|
||||
"openclaw.chatId": expect.anything(),
|
||||
"openclaw.messageId": expect.anything(),
|
||||
}),
|
||||
startTime: expect.any(Number),
|
||||
});
|
||||
|
||||
emitDiagnosticEvent({
|
||||
type: "log.record",
|
||||
@@ -2387,6 +2418,7 @@ describe("diagnostics-otel service", () => {
|
||||
for (const call of deliverySpanCalls) {
|
||||
expect(call[1]).toEqual({
|
||||
attributes: expect.not.objectContaining({
|
||||
"openclaw.chatId": expect.anything(),
|
||||
"openclaw.sessionKey": expect.anything(),
|
||||
"openclaw.messageId": expect.anything(),
|
||||
"openclaw.conversationId": expect.anything(),
|
||||
@@ -2406,6 +2438,46 @@ describe("diagnostics-otel service", () => {
|
||||
await service.stop?.(ctx);
|
||||
});
|
||||
|
||||
test("bounds unsafe message delivery attributes before export", async () => {
|
||||
const service = createDiagnosticsOtelService();
|
||||
const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
|
||||
await service.start(ctx);
|
||||
|
||||
emitDiagnosticEvent({
|
||||
type: "message.delivery.completed",
|
||||
channel: "discord/custom",
|
||||
deliveryKind: "progress draft" as never,
|
||||
durationMs: 20,
|
||||
resultCount: 1,
|
||||
sessionKey: "session-secret",
|
||||
});
|
||||
await flushDiagnosticEvents();
|
||||
|
||||
expect(
|
||||
telemetryState.histograms.get("openclaw.message.delivery.duration_ms")?.record,
|
||||
).toHaveBeenCalledWith(
|
||||
20,
|
||||
expect.objectContaining({
|
||||
"openclaw.channel": "unknown",
|
||||
"openclaw.delivery.kind": "other",
|
||||
"openclaw.outcome": "completed",
|
||||
}),
|
||||
);
|
||||
const deliverySpanCall = telemetryState.tracer.startSpan.mock.calls.find(
|
||||
(call) => call[0] === "openclaw.message.delivery",
|
||||
);
|
||||
expect(deliverySpanCall?.[1]).toMatchObject({
|
||||
attributes: {
|
||||
"openclaw.channel": "unknown",
|
||||
"openclaw.delivery.kind": "other",
|
||||
"openclaw.outcome": "completed",
|
||||
"openclaw.delivery.result_count": 1,
|
||||
},
|
||||
startTime: expect.any(Number),
|
||||
});
|
||||
await service.stop?.(ctx);
|
||||
});
|
||||
|
||||
test("does not export model or tool content unless capture is explicitly enabled", async () => {
|
||||
const service = createDiagnosticsOtelService();
|
||||
const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
|
||||
|
||||
@@ -31,6 +31,8 @@ import {
|
||||
const DEFAULT_SERVICE_NAME = "openclaw";
|
||||
const DROPPED_OTEL_ATTRIBUTE_KEYS = new Set([
|
||||
"openclaw.callId",
|
||||
"openclaw.chatId",
|
||||
"openclaw.messageId",
|
||||
"openclaw.parentSpanId",
|
||||
"openclaw.runId",
|
||||
"openclaw.sessionId",
|
||||
@@ -1262,8 +1264,8 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
evt: Extract<DiagnosticEventPayload, { type: "webhook.processed" }>,
|
||||
) => {
|
||||
const attrs = {
|
||||
"openclaw.channel": evt.channel ?? "unknown",
|
||||
"openclaw.webhook": evt.updateType ?? "unknown",
|
||||
"openclaw.channel": lowCardinalityAttr(evt.channel),
|
||||
"openclaw.webhook": lowCardinalityAttr(evt.updateType),
|
||||
};
|
||||
if (typeof evt.durationMs === "number") {
|
||||
webhookDurationHistogram.record(evt.durationMs, attrs);
|
||||
@@ -1272,9 +1274,6 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
return;
|
||||
}
|
||||
const spanAttrs: Record<string, string | number> = { ...attrs };
|
||||
if (evt.chatId !== undefined) {
|
||||
spanAttrs["openclaw.chatId"] = String(evt.chatId);
|
||||
}
|
||||
const span = spanWithDuration("openclaw.webhook.processed", spanAttrs, evt.durationMs);
|
||||
span.end();
|
||||
};
|
||||
@@ -1283,8 +1282,8 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
evt: Extract<DiagnosticEventPayload, { type: "webhook.error" }>,
|
||||
) => {
|
||||
const attrs = {
|
||||
"openclaw.channel": evt.channel ?? "unknown",
|
||||
"openclaw.webhook": evt.updateType ?? "unknown",
|
||||
"openclaw.channel": lowCardinalityAttr(evt.channel),
|
||||
"openclaw.webhook": lowCardinalityAttr(evt.updateType),
|
||||
};
|
||||
webhookErrorCounter.add(1, attrs);
|
||||
if (!tracesEnabled) {
|
||||
@@ -1295,9 +1294,6 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
...attrs,
|
||||
"openclaw.error": redactedError,
|
||||
};
|
||||
if (evt.chatId !== undefined) {
|
||||
spanAttrs["openclaw.chatId"] = String(evt.chatId);
|
||||
}
|
||||
const span = tracer.startSpan("openclaw.webhook.error", {
|
||||
attributes: spanAttrs,
|
||||
});
|
||||
@@ -1309,8 +1305,8 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
evt: Extract<DiagnosticEventPayload, { type: "message.queued" }>,
|
||||
) => {
|
||||
const attrs = {
|
||||
"openclaw.channel": evt.channel ?? "unknown",
|
||||
"openclaw.source": evt.source ?? "unknown",
|
||||
"openclaw.channel": lowCardinalityAttr(evt.channel),
|
||||
"openclaw.source": lowCardinalityAttr(evt.source),
|
||||
};
|
||||
messageQueuedCounter.add(1, attrs);
|
||||
if (typeof evt.queueDepth === "number") {
|
||||
@@ -1322,7 +1318,7 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
evt: Extract<DiagnosticEventPayload, { type: "message.processed" }>,
|
||||
) => {
|
||||
const attrs = {
|
||||
"openclaw.channel": evt.channel ?? "unknown",
|
||||
"openclaw.channel": lowCardinalityAttr(evt.channel),
|
||||
"openclaw.outcome": evt.outcome ?? "unknown",
|
||||
};
|
||||
messageProcessedCounter.add(1, attrs);
|
||||
@@ -1333,14 +1329,8 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
return;
|
||||
}
|
||||
const spanAttrs: Record<string, string | number> = { ...attrs };
|
||||
if (evt.chatId !== undefined) {
|
||||
spanAttrs["openclaw.chatId"] = String(evt.chatId);
|
||||
}
|
||||
if (evt.messageId !== undefined) {
|
||||
spanAttrs["openclaw.messageId"] = String(evt.messageId);
|
||||
}
|
||||
if (evt.reason) {
|
||||
spanAttrs["openclaw.reason"] = redactSensitiveText(evt.reason);
|
||||
spanAttrs["openclaw.reason"] = lowCardinalityAttr(evt.reason, "unknown");
|
||||
}
|
||||
const span = spanWithDuration("openclaw.message.processed", spanAttrs, evt.durationMs);
|
||||
if (evt.outcome === "error" && evt.error) {
|
||||
@@ -1352,8 +1342,8 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
const messageDeliveryAttrs = (
|
||||
evt: MessageDeliveryDiagnosticEvent,
|
||||
): Record<string, string> => ({
|
||||
"openclaw.channel": evt.channel,
|
||||
"openclaw.delivery.kind": evt.deliveryKind,
|
||||
"openclaw.channel": lowCardinalityAttr(evt.channel),
|
||||
"openclaw.delivery.kind": lowCardinalityAttr(evt.deliveryKind, "other"),
|
||||
});
|
||||
|
||||
const recordMessageDeliveryStarted = (
|
||||
|
||||
@@ -87,6 +87,49 @@ describe("diagnostics-prometheus service", () => {
|
||||
expect(rendered).not.toContain("sk-secret");
|
||||
});
|
||||
|
||||
it("bounds messaging labels without exporting raw chat identifiers", () => {
|
||||
const store = __test__.createPrometheusMetricStore();
|
||||
|
||||
__test__.recordDiagnosticEvent(
|
||||
store,
|
||||
{
|
||||
...baseEvent(),
|
||||
type: "message.processed",
|
||||
channel: "telegram/custom",
|
||||
chatId: "chat-should-not-export",
|
||||
messageId: "message-should-not-export",
|
||||
outcome: "completed",
|
||||
reason: "progress draft / message tool 123",
|
||||
durationMs: 25,
|
||||
},
|
||||
trusted,
|
||||
);
|
||||
__test__.recordDiagnosticEvent(
|
||||
store,
|
||||
{
|
||||
...baseEvent(),
|
||||
type: "message.delivery.error",
|
||||
channel: "discord/custom",
|
||||
deliveryKind: "progress draft" as never,
|
||||
durationMs: 50,
|
||||
errorCategory: "TimeoutError",
|
||||
},
|
||||
trusted,
|
||||
);
|
||||
|
||||
const rendered = __test__.renderPrometheusMetrics(store);
|
||||
|
||||
expect(rendered).toContain(
|
||||
'openclaw_message_processed_total{channel="unknown",outcome="completed",reason="none"} 1',
|
||||
);
|
||||
expect(rendered).toContain(
|
||||
'openclaw_message_delivery_total{channel="unknown",delivery_kind="other",error_category="TimeoutError",outcome="error"} 1',
|
||||
);
|
||||
expect(rendered).not.toContain("chat-should-not-export");
|
||||
expect(rendered).not.toContain("message-should-not-export");
|
||||
expect(rendered).not.toContain("progress draft");
|
||||
});
|
||||
|
||||
it("caps metric series growth and reports dropped series", () => {
|
||||
const store = __test__.createPrometheusMetricStore();
|
||||
|
||||
|
||||
@@ -504,7 +504,7 @@ function recordDiagnosticEvent(
|
||||
"Outbound message delivery attempts by outcome.",
|
||||
{
|
||||
channel: lowCardinalityLabel(evt.channel),
|
||||
delivery_kind: evt.deliveryKind,
|
||||
delivery_kind: lowCardinalityLabel(evt.deliveryKind, "other"),
|
||||
error_category:
|
||||
evt.type === "message.delivery.error"
|
||||
? lowCardinalityLabel(evt.errorCategory, "other")
|
||||
@@ -517,7 +517,7 @@ function recordDiagnosticEvent(
|
||||
"Outbound message delivery duration in seconds.",
|
||||
{
|
||||
channel: lowCardinalityLabel(evt.channel),
|
||||
delivery_kind: evt.deliveryKind,
|
||||
delivery_kind: lowCardinalityLabel(evt.deliveryKind, "other"),
|
||||
error_category:
|
||||
evt.type === "message.delivery.error"
|
||||
? lowCardinalityLabel(evt.errorCategory, "other")
|
||||
|
||||
@@ -88,6 +88,8 @@ const REQUIRED_SPAN_NAMES = [
|
||||
] as const;
|
||||
const DISALLOWED_ATTRIBUTE_KEYS = new Set([
|
||||
"openclaw.runId",
|
||||
"openclaw.chatId",
|
||||
"openclaw.messageId",
|
||||
"openclaw.sessionKey",
|
||||
"openclaw.sessionId",
|
||||
"openclaw.callId",
|
||||
|
||||
Reference in New Issue
Block a user