mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 18:20:44 +00:00
feat(diagnostics): add outbound delivery lifecycle events
Add bounded outbound message delivery lifecycle diagnostics and OTEL export without message body, recipient, room, media path, or raw channel result data.
This commit is contained in:
@@ -878,6 +878,107 @@ describe("diagnostics-otel service", () => {
|
||||
await service.stop?.(ctx);
|
||||
});
|
||||
|
||||
// Verifies that outbound delivery lifecycle events are exported as OTEL metrics
// and spans carrying only channel / delivery kind / outcome style attributes,
// and that identifying fields such as the session key never reach span attributes.
test("exports message delivery spans and metrics with low-cardinality attributes", async () => {
  const service = createDiagnosticsOtelService();
  const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
  await service.start(ctx);

  // Every event carries a sessionKey so the assertions below can prove it is dropped.
  emitDiagnosticEvent({
    type: "message.delivery.started",
    channel: "matrix",
    deliveryKind: "text",
    sessionKey: "session-secret",
  });
  emitDiagnosticEvent({
    type: "message.delivery.completed",
    channel: "matrix",
    deliveryKind: "text",
    durationMs: 25,
    resultCount: 1,
    sessionKey: "session-secret",
  });
  emitDiagnosticEvent({
    type: "message.delivery.error",
    channel: "discord",
    deliveryKind: "media",
    durationMs: 40,
    errorCategory: "TypeError",
    sessionKey: "session-secret",
  });
  await flushDiagnosticEvents();

  // The "started" event only bumps a counter, with exactly these two attributes.
  expect(
    telemetryState.counters.get("openclaw.message.delivery.started")?.add,
  ).toHaveBeenCalledWith(1, {
    "openclaw.channel": "matrix",
    "openclaw.delivery.kind": "text",
  });

  // Both terminal events record into the same duration histogram.
  expect(
    telemetryState.histograms.get("openclaw.message.delivery.duration_ms")?.record,
  ).toHaveBeenCalledWith(
    25,
    expect.objectContaining({
      "openclaw.channel": "matrix",
      "openclaw.delivery.kind": "text",
      "openclaw.outcome": "completed",
    }),
  );
  expect(
    telemetryState.histograms.get("openclaw.message.delivery.duration_ms")?.record,
  ).toHaveBeenCalledWith(
    40,
    expect.objectContaining({
      "openclaw.channel": "discord",
      "openclaw.delivery.kind": "media",
      "openclaw.outcome": "error",
      "openclaw.errorCategory": "TypeError",
    }),
  );

  // One span per terminal event (completed + error); "started" creates none.
  const deliverySpanCalls = telemetryState.tracer.startSpan.mock.calls.filter(
    (call) => call[0] === "openclaw.message.delivery",
  );
  expect(deliverySpanCalls).toHaveLength(2);
  expect(deliverySpanCalls[0]?.[1]).toMatchObject({
    attributes: {
      "openclaw.channel": "matrix",
      "openclaw.delivery.kind": "text",
      "openclaw.outcome": "completed",
      "openclaw.delivery.result_count": 1,
    },
    startTime: expect.any(Number),
  });
  expect(deliverySpanCalls[1]?.[1]).toMatchObject({
    attributes: {
      "openclaw.channel": "discord",
      "openclaw.delivery.kind": "media",
      "openclaw.outcome": "error",
      "openclaw.errorCategory": "TypeError",
    },
    startTime: expect.any(Number),
  });

  // `expect.not.objectContaining({ a, b, ... })` only rejects an object that has
  // ALL of the listed keys, so a single combined matcher would still pass if just
  // one identifying attribute leaked. Check every forbidden key on its own.
  const forbiddenAttributeKeys = [
    "openclaw.sessionKey",
    "openclaw.messageId",
    "openclaw.conversationId",
    "openclaw.content",
    "openclaw.to",
  ];
  for (const call of deliverySpanCalls) {
    for (const forbiddenKey of forbiddenAttributeKeys) {
      expect(call[1]).toEqual({
        attributes: expect.not.objectContaining({ [forbiddenKey]: expect.anything() }),
        startTime: expect.any(Number),
      });
    }
  }

  // Only the error span has a status set; the expected message is the error category.
  const errorSpan = telemetryState.spans.find(
    (span) => span.name === "openclaw.message.delivery" && span.setStatus.mock.calls.length > 0,
  );
  expect(errorSpan?.setStatus).toHaveBeenCalledWith({
    code: 2,
    message: "TypeError",
  });
  await service.stop?.(ctx);
});
|
||||
|
||||
test("does not export model or tool content unless capture is explicitly enabled", async () => {
|
||||
const service = createDiagnosticsOtelService();
|
||||
const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
|
||||
|
||||
@@ -59,6 +59,13 @@ type OtelContentCapturePolicy = {
|
||||
systemPrompt: boolean;
|
||||
};
|
||||
|
||||
/** Discriminator values of the outbound message delivery lifecycle events. */
type MessageDeliveryDiagnosticEventType =
  | "message.delivery.started"
  | "message.delivery.completed"
  | "message.delivery.error";

/** Every diagnostic payload whose `type` is one of the delivery lifecycle tags. */
type MessageDeliveryDiagnosticEvent = Extract<
  DiagnosticEventPayload,
  { type: MessageDeliveryDiagnosticEventType }
>;
|
||||
|
||||
const NO_CONTENT_CAPTURE: OtelContentCapturePolicy = {
|
||||
inputMessages: false,
|
||||
outputMessages: false,
|
||||
@@ -514,6 +521,20 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
unit: "ms",
|
||||
description: "Message processing duration",
|
||||
});
|
||||
// Instruments for outbound delivery: a counter incremented per delivery attempt
// and a histogram of how long each attempt took, in milliseconds.
const messageDeliveryStartedCounter = meter.createCounter("openclaw.message.delivery.started", {
  description: "Outbound message delivery attempts started",
  unit: "1",
});
const messageDeliveryDurationHistogram = meter.createHistogram(
  "openclaw.message.delivery.duration_ms",
  {
    description: "Outbound message delivery duration",
    unit: "ms",
  },
);
|
||||
const queueDepthHistogram = meter.createHistogram("openclaw.queue.depth", {
|
||||
unit: "1",
|
||||
description: "Queue depth on enqueue/dequeue",
|
||||
@@ -861,6 +882,64 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
span.end();
|
||||
};
|
||||
|
||||
/**
 * Builds the attribute set shared by all delivery metrics and spans.
 *
 * Only the channel and delivery kind are copied from the event; no other
 * event field is included.
 */
const messageDeliveryAttrs = (evt: MessageDeliveryDiagnosticEvent): Record<string, string> => {
  const { channel, deliveryKind } = evt;
  return {
    "openclaw.channel": channel,
    "openclaw.delivery.kind": deliveryKind,
  };
};
|
||||
|
||||
/** Counts one started delivery attempt, tagged with the shared delivery attributes. */
const recordMessageDeliveryStarted = (
  evt: Extract<DiagnosticEventPayload, { type: "message.delivery.started" }>,
) => {
  const startedAttrs = messageDeliveryAttrs(evt);
  messageDeliveryStartedCounter.add(1, startedAttrs);
};
|
||||
|
||||
/**
 * Records a successful delivery: always a duration histogram sample, plus a
 * span when tracing is enabled.
 */
const recordMessageDeliveryCompleted = (
  evt: Extract<DiagnosticEventPayload, { type: "message.delivery.completed" }>,
) => {
  const completedAttrs = { ...messageDeliveryAttrs(evt), "openclaw.outcome": "completed" };
  messageDeliveryDurationHistogram.record(evt.durationMs, completedAttrs);

  // The metric is recorded regardless; the span is only emitted with tracing on.
  if (tracesEnabled) {
    // The result count is attached to the span only, not to the histogram sample.
    const deliverySpan = spanWithDuration(
      "openclaw.message.delivery",
      { ...completedAttrs, "openclaw.delivery.result_count": evt.resultCount },
      evt.durationMs,
      { endTimeMs: evt.ts },
    );
    deliverySpan.end(evt.ts);
  }
};
|
||||
|
||||
/**
 * Records a failed delivery: always a duration histogram sample, plus an
 * error-status span when tracing is enabled.
 */
const recordMessageDeliveryError = (
  evt: Extract<DiagnosticEventPayload, { type: "message.delivery.error" }>,
) => {
  // The category is passed through lowCardinalityAttr with "other" as its fallback argument.
  const errorCategoryAttr = lowCardinalityAttr(evt.errorCategory, "other");
  const failedAttrs = {
    ...messageDeliveryAttrs(evt),
    "openclaw.outcome": "error",
    "openclaw.errorCategory": errorCategoryAttr,
  };
  messageDeliveryDurationHistogram.record(evt.durationMs, failedAttrs);

  // The metric is recorded regardless; the span is only emitted with tracing on.
  if (tracesEnabled) {
    const deliverySpan = spanWithDuration("openclaw.message.delivery", failedAttrs, evt.durationMs, {
      endTimeMs: evt.ts,
    });
    // The status message is the error category run through redactSensitiveText.
    const statusMessage = redactSensitiveText(evt.errorCategory);
    deliverySpan.setStatus({ code: SpanStatusCode.ERROR, message: statusMessage });
    deliverySpan.end(evt.ts);
  }
};
|
||||
|
||||
const recordLaneEnqueue = (
|
||||
evt: Extract<DiagnosticEventPayload, { type: "queue.lane.enqueue" }>,
|
||||
) => {
|
||||
@@ -1160,6 +1239,15 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
case "message.processed":
|
||||
recordMessageProcessed(evt);
|
||||
return;
|
||||
case "message.delivery.started":
|
||||
recordMessageDeliveryStarted(evt);
|
||||
return;
|
||||
case "message.delivery.completed":
|
||||
recordMessageDeliveryCompleted(evt);
|
||||
return;
|
||||
case "message.delivery.error":
|
||||
recordMessageDeliveryError(evt);
|
||||
return;
|
||||
case "queue.lane.enqueue":
|
||||
recordLaneEnqueue(evt);
|
||||
return;
|
||||
|
||||
Reference in New Issue
Block a user