mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:50:43 +00:00
feat(diagnostics): export lifecycle OTEL spans
Export diagnostics OTEL lifecycle spans for runs, model calls, and tool executions while avoiding retained live span state and high-cardinality/sensitive exported attributes.
This commit is contained in:
@@ -7,6 +7,7 @@ Docs: https://docs.openclaw.ai
|
||||
### Changes
|
||||
|
||||
- TUI/dependencies: remove direct `cli-highlight` usage from the OpenClaw TUI code-block renderer, keeping themed code coloring without the extra root dependency. Thanks @vincentkoc.
|
||||
- Diagnostics/OTEL: export run, model-call, and tool-execution diagnostic lifecycle events as OTEL spans without retaining live span state. Thanks @vincentkoc.
|
||||
- Plugins/activation: expose activation plan reasons and a richer plan API so callers can inspect why a plugin was selected while preserving existing id-list activation behavior. (#70943) Thanks @vincentkoc.
|
||||
- Plugins/source metadata: expose normalized install-source facts on provider and channel catalogs so onboarding can explain npm pinning, integrity state, and local availability before runtime loads. (#70951) Thanks @vincentkoc.
|
||||
- Plugins/catalog: pin the official external WeCom channel source to an exact npm release plus dist integrity, with a guard that official external sources stay integrity-pinned. (#70997) Thanks @vincentkoc.
|
||||
|
||||
@@ -5,11 +5,20 @@ const registerLogTransportMock = vi.hoisted(() => vi.fn());
|
||||
const telemetryState = vi.hoisted(() => {
|
||||
const counters = new Map<string, { add: ReturnType<typeof vi.fn> }>();
|
||||
const histograms = new Map<string, { record: ReturnType<typeof vi.fn> }>();
|
||||
const spans: Array<{
|
||||
name: string;
|
||||
end: ReturnType<typeof vi.fn>;
|
||||
setStatus: ReturnType<typeof vi.fn>;
|
||||
}> = [];
|
||||
const tracer = {
|
||||
startSpan: vi.fn((_name: string, _opts?: unknown, _ctx?: unknown) => ({
|
||||
end: vi.fn(),
|
||||
setStatus: vi.fn(),
|
||||
})),
|
||||
startSpan: vi.fn((name: string, _opts?: unknown, _ctx?: unknown) => {
|
||||
const span = {
|
||||
end: vi.fn(),
|
||||
setStatus: vi.fn(),
|
||||
};
|
||||
spans.push({ name, ...span });
|
||||
return span;
|
||||
}),
|
||||
setSpanContext: vi.fn((_ctx: unknown, spanContext: unknown) => ({ spanContext })),
|
||||
};
|
||||
const meter = {
|
||||
@@ -24,7 +33,7 @@ const telemetryState = vi.hoisted(() => {
|
||||
return histogram;
|
||||
}),
|
||||
};
|
||||
return { counters, histograms, tracer, meter };
|
||||
return { counters, histograms, spans, tracer, meter };
|
||||
});
|
||||
|
||||
const sdkStart = vi.hoisted(() => vi.fn().mockResolvedValue(undefined));
|
||||
@@ -121,6 +130,8 @@ const OTEL_TEST_ENDPOINT = "http://otel-collector:4318";
|
||||
const OTEL_TEST_PROTOCOL = "http/protobuf";
|
||||
const TRACE_ID = "4bf92f3577b34da6a3ce929d0e0e4736";
|
||||
const SPAN_ID = "00f067aa0ba902b7";
|
||||
const CHILD_SPAN_ID = "1111111111111111";
|
||||
const GRANDCHILD_SPAN_ID = "2222222222222222";
|
||||
|
||||
function createLogger() {
|
||||
return {
|
||||
@@ -189,10 +200,15 @@ async function emitAndCaptureLog(logObj: Record<string, unknown>) {
|
||||
return emitCall;
|
||||
}
|
||||
|
||||
function flushDiagnosticEvents() {
|
||||
return new Promise<void>((resolve) => setImmediate(resolve));
|
||||
}
|
||||
|
||||
describe("diagnostics-otel service", () => {
|
||||
beforeEach(() => {
|
||||
telemetryState.counters.clear();
|
||||
telemetryState.histograms.clear();
|
||||
telemetryState.spans.length = 0;
|
||||
telemetryState.tracer.startSpan.mockClear();
|
||||
telemetryState.tracer.setSpanContext.mockClear();
|
||||
telemetryState.meter.createCounter.mockClear();
|
||||
@@ -432,10 +448,14 @@ describe("diagnostics-otel service", () => {
|
||||
});
|
||||
|
||||
expect(emitCall?.attributes).toMatchObject({
|
||||
"openclaw.traceId": TRACE_ID,
|
||||
"openclaw.spanId": SPAN_ID,
|
||||
"openclaw.traceFlags": "01",
|
||||
});
|
||||
expect(emitCall?.attributes).toEqual(
|
||||
expect.not.objectContaining({
|
||||
"openclaw.traceId": expect.anything(),
|
||||
"openclaw.spanId": expect.anything(),
|
||||
}),
|
||||
);
|
||||
expect(telemetryState.tracer.setSpanContext).toHaveBeenCalledWith(
|
||||
expect.anything(),
|
||||
expect.objectContaining({
|
||||
@@ -453,7 +473,7 @@ describe("diagnostics-otel service", () => {
|
||||
});
|
||||
});
|
||||
|
||||
test("parents diagnostic event spans from trace context", async () => {
|
||||
test("does not parent diagnostic event spans from plugin-emittable trace context", async () => {
|
||||
const service = createDiagnosticsOtelService();
|
||||
const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
|
||||
await service.start(ctx);
|
||||
@@ -474,14 +494,167 @@ describe("diagnostics-otel service", () => {
|
||||
const modelUsageCall = telemetryState.tracer.startSpan.mock.calls.find(
|
||||
(call) => call[0] === "openclaw.model.usage",
|
||||
);
|
||||
expect(modelUsageCall?.[2]).toEqual({
|
||||
spanContext: expect.objectContaining({
|
||||
expect(telemetryState.tracer.setSpanContext).not.toHaveBeenCalled();
|
||||
expect(modelUsageCall?.[2]).toBeUndefined();
|
||||
await service.stop?.(ctx);
|
||||
});
|
||||
|
||||
test("exports run, model call, and tool execution lifecycle spans", async () => {
|
||||
const service = createDiagnosticsOtelService();
|
||||
const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
|
||||
await service.start(ctx);
|
||||
|
||||
emitDiagnosticEvent({
|
||||
type: "run.completed",
|
||||
runId: "run-1",
|
||||
sessionKey: "session-key",
|
||||
provider: "openai",
|
||||
model: "gpt-5.4",
|
||||
channel: "webchat",
|
||||
outcome: "completed",
|
||||
durationMs: 100,
|
||||
trace: {
|
||||
traceId: TRACE_ID,
|
||||
spanId: SPAN_ID,
|
||||
traceFlags: 1,
|
||||
isRemote: true,
|
||||
}),
|
||||
traceFlags: "01",
|
||||
},
|
||||
});
|
||||
emitDiagnosticEvent({
|
||||
type: "model.call.completed",
|
||||
runId: "run-1",
|
||||
callId: "call-1",
|
||||
provider: "openai",
|
||||
model: "gpt-5.4",
|
||||
api: "completions",
|
||||
transport: "http",
|
||||
durationMs: 80,
|
||||
trace: {
|
||||
traceId: TRACE_ID,
|
||||
spanId: CHILD_SPAN_ID,
|
||||
parentSpanId: SPAN_ID,
|
||||
traceFlags: "01",
|
||||
},
|
||||
});
|
||||
emitDiagnosticEvent({
|
||||
type: "tool.execution.error",
|
||||
runId: "run-1",
|
||||
toolName: "read",
|
||||
toolCallId: "tool-1",
|
||||
paramsSummary: { kind: "object" },
|
||||
durationMs: 20,
|
||||
errorCategory: "TypeError",
|
||||
errorCode: "429",
|
||||
trace: {
|
||||
traceId: TRACE_ID,
|
||||
spanId: GRANDCHILD_SPAN_ID,
|
||||
parentSpanId: CHILD_SPAN_ID,
|
||||
traceFlags: "01",
|
||||
},
|
||||
});
|
||||
await flushDiagnosticEvents();
|
||||
|
||||
const spanNames = telemetryState.tracer.startSpan.mock.calls.map((call) => call[0]);
|
||||
expect(spanNames).toEqual(
|
||||
expect.arrayContaining(["openclaw.run", "openclaw.model.call", "openclaw.tool.execution"]),
|
||||
);
|
||||
|
||||
const runCall = telemetryState.tracer.startSpan.mock.calls.find(
|
||||
(call) => call[0] === "openclaw.run",
|
||||
);
|
||||
expect(runCall?.[1]).toMatchObject({
|
||||
attributes: {
|
||||
"openclaw.outcome": "completed",
|
||||
"openclaw.provider": "openai",
|
||||
"openclaw.model": "gpt-5.4",
|
||||
"openclaw.channel": "webchat",
|
||||
},
|
||||
startTime: expect.any(Number),
|
||||
});
|
||||
expect(runCall?.[1]).toEqual({
|
||||
attributes: expect.not.objectContaining({
|
||||
"gen_ai.system": expect.anything(),
|
||||
"gen_ai.request.model": expect.anything(),
|
||||
"openclaw.runId": expect.anything(),
|
||||
"openclaw.sessionKey": expect.anything(),
|
||||
"openclaw.traceId": expect.anything(),
|
||||
}),
|
||||
startTime: expect.any(Number),
|
||||
});
|
||||
|
||||
const modelCall = telemetryState.tracer.startSpan.mock.calls.find(
|
||||
(call) => call[0] === "openclaw.model.call",
|
||||
);
|
||||
expect(modelCall?.[1]).toMatchObject({
|
||||
attributes: {
|
||||
"gen_ai.system": "openai",
|
||||
"gen_ai.request.model": "gpt-5.4",
|
||||
"gen_ai.operation.name": "text_completion",
|
||||
},
|
||||
});
|
||||
expect(modelCall?.[1]).toEqual({
|
||||
attributes: expect.not.objectContaining({
|
||||
"openclaw.callId": expect.anything(),
|
||||
"openclaw.runId": expect.anything(),
|
||||
"openclaw.sessionKey": expect.anything(),
|
||||
}),
|
||||
startTime: expect.any(Number),
|
||||
});
|
||||
expect(modelCall?.[2]).toBeUndefined();
|
||||
|
||||
const toolCall = telemetryState.tracer.startSpan.mock.calls.find(
|
||||
(call) => call[0] === "openclaw.tool.execution",
|
||||
);
|
||||
expect(toolCall?.[1]).toMatchObject({
|
||||
attributes: {
|
||||
"openclaw.toolName": "read",
|
||||
"openclaw.errorCategory": "TypeError",
|
||||
"openclaw.errorCode": "429",
|
||||
"openclaw.tool.params.kind": "object",
|
||||
"gen_ai.tool.name": "read",
|
||||
},
|
||||
});
|
||||
expect(toolCall?.[1]).toEqual({
|
||||
attributes: expect.not.objectContaining({
|
||||
"openclaw.toolCallId": expect.anything(),
|
||||
"openclaw.runId": expect.anything(),
|
||||
"openclaw.sessionKey": expect.anything(),
|
||||
}),
|
||||
startTime: expect.any(Number),
|
||||
});
|
||||
expect(toolCall?.[2]).toBeUndefined();
|
||||
|
||||
expect(
|
||||
telemetryState.histograms.get("openclaw.model_call.duration_ms")?.record,
|
||||
).toHaveBeenCalledWith(
|
||||
80,
|
||||
expect.objectContaining({
|
||||
"openclaw.provider": "openai",
|
||||
"openclaw.model": "gpt-5.4",
|
||||
}),
|
||||
);
|
||||
expect(telemetryState.histograms.get("openclaw.run.duration_ms")?.record).toHaveBeenCalledWith(
|
||||
100,
|
||||
expect.not.objectContaining({
|
||||
"openclaw.runId": expect.anything(),
|
||||
}),
|
||||
);
|
||||
expect(
|
||||
telemetryState.histograms.get("openclaw.tool.execution.duration_ms")?.record,
|
||||
).toHaveBeenCalledWith(
|
||||
20,
|
||||
expect.not.objectContaining({
|
||||
"openclaw.errorCode": expect.anything(),
|
||||
"openclaw.runId": expect.anything(),
|
||||
}),
|
||||
);
|
||||
|
||||
const toolSpan = telemetryState.spans.find((span) => span.name === "openclaw.tool.execution");
|
||||
expect(toolSpan?.setStatus).toHaveBeenCalledWith({
|
||||
code: 2,
|
||||
message: "TypeError",
|
||||
});
|
||||
expect(toolSpan?.end).toHaveBeenCalledWith(expect.any(Number));
|
||||
expect(telemetryState.tracer.setSpanContext).not.toHaveBeenCalled();
|
||||
await service.stop?.(ctx);
|
||||
});
|
||||
|
||||
|
||||
@@ -30,6 +30,17 @@ import {
|
||||
} from "../api.js";
|
||||
|
||||
const DEFAULT_SERVICE_NAME = "openclaw";
|
||||
const DROPPED_OTEL_ATTRIBUTE_KEYS = new Set([
|
||||
"openclaw.callId",
|
||||
"openclaw.parentSpanId",
|
||||
"openclaw.runId",
|
||||
"openclaw.sessionId",
|
||||
"openclaw.sessionKey",
|
||||
"openclaw.spanId",
|
||||
"openclaw.toolCallId",
|
||||
"openclaw.traceId",
|
||||
]);
|
||||
const LOW_CARDINALITY_VALUE_RE = /^[A-Za-z0-9_.:-]{1,120}$/u;
|
||||
|
||||
function normalizeEndpoint(endpoint?: string): string | undefined {
|
||||
const trimmed = endpoint?.trim();
|
||||
@@ -74,11 +85,26 @@ function formatError(err: unknown): string {
|
||||
function redactOtelAttributes(attributes: Record<string, string | number | boolean>) {
|
||||
const redactedAttributes: Record<string, string | number | boolean> = {};
|
||||
for (const [key, value] of Object.entries(attributes)) {
|
||||
if (DROPPED_OTEL_ATTRIBUTE_KEYS.has(key)) {
|
||||
continue;
|
||||
}
|
||||
redactedAttributes[key] = typeof value === "string" ? redactSensitiveText(value) : value;
|
||||
}
|
||||
return redactedAttributes;
|
||||
}
|
||||
|
||||
function lowCardinalityAttr(value: string | undefined, fallback = "unknown"): string {
|
||||
if (!value) {
|
||||
return fallback;
|
||||
}
|
||||
const redacted = redactSensitiveText(value.trim());
|
||||
return LOW_CARDINALITY_VALUE_RE.test(redacted) ? redacted : fallback;
|
||||
}
|
||||
|
||||
function genAiOperationName(api: string | undefined): "chat" | "text_completion" {
|
||||
return api === "completions" ? "text_completion" : "chat";
|
||||
}
|
||||
|
||||
function normalizeTraceContext(value: unknown): DiagnosticTraceContext | undefined {
|
||||
if (!value || typeof value !== "object" || Array.isArray(value)) {
|
||||
return undefined;
|
||||
@@ -364,6 +390,17 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
unit: "1",
|
||||
description: "Run attempts",
|
||||
});
|
||||
const modelCallDurationHistogram = meter.createHistogram("openclaw.model_call.duration_ms", {
|
||||
unit: "ms",
|
||||
description: "Model call duration",
|
||||
});
|
||||
const toolExecutionDurationHistogram = meter.createHistogram(
|
||||
"openclaw.tool.execution.duration_ms",
|
||||
{
|
||||
unit: "ms",
|
||||
description: "Tool execution duration",
|
||||
},
|
||||
);
|
||||
|
||||
if (logsEnabled) {
|
||||
const logExporter = new OTLPLogExporter({
|
||||
@@ -499,24 +536,70 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
|
||||
const spanWithDuration = (
|
||||
name: string,
|
||||
attributes: Record<string, string | number>,
|
||||
attributes: Record<string, string | number | boolean>,
|
||||
durationMs?: number,
|
||||
traceContext?: DiagnosticTraceContext,
|
||||
options: {
|
||||
parentContext?: ReturnType<typeof contextForTraceContext> | null;
|
||||
endTimeMs?: number;
|
||||
} = {},
|
||||
) => {
|
||||
const endTimeMs = options.endTimeMs ?? Date.now();
|
||||
const startTime =
|
||||
typeof durationMs === "number" ? Date.now() - Math.max(0, durationMs) : undefined;
|
||||
const parentContext = contextForTraceContext(traceContext);
|
||||
typeof durationMs === "number" ? endTimeMs - Math.max(0, durationMs) : undefined;
|
||||
const parentContext =
|
||||
"parentContext" in options ? (options.parentContext ?? undefined) : undefined;
|
||||
const span = tracer.startSpan(
|
||||
name,
|
||||
{
|
||||
attributes,
|
||||
...(startTime ? { startTime } : {}),
|
||||
attributes: redactOtelAttributes(attributes),
|
||||
...(startTime !== undefined ? { startTime } : {}),
|
||||
},
|
||||
parentContext,
|
||||
);
|
||||
return span;
|
||||
};
|
||||
|
||||
const addRunAttrs = (
|
||||
spanAttrs: Record<string, string | number | boolean>,
|
||||
evt: {
|
||||
runId?: string;
|
||||
sessionKey?: string;
|
||||
sessionId?: string;
|
||||
provider?: string;
|
||||
model?: string;
|
||||
channel?: string;
|
||||
trigger?: string;
|
||||
},
|
||||
) => {
|
||||
if (evt.provider) {
|
||||
spanAttrs["openclaw.provider"] = evt.provider;
|
||||
}
|
||||
if (evt.model) {
|
||||
spanAttrs["openclaw.model"] = evt.model;
|
||||
}
|
||||
if (evt.channel) {
|
||||
spanAttrs["openclaw.channel"] = evt.channel;
|
||||
}
|
||||
if (evt.trigger) {
|
||||
spanAttrs["openclaw.trigger"] = evt.trigger;
|
||||
}
|
||||
};
|
||||
|
||||
const paramsSummaryAttrs = (
|
||||
summary: Extract<
|
||||
DiagnosticEventPayload,
|
||||
{ type: "tool.execution.started" }
|
||||
>["paramsSummary"],
|
||||
): Record<string, string | number> => {
|
||||
if (!summary) {
|
||||
return {};
|
||||
}
|
||||
return {
|
||||
"openclaw.tool.params.kind": summary.kind,
|
||||
...("length" in summary ? { "openclaw.tool.params.length": summary.length } : {}),
|
||||
};
|
||||
};
|
||||
|
||||
const recordModelUsage = (evt: Extract<DiagnosticEventPayload, { type: "model.usage" }>) => {
|
||||
const attrs = {
|
||||
"openclaw.channel": evt.channel ?? "unknown",
|
||||
@@ -568,8 +651,6 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
}
|
||||
const spanAttrs: Record<string, string | number> = {
|
||||
...attrs,
|
||||
"openclaw.sessionKey": evt.sessionKey ?? "",
|
||||
"openclaw.sessionId": evt.sessionId ?? "",
|
||||
"openclaw.tokens.input": usage.input ?? 0,
|
||||
"openclaw.tokens.output": usage.output ?? 0,
|
||||
"openclaw.tokens.cache_read": usage.cacheRead ?? 0,
|
||||
@@ -577,7 +658,7 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
"openclaw.tokens.total": usage.total ?? 0,
|
||||
};
|
||||
|
||||
const span = spanWithDuration("openclaw.model.usage", spanAttrs, evt.durationMs, evt.trace);
|
||||
const span = spanWithDuration("openclaw.model.usage", spanAttrs, evt.durationMs);
|
||||
span.end();
|
||||
};
|
||||
|
||||
@@ -608,12 +689,7 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
if (evt.chatId !== undefined) {
|
||||
spanAttrs["openclaw.chatId"] = String(evt.chatId);
|
||||
}
|
||||
const span = spanWithDuration(
|
||||
"openclaw.webhook.processed",
|
||||
spanAttrs,
|
||||
evt.durationMs,
|
||||
evt.trace,
|
||||
);
|
||||
const span = spanWithDuration("openclaw.webhook.processed", spanAttrs, evt.durationMs);
|
||||
span.end();
|
||||
};
|
||||
|
||||
@@ -636,13 +712,9 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
if (evt.chatId !== undefined) {
|
||||
spanAttrs["openclaw.chatId"] = String(evt.chatId);
|
||||
}
|
||||
const span = tracer.startSpan(
|
||||
"openclaw.webhook.error",
|
||||
{
|
||||
attributes: spanAttrs,
|
||||
},
|
||||
contextForTraceContext(evt.trace),
|
||||
);
|
||||
const span = tracer.startSpan("openclaw.webhook.error", {
|
||||
attributes: spanAttrs,
|
||||
});
|
||||
span.setStatus({ code: SpanStatusCode.ERROR, message: redactedError });
|
||||
span.end();
|
||||
};
|
||||
@@ -660,18 +732,6 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
}
|
||||
};
|
||||
|
||||
const addSessionIdentityAttrs = (
|
||||
spanAttrs: Record<string, string | number>,
|
||||
evt: { sessionKey?: string; sessionId?: string },
|
||||
) => {
|
||||
if (evt.sessionKey) {
|
||||
spanAttrs["openclaw.sessionKey"] = evt.sessionKey;
|
||||
}
|
||||
if (evt.sessionId) {
|
||||
spanAttrs["openclaw.sessionId"] = evt.sessionId;
|
||||
}
|
||||
};
|
||||
|
||||
const recordMessageProcessed = (
|
||||
evt: Extract<DiagnosticEventPayload, { type: "message.processed" }>,
|
||||
) => {
|
||||
@@ -687,7 +747,6 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
return;
|
||||
}
|
||||
const spanAttrs: Record<string, string | number> = { ...attrs };
|
||||
addSessionIdentityAttrs(spanAttrs, evt);
|
||||
if (evt.chatId !== undefined) {
|
||||
spanAttrs["openclaw.chatId"] = String(evt.chatId);
|
||||
}
|
||||
@@ -697,12 +756,7 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
if (evt.reason) {
|
||||
spanAttrs["openclaw.reason"] = redactSensitiveText(evt.reason);
|
||||
}
|
||||
const span = spanWithDuration(
|
||||
"openclaw.message.processed",
|
||||
spanAttrs,
|
||||
evt.durationMs,
|
||||
evt.trace,
|
||||
);
|
||||
const span = spanWithDuration("openclaw.message.processed", spanAttrs, evt.durationMs);
|
||||
if (evt.outcome === "error" && evt.error) {
|
||||
span.setStatus({ code: SpanStatusCode.ERROR, message: redactSensitiveText(evt.error) });
|
||||
}
|
||||
@@ -750,14 +804,9 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
return;
|
||||
}
|
||||
const spanAttrs: Record<string, string | number> = { ...attrs };
|
||||
addSessionIdentityAttrs(spanAttrs, evt);
|
||||
spanAttrs["openclaw.queueDepth"] = evt.queueDepth ?? 0;
|
||||
spanAttrs["openclaw.ageMs"] = evt.ageMs;
|
||||
const span = tracer.startSpan(
|
||||
"openclaw.session.stuck",
|
||||
{ attributes: spanAttrs },
|
||||
contextForTraceContext(evt.trace),
|
||||
);
|
||||
const span = tracer.startSpan("openclaw.session.stuck", { attributes: spanAttrs });
|
||||
span.setStatus({ code: SpanStatusCode.ERROR, message: "session stuck" });
|
||||
span.end();
|
||||
};
|
||||
@@ -766,6 +815,164 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
runAttemptCounter.add(1, { "openclaw.attempt": evt.attempt });
|
||||
};
|
||||
|
||||
const recordRunCompleted = (
|
||||
evt: Extract<DiagnosticEventPayload, { type: "run.completed" }>,
|
||||
) => {
|
||||
const attrs: Record<string, string | number> = {
|
||||
"openclaw.outcome": evt.outcome,
|
||||
"openclaw.provider": evt.provider ?? "unknown",
|
||||
"openclaw.model": evt.model ?? "unknown",
|
||||
};
|
||||
if (evt.channel) {
|
||||
attrs["openclaw.channel"] = evt.channel;
|
||||
}
|
||||
durationHistogram.record(evt.durationMs, attrs);
|
||||
if (!tracesEnabled) {
|
||||
return;
|
||||
}
|
||||
const spanAttrs: Record<string, string | number | boolean> = {
|
||||
"openclaw.outcome": evt.outcome,
|
||||
};
|
||||
addRunAttrs(spanAttrs, evt);
|
||||
if (evt.errorCategory) {
|
||||
spanAttrs["openclaw.errorCategory"] = lowCardinalityAttr(evt.errorCategory, "other");
|
||||
}
|
||||
const span = spanWithDuration("openclaw.run", spanAttrs, evt.durationMs, {
|
||||
endTimeMs: evt.ts,
|
||||
});
|
||||
if (evt.outcome === "error") {
|
||||
span.setStatus({
|
||||
code: SpanStatusCode.ERROR,
|
||||
...(evt.errorCategory ? { message: redactSensitiveText(evt.errorCategory) } : {}),
|
||||
});
|
||||
}
|
||||
span.end(evt.ts);
|
||||
};
|
||||
|
||||
const modelCallMetricAttrs = (
|
||||
evt: Extract<DiagnosticEventPayload, { type: "model.call.completed" | "model.call.error" }>,
|
||||
) => ({
|
||||
"openclaw.provider": evt.provider,
|
||||
"openclaw.model": evt.model,
|
||||
"openclaw.api": lowCardinalityAttr(evt.api),
|
||||
"openclaw.transport": lowCardinalityAttr(evt.transport),
|
||||
});
|
||||
|
||||
const recordModelCallCompleted = (
|
||||
evt: Extract<DiagnosticEventPayload, { type: "model.call.completed" }>,
|
||||
) => {
|
||||
modelCallDurationHistogram.record(evt.durationMs, modelCallMetricAttrs(evt));
|
||||
if (!tracesEnabled) {
|
||||
return;
|
||||
}
|
||||
const spanAttrs: Record<string, string | number | boolean> = {
|
||||
"openclaw.provider": evt.provider,
|
||||
"openclaw.model": evt.model,
|
||||
"gen_ai.system": evt.provider,
|
||||
"gen_ai.request.model": evt.model,
|
||||
"gen_ai.operation.name": genAiOperationName(evt.api),
|
||||
};
|
||||
if (evt.api) {
|
||||
spanAttrs["openclaw.api"] = evt.api;
|
||||
}
|
||||
if (evt.transport) {
|
||||
spanAttrs["openclaw.transport"] = evt.transport;
|
||||
}
|
||||
const span = spanWithDuration("openclaw.model.call", spanAttrs, evt.durationMs, {
|
||||
endTimeMs: evt.ts,
|
||||
});
|
||||
span.end(evt.ts);
|
||||
};
|
||||
|
||||
const recordModelCallError = (
|
||||
evt: Extract<DiagnosticEventPayload, { type: "model.call.error" }>,
|
||||
) => {
|
||||
modelCallDurationHistogram.record(evt.durationMs, {
|
||||
...modelCallMetricAttrs(evt),
|
||||
"openclaw.errorCategory": lowCardinalityAttr(evt.errorCategory, "other"),
|
||||
});
|
||||
if (!tracesEnabled) {
|
||||
return;
|
||||
}
|
||||
const spanAttrs: Record<string, string | number | boolean> = {
|
||||
"openclaw.provider": evt.provider,
|
||||
"openclaw.model": evt.model,
|
||||
"openclaw.errorCategory": lowCardinalityAttr(evt.errorCategory, "other"),
|
||||
"gen_ai.system": evt.provider,
|
||||
"gen_ai.request.model": evt.model,
|
||||
"gen_ai.operation.name": genAiOperationName(evt.api),
|
||||
};
|
||||
if (evt.api) {
|
||||
spanAttrs["openclaw.api"] = evt.api;
|
||||
}
|
||||
if (evt.transport) {
|
||||
spanAttrs["openclaw.transport"] = evt.transport;
|
||||
}
|
||||
const span = spanWithDuration("openclaw.model.call", spanAttrs, evt.durationMs, {
|
||||
endTimeMs: evt.ts,
|
||||
});
|
||||
span.setStatus({
|
||||
code: SpanStatusCode.ERROR,
|
||||
message: redactSensitiveText(evt.errorCategory),
|
||||
});
|
||||
span.end(evt.ts);
|
||||
};
|
||||
|
||||
const recordToolExecutionCompleted = (
|
||||
evt: Extract<DiagnosticEventPayload, { type: "tool.execution.completed" }>,
|
||||
) => {
|
||||
const attrs = {
|
||||
"openclaw.toolName": evt.toolName,
|
||||
...paramsSummaryAttrs(evt.paramsSummary),
|
||||
};
|
||||
toolExecutionDurationHistogram.record(evt.durationMs, attrs);
|
||||
if (!tracesEnabled) {
|
||||
return;
|
||||
}
|
||||
const spanAttrs: Record<string, string | number | boolean> = {
|
||||
"openclaw.toolName": evt.toolName,
|
||||
"gen_ai.tool.name": evt.toolName,
|
||||
...paramsSummaryAttrs(evt.paramsSummary),
|
||||
};
|
||||
addRunAttrs(spanAttrs, evt);
|
||||
const span = spanWithDuration("openclaw.tool.execution", spanAttrs, evt.durationMs, {
|
||||
endTimeMs: evt.ts,
|
||||
});
|
||||
span.end(evt.ts);
|
||||
};
|
||||
|
||||
const recordToolExecutionError = (
|
||||
evt: Extract<DiagnosticEventPayload, { type: "tool.execution.error" }>,
|
||||
) => {
|
||||
const attrs = {
|
||||
"openclaw.toolName": evt.toolName,
|
||||
"openclaw.errorCategory": lowCardinalityAttr(evt.errorCategory, "other"),
|
||||
...paramsSummaryAttrs(evt.paramsSummary),
|
||||
};
|
||||
toolExecutionDurationHistogram.record(evt.durationMs, attrs);
|
||||
if (!tracesEnabled) {
|
||||
return;
|
||||
}
|
||||
const spanAttrs: Record<string, string | number | boolean> = {
|
||||
"openclaw.toolName": evt.toolName,
|
||||
"openclaw.errorCategory": lowCardinalityAttr(evt.errorCategory, "other"),
|
||||
"gen_ai.tool.name": evt.toolName,
|
||||
...paramsSummaryAttrs(evt.paramsSummary),
|
||||
};
|
||||
addRunAttrs(spanAttrs, evt);
|
||||
if (evt.errorCode) {
|
||||
spanAttrs["openclaw.errorCode"] = lowCardinalityAttr(evt.errorCode, "other");
|
||||
}
|
||||
const span = spanWithDuration("openclaw.tool.execution", spanAttrs, evt.durationMs, {
|
||||
endTimeMs: evt.ts,
|
||||
});
|
||||
span.setStatus({
|
||||
code: SpanStatusCode.ERROR,
|
||||
message: redactSensitiveText(evt.errorCategory),
|
||||
});
|
||||
span.end(evt.ts);
|
||||
};
|
||||
|
||||
const recordHeartbeat = (
|
||||
evt: Extract<DiagnosticEventPayload, { type: "diagnostic.heartbeat" }>,
|
||||
) => {
|
||||
@@ -811,15 +1018,25 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
case "diagnostic.heartbeat":
|
||||
recordHeartbeat(evt);
|
||||
return;
|
||||
case "run.completed":
|
||||
recordRunCompleted(evt);
|
||||
return;
|
||||
case "model.call.completed":
|
||||
recordModelCallCompleted(evt);
|
||||
return;
|
||||
case "model.call.error":
|
||||
recordModelCallError(evt);
|
||||
return;
|
||||
case "tool.execution.completed":
|
||||
recordToolExecutionCompleted(evt);
|
||||
return;
|
||||
case "tool.execution.error":
|
||||
recordToolExecutionError(evt);
|
||||
return;
|
||||
case "tool.loop":
|
||||
case "tool.execution.started":
|
||||
case "tool.execution.completed":
|
||||
case "tool.execution.error":
|
||||
case "run.started":
|
||||
case "run.completed":
|
||||
case "model.call.started":
|
||||
case "model.call.completed":
|
||||
case "model.call.error":
|
||||
case "diagnostic.memory.sample":
|
||||
case "diagnostic.memory.pressure":
|
||||
case "payload.large":
|
||||
|
||||
Reference in New Issue
Block a user