mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 08:10:44 +00:00
feat(diagnostics-otel): add genai operation duration metric
This commit is contained in:
@@ -32,6 +32,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Plugins/install: move managed plugin install metadata from `plugins.installs` to the state-managed `plugins/installs.json` ledger, with legacy config reads kept as a deprecated compatibility fallback. Thanks @vincentkoc.
|
||||
- Diagnostics/OTEL: add the GenAI `gen_ai.client.operation.duration` histogram for model-call latency in seconds with bounded provider/model/API and error attributes. Thanks @vincentkoc.
|
||||
- Diagnostics/OTEL: add bounded outbound message delivery lifecycle diagnostics and export them as low-cardinality delivery spans/metrics without message body, recipient, room, or media-path data. (#71471) Thanks @vincentkoc and @jlapenna.
|
||||
- Diagnostics/OTEL: emit bounded exec-process diagnostics and export them as `openclaw.exec` spans without exposing command text, working directories, or container identifiers. (#71451) Thanks @vincentkoc and @jlapenna.
|
||||
- Diagnostics/OTEL: support `OPENCLAW_OTEL_PRELOADED=1` so the plugin can reuse an already-registered OpenTelemetry SDK while keeping OpenClaw diagnostic listeners wired. (#71450) Thanks @vincentkoc and @jlapenna.
|
||||
|
||||
@@ -740,6 +740,63 @@ describe("diagnostics-otel service", () => {
|
||||
await service.stop?.(ctx);
|
||||
});
|
||||
|
||||
// Verifies that model-call lifecycle events are recorded on the
// `gen_ai.client.operation.duration` histogram in seconds, and that none of the
// per-run identifiers carried by those events (session key, run id, call ids)
// end up in the recorded metric values or attributes.
test("exports GenAI client operation duration histogram without diagnostic identifiers", async () => {
  const service = createDiagnosticsOtelService();
  const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { metrics: true });
  await service.start(ctx);

  // One successful call (250 ms) and one failed call (1250 ms), each carrying
  // identifiers that must not be exported with the metric.
  emitDiagnosticEvent({
    type: "model.call.completed",
    runId: "run-1",
    callId: "call-1",
    sessionKey: "session-key",
    provider: "openai",
    model: "gpt-5.4",
    api: "openai-completions",
    durationMs: 250,
  });
  emitDiagnosticEvent({
    type: "model.call.error",
    runId: "run-1",
    callId: "call-2",
    sessionKey: "session-key",
    provider: "google",
    model: "gemini-2.5-flash",
    api: "google-generative-ai",
    durationMs: 1250,
    errorCategory: "TimeoutError",
  });
  await flushDiagnosticEvents();

  // The histogram is registered in seconds with explicit bucket boundaries.
  expect(telemetryState.meter.createHistogram).toHaveBeenCalledWith(
    "gen_ai.client.operation.duration",
    expect.objectContaining({
      unit: "s",
      advice: {
        explicitBucketBoundaries: expect.arrayContaining([0.01, 0.32, 2.56, 81.92]),
      },
    }),
  );

  const genAiOperationDuration = telemetryState.histograms.get(
    "gen_ai.client.operation.duration",
  );

  // Durations are converted from milliseconds to seconds; only the failed call
  // carries an `error.type` attribute.
  expect(genAiOperationDuration?.record).toHaveBeenCalledTimes(2);
  expect(genAiOperationDuration?.record).toHaveBeenCalledWith(0.25, {
    "gen_ai.operation.name": "text_completion",
    "gen_ai.provider.name": "openai",
    "gen_ai.request.model": "gpt-5.4",
  });
  expect(genAiOperationDuration?.record).toHaveBeenCalledWith(1.25, {
    "gen_ai.operation.name": "generate_content",
    "gen_ai.provider.name": "google",
    "gen_ai.request.model": "gemini-2.5-flash",
    "error.type": "TimeoutError",
  });

  // Serialize every recorded call once, then check each identifier from the
  // emitted events. The call ids are covered as well as the session key and
  // run id, so a leak of any of them fails the test.
  const recordedCalls = JSON.stringify(genAiOperationDuration?.record.mock.calls);
  for (const identifier of ["session-key", "run-1", "call-1", "call-2"]) {
    expect(recordedCalls).not.toContain(identifier);
  }

  await service.stop?.(ctx);
});
|
||||
|
||||
test("exports run, model call, and tool execution lifecycle spans", async () => {
|
||||
const service = createDiagnosticsOtelService();
|
||||
const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
|
||||
|
||||
@@ -55,6 +55,9 @@ const GEN_AI_LATEST_EXPERIMENTAL_OPT_IN = "gen_ai_latest_experimental";
|
||||
const GEN_AI_TOKEN_USAGE_BUCKETS = [
|
||||
1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864,
|
||||
];
|
||||
/**
 * Explicit histogram bucket boundaries for the GenAI operation duration
 * histogram, which is registered with unit "s".
 *
 * Fourteen boundaries that start at 0.01 and double at every step:
 * 0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48,
 * 40.96, 81.92. Scaling a double by a power of two is exact, so every
 * generated value is identical to the corresponding decimal literal.
 */
const GEN_AI_OPERATION_DURATION_BUCKETS = Array.from(
  { length: 14 },
  (_, step) => 0.01 * 2 ** step,
);
|
||||
|
||||
type OtelContentCapturePolicy = {
|
||||
inputMessages: boolean;
|
||||
@@ -585,6 +588,16 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
explicitBucketBoundaries: GEN_AI_TOKEN_USAGE_BUCKETS,
|
||||
},
|
||||
});
|
||||
const genAiOperationDurationHistogram = meter.createHistogram(
|
||||
"gen_ai.client.operation.duration",
|
||||
{
|
||||
unit: "s",
|
||||
description: "GenAI client operation duration",
|
||||
advice: {
|
||||
explicitBucketBoundaries: GEN_AI_OPERATION_DURATION_BUCKETS,
|
||||
},
|
||||
},
|
||||
);
|
||||
const costCounter = meter.createCounter("openclaw.cost.usd", {
|
||||
unit: "1",
|
||||
description: "Estimated model cost (USD)",
|
||||
@@ -1307,12 +1320,25 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
"openclaw.api": lowCardinalityAttr(evt.api),
|
||||
"openclaw.transport": lowCardinalityAttr(evt.transport),
|
||||
});
|
||||
/**
 * Builds the attribute set recorded with each GenAI operation duration
 * measurement for a model-call lifecycle event.
 *
 * @param evt - Model-call event; only its `api`, `provider` and `model` fields are read.
 * @param errorType - Optional error classification; when truthy it is added
 *   as `error.type`, otherwise the key is omitted entirely.
 * @returns Attributes keyed by `gen_ai.operation.name`, `gen_ai.provider.name`
 *   and `gen_ai.request.model`, plus `error.type` for failed calls.
 */
const genAiModelCallMetricAttrs = (
  evt: ModelCallLifecycleDiagnosticEvent,
  errorType?: string,
) => {
  const baseAttrs = {
    "gen_ai.operation.name": genAiOperationName(evt.api),
    "gen_ai.provider.name": lowCardinalityAttr(evt.provider),
    "gen_ai.request.model": lowCardinalityAttr(evt.model),
  };
  // Successful calls (and empty error strings) carry no `error.type` key at all.
  if (!errorType) {
    return baseAttrs;
  }
  return { ...baseAttrs, "error.type": errorType };
};
|
||||
|
||||
const recordModelCallCompleted = (
|
||||
evt: Extract<DiagnosticEventPayload, { type: "model.call.completed" }>,
|
||||
metadata: DiagnosticEventMetadata,
|
||||
) => {
|
||||
modelCallDurationHistogram.record(evt.durationMs, modelCallMetricAttrs(evt));
|
||||
genAiOperationDurationHistogram.record(
|
||||
evt.durationMs / 1000,
|
||||
genAiModelCallMetricAttrs(evt),
|
||||
);
|
||||
if (!tracesEnabled) {
|
||||
return;
|
||||
}
|
||||
@@ -1344,18 +1370,23 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
evt: Extract<DiagnosticEventPayload, { type: "model.call.error" }>,
|
||||
metadata: DiagnosticEventMetadata,
|
||||
) => {
|
||||
const errorType = lowCardinalityAttr(evt.errorCategory, "other");
|
||||
modelCallDurationHistogram.record(evt.durationMs, {
|
||||
...modelCallMetricAttrs(evt),
|
||||
"openclaw.errorCategory": lowCardinalityAttr(evt.errorCategory, "other"),
|
||||
"openclaw.errorCategory": errorType,
|
||||
});
|
||||
genAiOperationDurationHistogram.record(
|
||||
evt.durationMs / 1000,
|
||||
genAiModelCallMetricAttrs(evt, errorType),
|
||||
);
|
||||
if (!tracesEnabled) {
|
||||
return;
|
||||
}
|
||||
const spanAttrs: Record<string, string | number | boolean> = {
|
||||
"openclaw.provider": evt.provider,
|
||||
"openclaw.model": evt.model,
|
||||
"openclaw.errorCategory": lowCardinalityAttr(evt.errorCategory, "other"),
|
||||
"error.type": lowCardinalityAttr(evt.errorCategory, "other"),
|
||||
"openclaw.errorCategory": errorType,
|
||||
"error.type": errorType,
|
||||
};
|
||||
assignGenAiModelCallAttrs(spanAttrs, evt);
|
||||
if (evt.api) {
|
||||
|
||||
Reference in New Issue
Block a user