From 8fade9df27a9a25cd017db1154409b3ad9bd3741 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Thu, 23 Apr 2026 23:40:42 -0700 Subject: [PATCH] feat(diagnostics): attach trace context to otel logs (#70961) * feat(diagnostics): attach trace context to otel logs * fix(diagnostics): satisfy trace flags lint --- CHANGELOG.md | 1 + .../diagnostics-otel/src/service.test.ts | 48 +++++++ extensions/diagnostics-otel/src/service.ts | 117 +++++++++++++++++- 3 files changed, 160 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index be7752c3952..ac9edf1022a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ Docs: https://docs.openclaw.ai ### Changes - Diagnostics/OTEL: add a lightweight diagnostic trace-context carrier for future span correlation without adding OTEL SDK state to core. Thanks @vincentkoc. +- Diagnostics/OTEL: attach diagnostic trace context to exported OTEL logs so log records can correlate with future spans without adding retained process state. Thanks @vincentkoc. - Control UI/chat: add a Steer action on queued messages so a browser follow-up can be injected into the active run without retyping it. - Control UI/Talk: add browser WebRTC realtime voice sessions backed by OpenAI Realtime, with Gateway-minted ephemeral client secrets and `openclaw_agent_consult` handoff to the full OpenClaw agent. - Agents/tools: add optional per-call `timeoutMs` support for image, video, music, and TTS generation tools so agents can extend provider request timeouts only when a specific generation needs it. 
diff --git a/extensions/diagnostics-otel/src/service.test.ts b/extensions/diagnostics-otel/src/service.test.ts index 35c32dde50c..0d184b7c820 100644 --- a/extensions/diagnostics-otel/src/service.test.ts +++ b/extensions/diagnostics-otel/src/service.test.ts @@ -10,6 +10,7 @@ const telemetryState = vi.hoisted(() => { end: vi.fn(), setStatus: vi.fn(), })), + setSpanContext: vi.fn((_ctx: unknown, spanContext: unknown) => ({ spanContext })), }; const meter = { createCounter: vi.fn((name: string) => { @@ -33,11 +34,19 @@ const logShutdown = vi.hoisted(() => vi.fn().mockResolvedValue(undefined)); const traceExporterCtor = vi.hoisted(() => vi.fn()); vi.mock("@opentelemetry/api", () => ({ + context: { + active: () => ({}), + }, metrics: { getMeter: () => telemetryState.meter, }, trace: { getTracer: () => telemetryState.tracer, + setSpanContext: telemetryState.tracer.setSpanContext, + }, + TraceFlags: { + NONE: 0, + SAMPLED: 1, }, SpanStatusCode: { ERROR: 2, @@ -110,6 +119,8 @@ import { createDiagnosticsOtelService } from "./service.js"; const OTEL_TEST_STATE_DIR = "/tmp/openclaw-diagnostics-otel-test"; const OTEL_TEST_ENDPOINT = "http://otel-collector:4318"; const OTEL_TEST_PROTOCOL = "http/protobuf"; +const TRACE_ID = "4bf92f3577b34da6a3ce929d0e0e4736"; +const SPAN_ID = "00f067aa0ba902b7"; function createLogger() { return { @@ -181,6 +192,7 @@ describe("diagnostics-otel service", () => { telemetryState.counters.clear(); telemetryState.histograms.clear(); telemetryState.tracer.startSpan.mockClear(); + telemetryState.tracer.setSpanContext.mockClear(); telemetryState.meter.createCounter.mockClear(); telemetryState.meter.createHistogram.mockClear(); sdkStart.mockClear(); @@ -336,6 +348,42 @@ describe("diagnostics-otel service", () => { } }); + test("attaches diagnostic trace context to exported logs", async () => { + const emitCall = await emitAndCaptureLog({ + 0: '{"subsystem":"diagnostic"}', + 1: { + trace: { + traceId: TRACE_ID, + spanId: SPAN_ID, + traceFlags: "01", + }, + 
}, + 2: "traceable log", + _meta: { logLevelName: "INFO", date: new Date() }, + }); + + expect(emitCall?.attributes).toMatchObject({ + "openclaw.traceId": TRACE_ID, + "openclaw.spanId": SPAN_ID, + "openclaw.traceFlags": "01", + }); + expect(telemetryState.tracer.setSpanContext).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ + traceId: TRACE_ID, + spanId: SPAN_ID, + traceFlags: 1, + isRemote: true, + }), + ); + expect(emitCall?.context).toEqual({ + spanContext: expect.objectContaining({ + traceId: TRACE_ID, + spanId: SPAN_ID, + }), + }); + }); + test("redacts sensitive reason in session.state metric attributes", async () => { const service = createDiagnosticsOtelService(); const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { metrics: true }); diff --git a/extensions/diagnostics-otel/src/service.ts b/extensions/diagnostics-otel/src/service.ts index ca625cd1181..ae4caffe63d 100644 --- a/extensions/diagnostics-otel/src/service.ts +++ b/extensions/diagnostics-otel/src/service.ts @@ -1,5 +1,11 @@ -import { metrics, trace, SpanStatusCode } from "@opentelemetry/api"; -import type { SeverityNumber } from "@opentelemetry/api-logs"; +import { + context as otelContextApi, + metrics, + trace, + SpanStatusCode, + TraceFlags, +} from "@opentelemetry/api"; +import type { LogRecord, SeverityNumber } from "@opentelemetry/api-logs"; import { OTLPLogExporter } from "@opentelemetry/exporter-logs-otlp-proto"; import { OTLPMetricExporter } from "@opentelemetry/exporter-metrics-otlp-proto"; import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-proto"; @@ -9,8 +15,19 @@ import { PeriodicExportingMetricReader } from "@opentelemetry/sdk-metrics"; import { NodeSDK } from "@opentelemetry/sdk-node"; import { ParentBasedSampler, TraceIdRatioBasedSampler } from "@opentelemetry/sdk-trace-base"; import { ATTR_SERVICE_NAME } from "@opentelemetry/semantic-conventions"; -import type { DiagnosticEventPayload, OpenClawPluginService } from "../api.js"; -import { 
onDiagnosticEvent, redactSensitiveText, registerLogTransport } from "../api.js"; +import type { + DiagnosticEventPayload, + DiagnosticTraceContext, + OpenClawPluginService, +} from "../api.js"; +import { + isValidDiagnosticSpanId, + isValidDiagnosticTraceFlags, + isValidDiagnosticTraceId, + onDiagnosticEvent, + redactSensitiveText, + registerLogTransport, +} from "../api.js"; const DEFAULT_SERVICE_NAME = "openclaw"; @@ -62,6 +79,83 @@ function redactOtelAttributes(attributes: Record; + if (!isValidDiagnosticTraceId(candidate.traceId)) { + return undefined; + } + if (candidate.spanId !== undefined && !isValidDiagnosticSpanId(candidate.spanId)) { + return undefined; + } + if (candidate.parentSpanId !== undefined && !isValidDiagnosticSpanId(candidate.parentSpanId)) { + return undefined; + } + if (candidate.traceFlags !== undefined && !isValidDiagnosticTraceFlags(candidate.traceFlags)) { + return undefined; + } + return { + traceId: candidate.traceId, + ...(candidate.spanId ? { spanId: candidate.spanId } : {}), + ...(candidate.parentSpanId ? { parentSpanId: candidate.parentSpanId } : {}), + ...(candidate.traceFlags ? 
{ traceFlags: candidate.traceFlags } : {}), + }; +} + +/** Resolves a trace context from value: first by passing value itself to normalizeTraceContext, then, when value is a non-array object, by passing its trace property. Returns undefined when neither attempt yields a context. */ function extractTraceContext(value: unknown): DiagnosticTraceContext | undefined { + const direct = normalizeTraceContext(value); + if (direct) { + return direct; + } + if (!value || typeof value !== "object" || Array.isArray(value)) { + return undefined; + } + return normalizeTraceContext((value as { trace?: unknown }).trace); +} + +/** Finds the first trace context attached to a log call: bindings are checked first, then each entry of numericArgs in order. Returns undefined when nothing matches. */ function findLogTraceContext( + bindings: Record | undefined, + numericArgs: unknown[], +): DiagnosticTraceContext | undefined { + const fromBindings = extractTraceContext(bindings); + if (fromBindings) { + return fromBindings; + } + for (const arg of numericArgs) { + const fromArg = extractTraceContext(arg); + if (fromArg) { + return fromArg; + } + } + return undefined; +} + +/** Maps a hex trace-flags string onto the OTEL TraceFlags enum. A missing value is parsed as 00. Only the SAMPLED bit is inspected, so the result is always TraceFlags.SAMPLED or TraceFlags.NONE and any other bits are dropped. */ function traceFlagsToOtel(traceFlags: string | undefined): TraceFlags { + const parsed = Number.parseInt(traceFlags ?? "00", 16); + return (parsed & TraceFlags.SAMPLED) !== 0 ? TraceFlags.SAMPLED : TraceFlags.NONE; +} + +/** Copies trace identifiers onto attributes in place under openclaw.-prefixed keys. Does nothing when traceContext is undefined; traceId is always written, while spanId, parentSpanId and traceFlags are written only when truthy. */ function addTraceAttributes( + attributes: Record, + traceContext: DiagnosticTraceContext | undefined, +): void { + if (!traceContext) { + return; + } + attributes["openclaw.traceId"] = traceContext.traceId; + if (traceContext.spanId) { + attributes["openclaw.spanId"] = traceContext.spanId; + } + if (traceContext.parentSpanId) { + attributes["openclaw.parentSpanId"] = traceContext.parentSpanId; + } + if (traceContext.traceFlags) { + attributes["openclaw.traceFlags"] = traceContext.traceFlags; + } +} + export function createDiagnosticsOtelService(): OpenClawPluginService { let sdk: NodeSDK | null = null; let logProvider: LoggerProvider | null = null; @@ -294,6 +388,7 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { // ignore malformed json bindings } } + const traceContext = findLogTraceContext(bindings, numericArgs); let message = ""; if (numericArgs.length > 0 && typeof numericArgs[numericArgs.length - 1] === "string") { @@ -343,15 +438,25 @@ export function 
createDiagnosticsOtelService(): OpenClawPluginService { if (meta?.path?.filePathWithLine) { attributes["openclaw.code.location"] = meta.path.filePathWithLine; } + addTraceAttributes(attributes, traceContext); // OTLP can leave the host boundary, so redact string fields before export. - otelLogger.emit({ + const logRecord: LogRecord = { body: redactSensitiveText(message), severityText: logLevelName, severityNumber, attributes: redactOtelAttributes(attributes), timestamp: meta?.date ?? new Date(), - }); + }; + if (traceContext?.spanId) { + logRecord.context = trace.setSpanContext(otelContextApi.active(), { + traceId: traceContext.traceId, + spanId: traceContext.spanId, + traceFlags: traceFlagsToOtel(traceContext.traceFlags), + isRemote: true, + }); + } + otelLogger.emit(logRecord); } catch (err) { ctx.logger.error(`diagnostics-otel: log transport failed: ${formatError(err)}`); }