mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 10:30:44 +00:00
feat(diagnostics): attach trace context to otel logs (#70961)
* feat(diagnostics): attach trace context to otel logs
* fix(diagnostics): satisfy trace flags lint
This commit is contained in:
@@ -7,6 +7,7 @@ Docs: https://docs.openclaw.ai
|
||||
### Changes
|
||||
|
||||
- Diagnostics/OTEL: add a lightweight diagnostic trace-context carrier for future span correlation without adding OTEL SDK state to core. Thanks @vincentkoc.
|
||||
- Diagnostics/OTEL: attach diagnostic trace context to exported OTEL logs so log records can correlate with future spans without adding retained process state. Thanks @vincentkoc.
|
||||
- Control UI/chat: add a Steer action on queued messages so a browser follow-up can be injected into the active run without retyping it.
|
||||
- Control UI/Talk: add browser WebRTC realtime voice sessions backed by OpenAI Realtime, with Gateway-minted ephemeral client secrets and `openclaw_agent_consult` handoff to the full OpenClaw agent.
|
||||
- Agents/tools: add optional per-call `timeoutMs` support for image, video, music, and TTS generation tools so agents can extend provider request timeouts only when a specific generation needs it.
|
||||
|
||||
@@ -10,6 +10,7 @@ const telemetryState = vi.hoisted(() => {
|
||||
end: vi.fn(),
|
||||
setStatus: vi.fn(),
|
||||
})),
|
||||
setSpanContext: vi.fn((_ctx: unknown, spanContext: unknown) => ({ spanContext })),
|
||||
};
|
||||
const meter = {
|
||||
createCounter: vi.fn((name: string) => {
|
||||
@@ -33,11 +34,19 @@ const logShutdown = vi.hoisted(() => vi.fn().mockResolvedValue(undefined));
|
||||
const traceExporterCtor = vi.hoisted(() => vi.fn());
|
||||
|
||||
vi.mock("@opentelemetry/api", () => ({
|
||||
context: {
|
||||
active: () => ({}),
|
||||
},
|
||||
metrics: {
|
||||
getMeter: () => telemetryState.meter,
|
||||
},
|
||||
trace: {
|
||||
getTracer: () => telemetryState.tracer,
|
||||
setSpanContext: telemetryState.tracer.setSpanContext,
|
||||
},
|
||||
TraceFlags: {
|
||||
NONE: 0,
|
||||
SAMPLED: 1,
|
||||
},
|
||||
SpanStatusCode: {
|
||||
ERROR: 2,
|
||||
@@ -110,6 +119,8 @@ import { createDiagnosticsOtelService } from "./service.js";
|
||||
const OTEL_TEST_STATE_DIR = "/tmp/openclaw-diagnostics-otel-test";
|
||||
const OTEL_TEST_ENDPOINT = "http://otel-collector:4318";
|
||||
const OTEL_TEST_PROTOCOL = "http/protobuf";
|
||||
const TRACE_ID = "4bf92f3577b34da6a3ce929d0e0e4736";
|
||||
const SPAN_ID = "00f067aa0ba902b7";
|
||||
|
||||
function createLogger() {
|
||||
return {
|
||||
@@ -181,6 +192,7 @@ describe("diagnostics-otel service", () => {
|
||||
telemetryState.counters.clear();
|
||||
telemetryState.histograms.clear();
|
||||
telemetryState.tracer.startSpan.mockClear();
|
||||
telemetryState.tracer.setSpanContext.mockClear();
|
||||
telemetryState.meter.createCounter.mockClear();
|
||||
telemetryState.meter.createHistogram.mockClear();
|
||||
sdkStart.mockClear();
|
||||
@@ -336,6 +348,42 @@ describe("diagnostics-otel service", () => {
|
||||
}
|
||||
});
|
||||
|
||||
test("attaches diagnostic trace context to exported logs", async () => {
|
||||
const emitCall = await emitAndCaptureLog({
|
||||
0: '{"subsystem":"diagnostic"}',
|
||||
1: {
|
||||
trace: {
|
||||
traceId: TRACE_ID,
|
||||
spanId: SPAN_ID,
|
||||
traceFlags: "01",
|
||||
},
|
||||
},
|
||||
2: "traceable log",
|
||||
_meta: { logLevelName: "INFO", date: new Date() },
|
||||
});
|
||||
|
||||
expect(emitCall?.attributes).toMatchObject({
|
||||
"openclaw.traceId": TRACE_ID,
|
||||
"openclaw.spanId": SPAN_ID,
|
||||
"openclaw.traceFlags": "01",
|
||||
});
|
||||
expect(telemetryState.tracer.setSpanContext).toHaveBeenCalledWith(
|
||||
expect.anything(),
|
||||
expect.objectContaining({
|
||||
traceId: TRACE_ID,
|
||||
spanId: SPAN_ID,
|
||||
traceFlags: 1,
|
||||
isRemote: true,
|
||||
}),
|
||||
);
|
||||
expect(emitCall?.context).toEqual({
|
||||
spanContext: expect.objectContaining({
|
||||
traceId: TRACE_ID,
|
||||
spanId: SPAN_ID,
|
||||
}),
|
||||
});
|
||||
});
|
||||
|
||||
test("redacts sensitive reason in session.state metric attributes", async () => {
|
||||
const service = createDiagnosticsOtelService();
|
||||
const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { metrics: true });
|
||||
|
||||
@@ -1,5 +1,11 @@
|
||||
import { metrics, trace, SpanStatusCode } from "@opentelemetry/api";
|
||||
import type { SeverityNumber } from "@opentelemetry/api-logs";
|
||||
import {
|
||||
context as otelContextApi,
|
||||
metrics,
|
||||
trace,
|
||||
SpanStatusCode,
|
||||
TraceFlags,
|
||||
} from "@opentelemetry/api";
|
||||
import type { LogRecord, SeverityNumber } from "@opentelemetry/api-logs";
|
||||
import { OTLPLogExporter } from "@opentelemetry/exporter-logs-otlp-proto";
|
||||
import { OTLPMetricExporter } from "@opentelemetry/exporter-metrics-otlp-proto";
|
||||
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-proto";
|
||||
@@ -9,8 +15,19 @@ import { PeriodicExportingMetricReader } from "@opentelemetry/sdk-metrics";
|
||||
import { NodeSDK } from "@opentelemetry/sdk-node";
|
||||
import { ParentBasedSampler, TraceIdRatioBasedSampler } from "@opentelemetry/sdk-trace-base";
|
||||
import { ATTR_SERVICE_NAME } from "@opentelemetry/semantic-conventions";
|
||||
import type { DiagnosticEventPayload, OpenClawPluginService } from "../api.js";
|
||||
import { onDiagnosticEvent, redactSensitiveText, registerLogTransport } from "../api.js";
|
||||
import type {
|
||||
DiagnosticEventPayload,
|
||||
DiagnosticTraceContext,
|
||||
OpenClawPluginService,
|
||||
} from "../api.js";
|
||||
import {
|
||||
isValidDiagnosticSpanId,
|
||||
isValidDiagnosticTraceFlags,
|
||||
isValidDiagnosticTraceId,
|
||||
onDiagnosticEvent,
|
||||
redactSensitiveText,
|
||||
registerLogTransport,
|
||||
} from "../api.js";
|
||||
|
||||
const DEFAULT_SERVICE_NAME = "openclaw";
|
||||
|
||||
@@ -62,6 +79,83 @@ function redactOtelAttributes(attributes: Record<string, string | number | boole
|
||||
return redactedAttributes;
|
||||
}
|
||||
|
||||
/**
 * Validates an arbitrary value as a diagnostic trace context.
 *
 * The value must be a non-null, non-array object whose `traceId` passes
 * `isValidDiagnosticTraceId`. `spanId`, `parentSpanId` and `traceFlags` are
 * optional, but when any of them is present (not `undefined`) it must pass its
 * validator; a single invalid field rejects the whole value.
 *
 * @param value - Candidate value of unknown shape.
 * @returns A fresh object holding only the recognised trace fields (falsy
 *   optional fields are omitted), or `undefined` when the value is rejected.
 */
function normalizeTraceContext(value: unknown): DiagnosticTraceContext | undefined {
  const isRecord = typeof value === "object" && value !== null && !Array.isArray(value);
  if (!isRecord) {
    return undefined;
  }

  const { traceId, spanId, parentSpanId, traceFlags } = value as Partial<DiagnosticTraceContext>;
  if (!isValidDiagnosticTraceId(traceId)) {
    return undefined;
  }

  // Optional fields are checked in a fixed order: spanId, parentSpanId, traceFlags.
  const optionalFieldsValid =
    (spanId === undefined || isValidDiagnosticSpanId(spanId)) &&
    (parentSpanId === undefined || isValidDiagnosticSpanId(parentSpanId)) &&
    (traceFlags === undefined || isValidDiagnosticTraceFlags(traceFlags));
  if (!optionalFieldsValid) {
    return undefined;
  }

  // Copy only the known fields so unrelated properties never reach the result.
  return {
    traceId,
    ...(spanId ? { spanId } : {}),
    ...(parentSpanId ? { parentSpanId } : {}),
    ...(traceFlags ? { traceFlags } : {}),
  };
}
|
||||
|
||||
/**
 * Looks for a trace context either in `value` itself or under its `trace` key.
 *
 * The value is first offered to `normalizeTraceContext` as-is. Only when that
 * yields nothing, and the value is a non-null, non-array object, is its
 * `trace` property tried in the same way.
 *
 * @param value - Candidate value of unknown shape.
 * @returns The first context accepted by `normalizeTraceContext`, else `undefined`.
 */
function extractTraceContext(value: unknown): DiagnosticTraceContext | undefined {
  const lookupNested = (): DiagnosticTraceContext | undefined => {
    const canHoldTrace = typeof value === "object" && value !== null && !Array.isArray(value);
    return canHoldTrace
      ? normalizeTraceContext((value as { trace?: unknown }).trace)
      : undefined;
  };

  // The nested lookup runs only when the direct interpretation is rejected.
  return normalizeTraceContext(value) ?? lookupNested();
}
|
||||
|
||||
/**
 * Finds the trace context attached to a log call.
 *
 * Candidates are tried in a fixed order: the bindings object first, then each
 * positional argument in the order given. The first candidate for which
 * `extractTraceContext` returns a context wins.
 *
 * @param bindings - Logger bindings, if any.
 * @param numericArgs - Positional log arguments.
 * @returns The first trace context found, or `undefined` when none is present.
 */
function findLogTraceContext(
  bindings: Record<string, unknown> | undefined,
  numericArgs: unknown[],
): DiagnosticTraceContext | undefined {
  const candidates: unknown[] = [bindings, ...numericArgs];
  for (const candidate of candidates) {
    const found = extractTraceContext(candidate);
    if (found) {
      return found;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Converts a hexadecimal trace-flags string into an OTEL `TraceFlags` value.
 *
 * Only the sampled bit is carried over; every other bit is discarded. A
 * missing value is treated as "00". Text that does not parse as hex yields
 * `NaN`, whose bitwise AND is 0, so it also maps to `TraceFlags.NONE`.
 *
 * @param traceFlags - Hex-encoded flags, or `undefined`.
 * @returns `TraceFlags.SAMPLED` when the sampled bit is set, else `TraceFlags.NONE`.
 */
function traceFlagsToOtel(traceFlags: string | undefined): TraceFlags {
  const flagBits = Number.parseInt(traceFlags ?? "00", 16);
  if ((flagBits & TraceFlags.SAMPLED) === 0) {
    return TraceFlags.NONE;
  }
  return TraceFlags.SAMPLED;
}
|
||||
|
||||
/**
 * Copies trace-context fields onto a log attribute map, mutating it in place.
 *
 * Does nothing when no trace context is supplied. Otherwise
 * `openclaw.traceId` is always written, and `openclaw.spanId`,
 * `openclaw.parentSpanId` and `openclaw.traceFlags` are written only when the
 * corresponding field is truthy.
 *
 * @param attributes - Attribute map to extend.
 * @param traceContext - Trace context to copy from, if any.
 */
function addTraceAttributes(
  attributes: Record<string, string | number | boolean>,
  traceContext: DiagnosticTraceContext | undefined,
): void {
  if (!traceContext) {
    return;
  }

  const { traceId, spanId, parentSpanId, traceFlags } = traceContext;
  attributes["openclaw.traceId"] = traceId;

  // Optional fields, listed in the order they are written to the map.
  const optionalAttributes: Array<[string, string | undefined]> = [
    ["openclaw.spanId", spanId],
    ["openclaw.parentSpanId", parentSpanId],
    ["openclaw.traceFlags", traceFlags],
  ];
  for (const [attributeKey, fieldValue] of optionalAttributes) {
    if (fieldValue) {
      attributes[attributeKey] = fieldValue;
    }
  }
}
|
||||
|
||||
export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
let sdk: NodeSDK | null = null;
|
||||
let logProvider: LoggerProvider | null = null;
|
||||
@@ -294,6 +388,7 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
// ignore malformed json bindings
|
||||
}
|
||||
}
|
||||
const traceContext = findLogTraceContext(bindings, numericArgs);
|
||||
|
||||
let message = "";
|
||||
if (numericArgs.length > 0 && typeof numericArgs[numericArgs.length - 1] === "string") {
|
||||
@@ -343,15 +438,25 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
if (meta?.path?.filePathWithLine) {
|
||||
attributes["openclaw.code.location"] = meta.path.filePathWithLine;
|
||||
}
|
||||
addTraceAttributes(attributes, traceContext);
|
||||
|
||||
// OTLP can leave the host boundary, so redact string fields before export.
|
||||
otelLogger.emit({
|
||||
const logRecord: LogRecord = {
|
||||
body: redactSensitiveText(message),
|
||||
severityText: logLevelName,
|
||||
severityNumber,
|
||||
attributes: redactOtelAttributes(attributes),
|
||||
timestamp: meta?.date ?? new Date(),
|
||||
});
|
||||
};
|
||||
if (traceContext?.spanId) {
|
||||
logRecord.context = trace.setSpanContext(otelContextApi.active(), {
|
||||
traceId: traceContext.traceId,
|
||||
spanId: traceContext.spanId,
|
||||
traceFlags: traceFlagsToOtel(traceContext.traceFlags),
|
||||
isRemote: true,
|
||||
});
|
||||
}
|
||||
otelLogger.emit(logRecord);
|
||||
} catch (err) {
|
||||
ctx.logger.error(`diagnostics-otel: log transport failed: ${formatError(err)}`);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user