diff --git a/CHANGELOG.md b/CHANGELOG.md index 5adbd53a040..7ac59c99d5a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai - Diagnostics/OTEL: support signal-specific OTLP endpoint overrides for traces, metrics, and logs via config or standard OTEL environment variables. Thanks @vincentkoc. - Diagnostics/OTEL: emit bounded telemetry exporter health diagnostics for startup and log-export failures without exporting raw error text. Thanks @vincentkoc. - Diagnostics/OTEL: export agent harness lifecycle telemetry as bounded `openclaw.harness.run` spans and `openclaw.harness.duration_ms` metrics so QA-lab, Codex, and future harnesses share one trace shape. Thanks @vincentkoc. +- Diagnostics/trace: propagate W3C `traceparent` headers from trusted model-call trace context to provider transports while replacing caller-supplied traceparent values. Thanks @vincentkoc. - Plugins/CLI: add `openclaw plugins registry` for explicit persisted-registry inspection and `--refresh` repair without making normal startup rescan plugin locations. Thanks @vincentkoc. - Plugins/CLI: make `openclaw plugins list` read the cold persisted registry snapshot by default, leaving module-aware diagnostics to `plugins doctor` and `plugins inspect`. Thanks @vincentkoc. - Plugins/startup: move gateway startup plugin planning onto the versioned cold registry index, with postinstall repair for older registry files that predate startup metadata. Thanks @vincentkoc. diff --git a/docs/gateway/opentelemetry.md b/docs/gateway/opentelemetry.md index 26d185abd05..7a66116218a 100644 --- a/docs/gateway/opentelemetry.md +++ b/docs/gateway/opentelemetry.md @@ -19,6 +19,9 @@ works without code changes. For local file logs and how to read them, see and exec. - **`diagnostics-otel` plugin** subscribes to those events and exports them as OpenTelemetry **metrics**, **traces**, and **logs** over OTLP/HTTP. 
+- **Provider calls** receive a W3C `traceparent` header from OpenClaw's + trusted model-call span context when the provider transport accepts custom + headers. Plugin-emitted trace context is not propagated. - Exporters only attach when both the diagnostics surface and the plugin are enabled, so the in-process cost stays near zero by default. @@ -121,6 +124,11 @@ identifiers (channel, provider, model, error category, hash-only request ids) and never include prompt text, response text, tool inputs, tool outputs, or session keys. +Outbound model requests may include a W3C `traceparent` header. That header is +generated only from OpenClaw-owned diagnostic trace context for the active model +call. When it is attached, existing caller-supplied `traceparent` headers are replaced +regardless of header-name casing, so plugins or custom provider options cannot spoof cross-service trace ancestry. + Set `diagnostics.otel.captureContent.*` to `true` only when your collector and retention policy are approved for prompt, response, tool, or system-prompt text. 
Each subkey is opt-in independently: diff --git a/src/agents/pi-embedded-runner/run/attempt.model-diagnostic-events.test.ts b/src/agents/pi-embedded-runner/run/attempt.model-diagnostic-events.test.ts index 0de899bc42d..20ca3888d57 100644 --- a/src/agents/pi-embedded-runner/run/attempt.model-diagnostic-events.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.model-diagnostic-events.test.ts @@ -105,6 +105,60 @@ describe("wrapStreamFnWithDiagnosticModelCallEvents", () => { }); }); + it("propagates the trusted model-call traceparent without mutating caller headers", async () => { + async function* stream() { + yield { type: "text", text: "ok" }; + } + const capturedOptions: Array[2]> = []; + const callerOptions = { + headers: { + "X-Custom": "kept", + TraceParent: "00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01", + }, + sessionId: "provider-session", + }; + const wrapped = wrapStreamFnWithDiagnosticModelCallEvents( + (( + _model: Parameters[0], + _context: Parameters[1], + options: Parameters[2], + ) => { + capturedOptions.push(options); + return stream(); + }) as unknown as StreamFn, + { + runId: "run-1", + provider: "openai", + model: "gpt-5.4", + trace: createDiagnosticTraceContext({ + traceId: "4bf92f3577b34da6a3ce929d0e0e4736", + spanId: "00f067aa0ba902b7", + traceFlags: "01", + }), + nextCallId: () => "call-traceparent", + }, + ); + + await drain( + wrapped({} as never, {} as never, callerOptions) as unknown as AsyncIterable, + ); + + expect(capturedOptions).toHaveLength(1); + expect(capturedOptions[0]).not.toBe(callerOptions); + expect(capturedOptions[0]).toMatchObject({ + sessionId: "provider-session", + headers: { + "X-Custom": "kept", + traceparent: expect.stringMatching(/^00-4bf92f3577b34da6a3ce929d0e0e4736-[0-9a-f]{16}-01$/), + }, + }); + expect(capturedOptions[0]?.headers).not.toHaveProperty("TraceParent"); + expect(callerOptions.headers).toEqual({ + "X-Custom": "kept", + TraceParent: 
"00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01", + }); + }); + it("emits error events when stream iteration fails", async () => { const requestId = "req_provider_123"; const stream = { diff --git a/src/agents/pi-embedded-runner/run/attempt.model-diagnostic-events.ts b/src/agents/pi-embedded-runner/run/attempt.model-diagnostic-events.ts index cfcc2a56582..54fba3bc916 100644 --- a/src/agents/pi-embedded-runner/run/attempt.model-diagnostic-events.ts +++ b/src/agents/pi-embedded-runner/run/attempt.model-diagnostic-events.ts @@ -11,6 +11,7 @@ import { import { createChildDiagnosticTraceContext, freezeDiagnosticTraceContext, + formatDiagnosticTraceparent, type DiagnosticTraceContext, } from "../../../infra/diagnostic-trace-context.js"; import { getGlobalHookRunner } from "../../../plugins/hook-runner-global.js"; @@ -48,6 +49,8 @@ type ModelCallEndedHookFields = Pick< >; const MODEL_CALL_STREAM_RETURN_TIMEOUT_MS = 1000; +const TRACEPARENT_HEADER_NAME = "traceparent"; +type ModelCallStreamOptions = Parameters[2]; function isPromiseLike(value: unknown): value is PromiseLike { if (value === null || (typeof value !== "object" && typeof value !== "function")) { @@ -197,6 +200,29 @@ function emitModelCallError( }); } +function withDiagnosticTraceparentHeader( + options: ModelCallStreamOptions, + trace: DiagnosticTraceContext, +): ModelCallStreamOptions { + const traceparent = formatDiagnosticTraceparent(trace); + if (!traceparent) { + return options; + } + + const headers: Record = {}; + for (const [key, value] of Object.entries(options?.headers ?? {})) { + if (key.toLowerCase() === TRACEPARENT_HEADER_NAME) { + continue; + } + headers[key] = value; + } + headers[TRACEPARENT_HEADER_NAME] = traceparent; + return { + ...(options ?? 
{}), + headers, + }; +} + async function safeReturnIterator(iterator: AsyncIterator): Promise { let returnResult: unknown; try { @@ -316,9 +342,10 @@ export function wrapStreamFnWithDiagnosticModelCallEvents( const eventBase = baseModelCallEvent(ctx, callId, trace); emitModelCallStarted(eventBase); const startedAt = Date.now(); + const propagatedOptions = withDiagnosticTraceparentHeader(options, trace); try { - const result = streamFn(model, streamContext, options); + const result = streamFn(model, streamContext, propagatedOptions); if (isPromiseLike(result)) { return result.then( (resolved) => observeModelCallResult(resolved, eventBase, startedAt),