fix(diagnostics): propagate trusted traceparent headers

This commit is contained in:
Vincent Koc
2026-04-26 00:24:19 -07:00
parent 5e8fda4c64
commit a77996dc56
4 changed files with 91 additions and 1 deletions

View File

@@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai
- Diagnostics/OTEL: support signal-specific OTLP endpoint overrides for traces, metrics, and logs via config or standard OTEL environment variables. Thanks @vincentkoc.
- Diagnostics/OTEL: emit bounded telemetry exporter health diagnostics for startup and log-export failures without exporting raw error text. Thanks @vincentkoc.
- Diagnostics/OTEL: export agent harness lifecycle telemetry as bounded `openclaw.harness.run` spans and `openclaw.harness.duration_ms` metrics so QA-lab, Codex, and future harnesses share one trace shape. Thanks @vincentkoc.
- Diagnostics/trace: propagate W3C `traceparent` headers from trusted model-call trace context to provider transports while replacing caller-supplied traceparent values. Thanks @vincentkoc.
- Plugins/CLI: add `openclaw plugins registry` for explicit persisted-registry inspection and `--refresh` repair without making normal startup rescan plugin locations. Thanks @vincentkoc.
- Plugins/CLI: make `openclaw plugins list` read the cold persisted registry snapshot by default, leaving module-aware diagnostics to `plugins doctor` and `plugins inspect`. Thanks @vincentkoc.
- Plugins/startup: move gateway startup plugin planning onto the versioned cold registry index, with postinstall repair for older registry files that predate startup metadata. Thanks @vincentkoc.

View File

@@ -19,6 +19,9 @@ works without code changes. For local file logs and how to read them, see
and exec.
- **`diagnostics-otel` plugin** subscribes to those events and exports them as
OpenTelemetry **metrics**, **traces**, and **logs** over OTLP/HTTP.
- **Provider calls** receive a W3C `traceparent` header from OpenClaw's
trusted model-call span context when the provider transport accepts custom
headers. Plugin-emitted trace context is not propagated.
- Exporters only attach when both the diagnostics surface and the plugin are
enabled, so the in-process cost stays near zero by default.
@@ -121,6 +124,11 @@ identifiers (channel, provider, model, error category, hash-only request ids)
and never include prompt text, response text, tool inputs, tool outputs, or
session keys.
Outbound model requests may include a W3C `traceparent` header. That header is
generated only from OpenClaw-owned diagnostic trace context for the active model
call. Existing caller-supplied `traceparent` headers are replaced, so plugins or
custom provider options cannot spoof cross-service trace ancestry.
Set `diagnostics.otel.captureContent.*` to `true` only when your collector and
retention policy are approved for prompt, response, tool, or system-prompt
text. Each subkey is opt-in independently:

View File

@@ -105,6 +105,60 @@ describe("wrapStreamFnWithDiagnosticModelCallEvents", () => {
});
});
it("propagates the trusted model-call traceparent without mutating caller headers", async () => {
  // Mixed-case header name supplied by the caller; the wrapper is expected to
  // drop it from the forwarded options regardless of casing.
  const spoofedTraceparent = "00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01";
  const callerOptions = {
    headers: {
      "X-Custom": "kept",
      TraceParent: spoofedTraceparent,
    },
    sessionId: "provider-session",
  };

  // Stub provider transport: records the options it receives and yields a
  // single text chunk.
  const capturedOptions: Array<Parameters<StreamFn>[2]> = [];
  const recordingStreamFn = ((
    _model: Parameters<StreamFn>[0],
    _context: Parameters<StreamFn>[1],
    options: Parameters<StreamFn>[2],
  ) => {
    capturedOptions.push(options);
    return (async function* () {
      yield { type: "text", text: "ok" };
    })();
  }) as unknown as StreamFn;

  const trustedTrace = createDiagnosticTraceContext({
    traceId: "4bf92f3577b34da6a3ce929d0e0e4736",
    spanId: "00f067aa0ba902b7",
    traceFlags: "01",
  });
  const wrapped = wrapStreamFnWithDiagnosticModelCallEvents(recordingStreamFn, {
    runId: "run-1",
    provider: "openai",
    model: "gpt-5.4",
    trace: trustedTrace,
    nextCallId: () => "call-traceparent",
  });

  const output = wrapped({} as never, {} as never, callerOptions);
  await drain(output as unknown as AsyncIterable<unknown>);

  // The transport must see a fresh options object carrying the trusted trace id.
  expect(capturedOptions).toHaveLength(1);
  const [forwarded] = capturedOptions;
  expect(forwarded).not.toBe(callerOptions);
  expect(forwarded).toMatchObject({
    sessionId: "provider-session",
    headers: {
      "X-Custom": "kept",
      traceparent: expect.stringMatching(/^00-4bf92f3577b34da6a3ce929d0e0e4736-[0-9a-f]{16}-01$/),
    },
  });
  expect(forwarded?.headers).not.toHaveProperty("TraceParent");

  // The caller's own headers object must be left exactly as it was passed in.
  expect(callerOptions.headers).toEqual({
    "X-Custom": "kept",
    TraceParent: spoofedTraceparent,
  });
});
it("emits error events when stream iteration fails", async () => {
const requestId = "req_provider_123";
const stream = {

View File

@@ -11,6 +11,7 @@ import {
import {
createChildDiagnosticTraceContext,
freezeDiagnosticTraceContext,
formatDiagnosticTraceparent,
type DiagnosticTraceContext,
} from "../../../infra/diagnostic-trace-context.js";
import { getGlobalHookRunner } from "../../../plugins/hook-runner-global.js";
@@ -48,6 +49,8 @@ type ModelCallEndedHookFields = Pick<
>;
// Timeout budget in milliseconds.
// NOTE(review): presumably bounds how long to wait for a stream iterator's
// return() to settle — the use site is not visible in this hunk; confirm.
const MODEL_CALL_STREAM_RETURN_TIMEOUT_MS = 1000;
// Lowercase header name. Used both to match caller-supplied header keys
// case-insensitively and as the key under which the trusted value is written.
const TRACEPARENT_HEADER_NAME = "traceparent";
// Type of the options argument (third parameter) accepted by a StreamFn.
type ModelCallStreamOptions = Parameters<StreamFn>[2];
function isPromiseLike(value: unknown): value is PromiseLike<unknown> {
if (value === null || (typeof value !== "object" && typeof value !== "function")) {
@@ -197,6 +200,29 @@ function emitModelCallError(
});
}
/**
 * Builds the stream options forwarded to the provider transport so that the
 * only `traceparent` header they can carry is the one formatted from `trace`.
 *
 * Caller-supplied headers whose name equals `traceparent` case-insensitively
 * are always removed, including when no trusted value can be formatted, so a
 * caller can never pass its own trace ancestry through. All other headers and
 * option fields are copied unchanged. The caller's `options` object and its
 * `headers` object are never mutated.
 *
 * @param options - Stream options supplied by the caller; may be undefined.
 * @param trace - Trace context for the active model call.
 * @returns The original `options` reference when there is nothing to inject
 *   and nothing to strip; otherwise a shallow copy with a rebuilt `headers`
 *   map.
 */
function withDiagnosticTraceparentHeader(
  options: ModelCallStreamOptions,
  trace: DiagnosticTraceContext,
): ModelCallStreamOptions {
  const traceparent = formatDiagnosticTraceparent(trace);
  const callerHeaders = Object.entries(options?.headers ?? {});
  const isTraceparentKey = (key: string): boolean =>
    key.toLowerCase() === TRACEPARENT_HEADER_NAME;

  // Nothing to inject and nothing to strip: hand back the caller's object
  // untouched to avoid a needless copy.
  if (!traceparent && !callerHeaders.some(([key]) => isTraceparentKey(key))) {
    return options;
  }

  const headers: Record<string, string> = {};
  for (const [key, value] of callerHeaders) {
    // Drop every casing variant (e.g. "TraceParent") of the caller's header.
    if (!isTraceparentKey(key)) {
      headers[key] = value;
    }
  }
  if (traceparent) {
    headers[TRACEPARENT_HEADER_NAME] = traceparent;
  }
  return {
    ...(options ?? {}),
    headers,
  };
}
async function safeReturnIterator(iterator: AsyncIterator<unknown>): Promise<void> {
let returnResult: unknown;
try {
@@ -316,9 +342,10 @@ export function wrapStreamFnWithDiagnosticModelCallEvents(
const eventBase = baseModelCallEvent(ctx, callId, trace);
emitModelCallStarted(eventBase);
const startedAt = Date.now();
const propagatedOptions = withDiagnosticTraceparentHeader(options, trace);
try {
const result = streamFn(model, streamContext, options);
const result = streamFn(model, streamContext, propagatedOptions);
if (isPromiseLike(result)) {
return result.then(
(resolved) => observeModelCallResult(resolved, eventBase, startedAt),