diff --git a/CHANGELOG.md b/CHANGELOG.md index 80846c9d049..5662fc6b91e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ Docs: https://docs.openclaw.ai - Plugins/registry: ignore stale persisted registry reads when plugin policy no longer matches current config, and stamp generated registry files with a do-not-edit warning. Thanks @vincentkoc. - Diagnostics/OTEL: surface provider request identifiers as bounded hashes on model-call diagnostics and span events, without exporting raw request IDs or metric labels. Thanks @Lidang-Jiang and @vincentkoc. - Plugins/diagnostics: add metadata-only `model_call_started` and `model_call_ended` hooks for provider/model call telemetry without exposing prompts, responses, headers, request bodies, or raw provider request IDs. Thanks @vincentkoc. +- Diagnostics/OTEL: emit bounded context assembly diagnostics and export `openclaw.context.assembled` spans with prompt/history sizes but no prompt, history, response, or session-key content. Thanks @vincentkoc. - Diagnostics/OTEL: add bounded outbound message delivery lifecycle diagnostics and export them as low-cardinality delivery spans/metrics without message body, recipient, room, or media-path data. (#71471) Thanks @vincentkoc and @jlapenna. - Diagnostics/OTEL: emit bounded exec-process diagnostics and export them as `openclaw.exec` spans without exposing command text, working directories, or container identifiers. (#71451) Thanks @vincentkoc and @jlapenna. - Diagnostics/OTEL: support `OPENCLAW_OTEL_PRELOADED=1` so the plugin can reuse an already-registered OpenTelemetry SDK while keeping OpenClaw diagnostic listeners wired. (#71450) Thanks @vincentkoc and @jlapenna. diff --git a/extensions/diagnostics-otel/src/service.test.ts b/extensions/diagnostics-otel/src/service.test.ts index 558214fcff2..782378d680b 100644 --- a/extensions/diagnostics-otel/src/service.test.ts +++ b/extensions/diagnostics-otel/src/service.test.ts @@ -989,6 +989,67 @@ describe("diagnostics-otel service", () => { await service.stop?.(ctx); }); + test("exports trusted context assembly spans without prompt content", async () => { + const service = createDiagnosticsOtelService(); + const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true }); + await service.start(ctx); + + emitTrustedDiagnosticEvent({ + type: "context.assembled", + runId: "run-1", + sessionKey: "session-key", + sessionId: "session-id", + provider: "openai", + model: "gpt-5.4", + channel: "webchat", + trigger: "message", + messageCount: 12, + historyTextChars: 1234, + historyImageBlocks: 2, + maxMessageTextChars: 456, + systemPromptChars: 789, + promptChars: 42, + promptImages: 1, + contextTokenBudget: 128_000, + reserveTokens: 4096, + trace: { + traceId: TRACE_ID, + spanId: GRANDCHILD_SPAN_ID, + parentSpanId: SPAN_ID, + traceFlags: "01", + }, + }); + await flushDiagnosticEvents(); + + const contextCall = telemetryState.tracer.startSpan.mock.calls.find( + (call) => call[0] === "openclaw.context.assembled", + ); + expect(contextCall?.[1]).toMatchObject({ + attributes: { + "openclaw.provider": "openai", + "openclaw.model": "gpt-5.4", + "openclaw.channel": "webchat", + "openclaw.trigger": "message", + "openclaw.context.message_count": 12, + "openclaw.context.history_text_chars": 1234, + "openclaw.context.history_image_blocks": 2, + "openclaw.context.max_message_text_chars": 456, + "openclaw.context.system_prompt_chars": 789, + "openclaw.context.prompt_chars": 42, + "openclaw.context.prompt_images": 1, + "openclaw.context.token_budget": 128_000, + "openclaw.context.reserve_tokens": 4096, + }, + }); + expect(JSON.stringify(contextCall)).not.toContain("session-key"); + expect(JSON.stringify(contextCall)).not.toContain("prompt text"); + expect(telemetryState.tracer.setSpanContext).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ traceId: TRACE_ID, spanId: SPAN_ID }), + ); + await service.stop?.(ctx); + }); + test("parents trusted diagnostic lifecycle spans from explicit parent ids", async () => { const service = createDiagnosticsOtelService(); const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true }); diff --git a/extensions/diagnostics-otel/src/service.ts b/extensions/diagnostics-otel/src/service.ts index d07009ff152..0352bf9cfaf 100644 --- a/extensions/diagnostics-otel/src/service.ts +++ b/extensions/diagnostics-otel/src/service.ts @@ -1129,6 +1129,36 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { span.end(evt.ts); }; + const recordContextAssembled = ( + evt: Extract, + metadata: DiagnosticEventMetadata, + ) => { + if (!tracesEnabled) { + return; + } + const spanAttrs: Record = { + "openclaw.context.message_count": evt.messageCount, + "openclaw.context.history_text_chars": evt.historyTextChars, + "openclaw.context.history_image_blocks": evt.historyImageBlocks, + "openclaw.context.max_message_text_chars": evt.maxMessageTextChars, + "openclaw.context.system_prompt_chars": evt.systemPromptChars, + "openclaw.context.prompt_chars": evt.promptChars, + "openclaw.context.prompt_images": evt.promptImages, + }; + addRunAttrs(spanAttrs, evt); + if (evt.contextTokenBudget !== undefined) { + spanAttrs["openclaw.context.token_budget"] = evt.contextTokenBudget; + } + if (evt.reserveTokens !== undefined) { + spanAttrs["openclaw.context.reserve_tokens"] = evt.reserveTokens; + } + const span = spanWithDuration("openclaw.context.assembled", spanAttrs, 0, { + parentContext: contextForTrustedDiagnosticSpanParent(evt, metadata), + endTimeMs: evt.ts, + }); + span.end(evt.ts); + }; + const modelCallMetricAttrs = (evt: ModelCallLifecycleDiagnosticEvent) => ({ "openclaw.provider": evt.provider, "openclaw.model": evt.model, @@ -1383,6 +1413,9 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { case "run.completed": recordRunCompleted(evt, metadata); return; + case "context.assembled": + recordContextAssembled(evt, metadata); + return; case "model.call.completed": recordModelCallCompleted(evt, metadata); return; diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 43e4efc6d43..da4dd241af5 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -2427,12 +2427,37 @@ export async function runEmbeddedAttempt( }); } + const msgCount = activeSession.messages.length; + const systemLen = systemPromptText?.length ?? 0; + const promptLen = effectivePrompt.length; + const sessionSummary = summarizeSessionContext(activeSession.messages); + const reserveTokens = settingsManager.getCompactionReserveTokens(); + const contextTokenBudget = params.contextTokenBudget ?? DEFAULT_CONTEXT_TOKENS; + emitTrustedDiagnosticEvent({ + type: "context.assembled", + runId: params.runId, + ...(params.sessionKey && { sessionKey: params.sessionKey }), + ...(params.sessionId && { sessionId: params.sessionId }), + provider: params.provider, + model: params.modelId, + ...((params.messageChannel ?? params.messageProvider) + ? { channel: params.messageChannel ?? params.messageProvider } + : {}), + trigger: params.trigger, + messageCount: msgCount, + historyTextChars: sessionSummary.totalTextChars, + historyImageBlocks: sessionSummary.totalImageBlocks, + maxMessageTextChars: sessionSummary.maxMessageTextChars, + systemPromptChars: systemLen, + promptChars: promptLen, + promptImages: imageResult.images.length, + contextTokenBudget, + reserveTokens, + trace: freezeDiagnosticTraceContext(createChildDiagnosticTraceContext(runTrace)), + }); + // Diagnostic: log context sizes before prompt to help debug early overflow errors. if (log.isEnabled("debug")) { - const msgCount = activeSession.messages.length; - const systemLen = systemPromptText?.length ?? 0; - const promptLen = effectivePrompt.length; - const sessionSummary = summarizeSessionContext(activeSession.messages); log.debug( `[context-diag] pre-prompt: sessionKey=${params.sessionKey ?? params.sessionId} ` + `messages=${msgCount} roleCounts=${sessionSummary.roleCounts} ` + @@ -2475,8 +2500,6 @@ export async function runEmbeddedAttempt( }); } - const reserveTokens = settingsManager.getCompactionReserveTokens(); - const contextTokenBudget = params.contextTokenBudget ?? DEFAULT_CONTEXT_TOKENS; const preemptiveCompaction = shouldPreemptivelyCompactBeforePrompt({ messages: activeSession.messages, unwindowedMessages: unwindowedContextEngineMessagesForPrecheck, diff --git a/src/infra/diagnostic-events.ts b/src/infra/diagnostic-events.ts index 0f8e98407e7..6513adabedf 100644 --- a/src/infra/diagnostic-events.ts +++ b/src/infra/diagnostic-events.ts @@ -280,6 +280,26 @@ export type DiagnosticModelCallErrorEvent = DiagnosticModelCallBaseEvent & { errorCategory: string; }; +export type DiagnosticContextAssembledEvent = DiagnosticBaseEvent & { + type: "context.assembled"; + runId: string; + sessionKey?: string; + sessionId?: string; + provider: string; + model: string; + channel?: string; + trigger?: string; + messageCount: number; + historyTextChars: number; + historyImageBlocks: number; + maxMessageTextChars: number; + systemPromptChars: number; + promptChars: number; + promptImages: number; + contextTokenBudget?: number; + reserveTokens?: number; +}; + export type DiagnosticMemoryUsage = { rssBytes: number; heapTotalBytes: number; @@ -355,6 +375,7 @@ export type DiagnosticEventPayload = | DiagnosticModelCallStartedEvent | DiagnosticModelCallCompletedEvent | DiagnosticModelCallErrorEvent + | DiagnosticContextAssembledEvent | DiagnosticMemorySampleEvent | DiagnosticMemoryPressureEvent | DiagnosticPayloadLargeEvent @@ -401,6 +422,7 @@ const ASYNC_DIAGNOSTIC_EVENT_TYPES = new Set([ "model.call.started", "model.call.completed", "model.call.error", + "context.assembled", "log.record", ]);