feat(diagnostics-otel): add context assembly spans

2026-05-06 07:40:44 +00:00 · 2026-04-25 11:03:46 -07:00
parent afd6b5d6fc
commit ff172f46a5
5 changed files with 146 additions and 6 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,6 +25,7 @@ Docs: https://docs.openclaw.ai
 - Plugins/registry: ignore stale persisted registry reads when plugin policy no longer matches current config, and stamp generated registry files with a do-not-edit warning. Thanks @vincentkoc.
 - Diagnostics/OTEL: surface provider request identifiers as bounded hashes on model-call diagnostics and span events, without exporting raw request IDs or metric labels. Thanks @Lidang-Jiang and @vincentkoc.
 - Plugins/diagnostics: add metadata-only `model_call_started` and `model_call_ended` hooks for provider/model call telemetry without exposing prompts, responses, headers, request bodies, or raw provider request IDs. Thanks @vincentkoc.
+- Diagnostics/OTEL: emit bounded context assembly diagnostics and export `openclaw.context.assembled` spans with prompt/history sizes but no prompt, history, response, or session-key content. Thanks @vincentkoc.
 - Diagnostics/OTEL: add bounded outbound message delivery lifecycle diagnostics and export them as low-cardinality delivery spans/metrics without message body, recipient, room, or media-path data. (#71471) Thanks @vincentkoc and @jlapenna.
 - Diagnostics/OTEL: emit bounded exec-process diagnostics and export them as `openclaw.exec` spans without exposing command text, working directories, or container identifiers. (#71451) Thanks @vincentkoc and @jlapenna.
 - Diagnostics/OTEL: support `OPENCLAW_OTEL_PRELOADED=1` so the plugin can reuse an already-registered OpenTelemetry SDK while keeping OpenClaw diagnostic listeners wired. (#71450) Thanks @vincentkoc and @jlapenna.
--- a/extensions/diagnostics-otel/src/service.test.ts
+++ b/extensions/diagnostics-otel/src/service.test.ts
@@ -989,6 +989,67 @@ describe("diagnostics-otel service", () => {
    await service.stop?.(ctx);
  });

+  test("exports trusted context assembly spans without prompt content", async () => { // Emits one trusted context.assembled event and inspects the resulting startSpan call.
+    const service = createDiagnosticsOtelService();
+    const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
+    await service.start(ctx);
+
+    emitTrustedDiagnosticEvent({ // Event carries only sizes/counts plus run metadata; no prompt or history text is supplied.
+      type: "context.assembled",
+      runId: "run-1",
+      sessionKey: "session-key", // Sentinel value: asserted below to be absent from the span call.
+      sessionId: "session-id",
+      provider: "openai",
+      model: "gpt-5.4",
+      channel: "webchat",
+      trigger: "message",
+      messageCount: 12,
+      historyTextChars: 1234,
+      historyImageBlocks: 2,
+      maxMessageTextChars: 456,
+      systemPromptChars: 789,
+      promptChars: 42,
+      promptImages: 1,
+      contextTokenBudget: 128_000,
+      reserveTokens: 4096,
+      trace: {
+        traceId: TRACE_ID,
+        spanId: GRANDCHILD_SPAN_ID,
+        parentSpanId: SPAN_ID, // The parent id is what the setSpanContext assertion below looks for.
+        traceFlags: "01",
+      },
+    });
+    await flushDiagnosticEvents(); // Awaited before inspecting the tracer mock.
+
+    const contextCall = telemetryState.tracer.startSpan.mock.calls.find( // Pick the startSpan call whose first argument is the context span name.
+      (call) => call[0] === "openclaw.context.assembled",
+    );
+    expect(contextCall?.[1]).toMatchObject({ // Second startSpan argument must carry these attributes (subset match).
+      attributes: {
+        "openclaw.provider": "openai",
+        "openclaw.model": "gpt-5.4",
+        "openclaw.channel": "webchat",
+        "openclaw.trigger": "message",
+        "openclaw.context.message_count": 12,
+        "openclaw.context.history_text_chars": 1234,
+        "openclaw.context.history_image_blocks": 2,
+        "openclaw.context.max_message_text_chars": 456,
+        "openclaw.context.system_prompt_chars": 789,
+        "openclaw.context.prompt_chars": 42,
+        "openclaw.context.prompt_images": 1,
+        "openclaw.context.token_budget": 128_000,
+        "openclaw.context.reserve_tokens": 4096,
+      },
+    });
+    expect(JSON.stringify(contextCall)).not.toContain("session-key"); // The session key value must not appear anywhere in the span call arguments.
+    expect(JSON.stringify(contextCall)).not.toContain("prompt text"); // NOTE(review): no input in this test contains "prompt text", so this check cannot fail as written.
+    expect(telemetryState.tracer.setSpanContext).toHaveBeenCalledWith( // Parent context must use the event's parentSpanId (SPAN_ID) within TRACE_ID.
+      expect.anything(),
+      expect.objectContaining({ traceId: TRACE_ID, spanId: SPAN_ID }),
+    );
+    await service.stop?.(ctx);
+  });
+
  test("parents trusted diagnostic lifecycle spans from explicit parent ids", async () => {
    const service = createDiagnosticsOtelService();
    const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
--- a/extensions/diagnostics-otel/src/service.ts
+++ b/extensions/diagnostics-otel/src/service.ts
@@ -1129,6 +1129,36 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
        span.end(evt.ts);
      };

+      const recordContextAssembled = ( // Records a "context.assembled" diagnostic event as an "openclaw.context.assembled" span.
+        evt: Extract<DiagnosticEventPayload, { type: "context.assembled" }>,
+        metadata: DiagnosticEventMetadata,
+      ) => {
+        if (!tracesEnabled) { // This handler only produces a span, so it does nothing when traces are disabled.
+          return;
+        }
+        const spanAttrs: Record<string, string | number | boolean> = { // Numeric size/count fields copied from the event; no text content is read here.
+          "openclaw.context.message_count": evt.messageCount,
+          "openclaw.context.history_text_chars": evt.historyTextChars,
+          "openclaw.context.history_image_blocks": evt.historyImageBlocks,
+          "openclaw.context.max_message_text_chars": evt.maxMessageTextChars,
+          "openclaw.context.system_prompt_chars": evt.systemPromptChars,
+          "openclaw.context.prompt_chars": evt.promptChars,
+          "openclaw.context.prompt_images": evt.promptImages,
+        };
+        addRunAttrs(spanAttrs, evt); // NOTE(review): helper defined elsewhere; presumably adds the provider/model/channel/trigger attributes the test expects — confirm.
+        if (evt.contextTokenBudget !== undefined) { // Optional field: attribute is set only when the event provides it.
+          spanAttrs["openclaw.context.token_budget"] = evt.contextTokenBudget;
+        }
+        if (evt.reserveTokens !== undefined) { // Optional field: attribute is set only when the event provides it.
+          spanAttrs["openclaw.context.reserve_tokens"] = evt.reserveTokens;
+        }
+        const span = spanWithDuration("openclaw.context.assembled", spanAttrs, 0, { // NOTE(review): third argument 0 is presumably the duration in ms — confirm against spanWithDuration.
+          parentContext: contextForTrustedDiagnosticSpanParent(evt, metadata),
+          endTimeMs: evt.ts,
+        });
+        span.end(evt.ts); // Ends the span at the event timestamp.
+      };
+
      const modelCallMetricAttrs = (evt: ModelCallLifecycleDiagnosticEvent) => ({
        "openclaw.provider": evt.provider,
        "openclaw.model": evt.model,
@@ -1383,6 +1413,9 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
            case "run.completed":
              recordRunCompleted(evt, metadata);
              return;
+            case "context.assembled":
+              recordContextAssembled(evt, metadata);
+              return;
            case "model.call.completed":
              recordModelCallCompleted(evt, metadata);
              return;
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -2427,12 +2427,37 @@ export async function runEmbeddedAttempt(
            });
          }

+          const msgCount = activeSession.messages.length;
+          const systemLen = systemPromptText?.length ?? 0;
+          const promptLen = effectivePrompt.length;
+          const sessionSummary = summarizeSessionContext(activeSession.messages);
+          const reserveTokens = settingsManager.getCompactionReserveTokens();
+          const contextTokenBudget = params.contextTokenBudget ?? DEFAULT_CONTEXT_TOKENS;
+          emitTrustedDiagnosticEvent({
+            type: "context.assembled",
+            runId: params.runId,
+            ...(params.sessionKey && { sessionKey: params.sessionKey }),
+            ...(params.sessionId && { sessionId: params.sessionId }),
+            provider: params.provider,
+            model: params.modelId,
+            ...((params.messageChannel ?? params.messageProvider)
+              ? { channel: params.messageChannel ?? params.messageProvider }
+              : {}),
+            trigger: params.trigger,
+            messageCount: msgCount,
+            historyTextChars: sessionSummary.totalTextChars,
+            historyImageBlocks: sessionSummary.totalImageBlocks,
+            maxMessageTextChars: sessionSummary.maxMessageTextChars,
+            systemPromptChars: systemLen,
+            promptChars: promptLen,
+            promptImages: imageResult.images.length,
+            contextTokenBudget,
+            reserveTokens,
+            trace: freezeDiagnosticTraceContext(createChildDiagnosticTraceContext(runTrace)),
+          });
+
          // Diagnostic: log context sizes before prompt to help debug early overflow errors.
          if (log.isEnabled("debug")) {
-            const msgCount = activeSession.messages.length;
-            const systemLen = systemPromptText?.length ?? 0;
-            const promptLen = effectivePrompt.length;
-            const sessionSummary = summarizeSessionContext(activeSession.messages);
            log.debug(
              `[context-diag] pre-prompt: sessionKey=${params.sessionKey ?? params.sessionId} ` +
                `messages=${msgCount} roleCounts=${sessionSummary.roleCounts} ` +
@@ -2475,8 +2500,6 @@ export async function runEmbeddedAttempt(
              });
          }

-          const reserveTokens = settingsManager.getCompactionReserveTokens();
-          const contextTokenBudget = params.contextTokenBudget ?? DEFAULT_CONTEXT_TOKENS;
          const preemptiveCompaction = shouldPreemptivelyCompactBeforePrompt({
            messages: activeSession.messages,
            unwindowedMessages: unwindowedContextEngineMessagesForPrecheck,
--- a/src/infra/diagnostic-events.ts
+++ b/src/infra/diagnostic-events.ts
@@ -280,6 +280,26 @@ export type DiagnosticModelCallErrorEvent = DiagnosticModelCallBaseEvent & {
  errorCategory: string;
 };

+export type DiagnosticContextAssembledEvent = DiagnosticBaseEvent & { // Size/count summary of an assembled prompt context; declares no fields for prompt or history text.
+  type: "context.assembled"; // Discriminant within DiagnosticEventPayload.
+  runId: string;
+  sessionKey?: string;
+  sessionId?: string;
+  provider: string;
+  model: string;
+  channel?: string;
+  trigger?: string;
+  messageCount: number; // Number of messages in the session (emitter uses activeSession.messages.length).
+  historyTextChars: number; // Emitter passes the session summary's totalTextChars.
+  historyImageBlocks: number; // Emitter passes the session summary's totalImageBlocks.
+  maxMessageTextChars: number; // Emitter passes the session summary's maxMessageTextChars.
+  systemPromptChars: number; // String length of the system prompt text; the emitter uses 0 when it is absent.
+  promptChars: number; // String length of the effective prompt.
+  promptImages: number; // Number of images attached to the prompt.
+  contextTokenBudget?: number; // Emitter falls back to DEFAULT_CONTEXT_TOKENS when no budget parameter is given.
+  reserveTokens?: number; // Emitter passes settingsManager.getCompactionReserveTokens().
+};
+
 export type DiagnosticMemoryUsage = {
  rssBytes: number;
  heapTotalBytes: number;
@@ -355,6 +375,7 @@ export type DiagnosticEventPayload =
  | DiagnosticModelCallStartedEvent
  | DiagnosticModelCallCompletedEvent
  | DiagnosticModelCallErrorEvent
+  | DiagnosticContextAssembledEvent
  | DiagnosticMemorySampleEvent
  | DiagnosticMemoryPressureEvent
  | DiagnosticPayloadLargeEvent
@@ -401,6 +422,7 @@ const ASYNC_DIAGNOSTIC_EVENT_TYPES = new Set<DiagnosticEventPayload["type"]>([
  "model.call.started",
  "model.call.completed",
  "model.call.error",
+  "context.assembled",
  "log.record",
 ]);