feat(diagnostics-otel): add context assembly spans

2026-05-06 08:40:44 +00:00 · 2026-04-25 11:03:46 -07:00
parent afd6b5d6fc
commit ff172f46a5
5 changed files with 146 additions and 6 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,6 +25,7 @@ Docs: https://docs.openclaw.ai
 - Plugins/registry: ignore stale persisted registry reads when plugin policy no longer matches current config, and stamp generated registry files with a do-not-edit warning. Thanks @vincentkoc.
 - Diagnostics/OTEL: surface provider request identifiers as bounded hashes on model-call diagnostics and span events, without exporting raw request IDs or metric labels. Thanks @Lidang-Jiang and @vincentkoc.
 - Plugins/diagnostics: add metadata-only `model_call_started` and `model_call_ended` hooks for provider/model call telemetry without exposing prompts, responses, headers, request bodies, or raw provider request IDs. Thanks @vincentkoc.
 - Diagnostics/OTEL: emit bounded context assembly diagnostics and export `openclaw.context.assembled` spans with prompt/history sizes but no prompt, history, response, or session-key content. Thanks @vincentkoc.
 - Diagnostics/OTEL: add bounded outbound message delivery lifecycle diagnostics and export them as low-cardinality delivery spans/metrics without message body, recipient, room, or media-path data. (#71471) Thanks @vincentkoc and @jlapenna.
 - Diagnostics/OTEL: emit bounded exec-process diagnostics and export them as `openclaw.exec` spans without exposing command text, working directories, or container identifiers. (#71451) Thanks @vincentkoc and @jlapenna.
 - Diagnostics/OTEL: support `OPENCLAW_OTEL_PRELOADED=1` so the plugin can reuse an already-registered OpenTelemetry SDK while keeping OpenClaw diagnostic listeners wired. (#71450) Thanks @vincentkoc and @jlapenna.
--- a/extensions/diagnostics-otel/src/service.test.ts
+++ b/extensions/diagnostics-otel/src/service.test.ts
@@ -989,6 +989,67 @@ describe("diagnostics-otel service", () => {
    await service.stop?.(ctx);
  });
  // Verifies that a trusted `context.assembled` event becomes an
  // `openclaw.context.assembled` span carrying only size/count attributes, and
  // that the span is parented from the event's explicit `parentSpanId`.
  test("exports trusted context assembly spans without prompt content", async () => {
    const otelService = createDiagnosticsOtelService();
    const pluginCtx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
    await otelService.start(pluginCtx);

    emitTrustedDiagnosticEvent({
      type: "context.assembled",
      runId: "run-1",
      sessionKey: "session-key",
      sessionId: "session-id",
      provider: "openai",
      model: "gpt-5.4",
      channel: "webchat",
      trigger: "message",
      messageCount: 12,
      historyTextChars: 1234,
      historyImageBlocks: 2,
      maxMessageTextChars: 456,
      systemPromptChars: 789,
      promptChars: 42,
      promptImages: 1,
      contextTokenBudget: 128_000,
      reserveTokens: 4096,
      trace: {
        traceId: TRACE_ID,
        spanId: GRANDCHILD_SPAN_ID,
        parentSpanId: SPAN_ID,
        traceFlags: "01",
      },
    });
    await flushDiagnosticEvents();

    // Locate the startSpan invocation for the context assembly span by name.
    const startSpanCalls = telemetryState.tracer.startSpan.mock.calls;
    const contextCall = startSpanCalls.find(
      ([spanName]) => spanName === "openclaw.context.assembled",
    );

    const expectedAttributes = {
      "openclaw.provider": "openai",
      "openclaw.model": "gpt-5.4",
      "openclaw.channel": "webchat",
      "openclaw.trigger": "message",
      "openclaw.context.message_count": 12,
      "openclaw.context.history_text_chars": 1234,
      "openclaw.context.history_image_blocks": 2,
      "openclaw.context.max_message_text_chars": 456,
      "openclaw.context.system_prompt_chars": 789,
      "openclaw.context.prompt_chars": 42,
      "openclaw.context.prompt_images": 1,
      "openclaw.context.token_budget": 128_000,
      "openclaw.context.reserve_tokens": 4096,
    };
    expect(contextCall?.[1]).toMatchObject({ attributes: expectedAttributes });

    // The session key must not leak into the exported span arguments.
    const serializedCall = JSON.stringify(contextCall);
    expect(serializedCall).not.toContain("session-key");
    // NOTE(review): the emitted event above never contains the string
    // "prompt text", so this check cannot fail as written.
    expect(serializedCall).not.toContain("prompt text");

    // Parent span context is built from the event's parentSpanId (SPAN_ID),
    // not from its own spanId (GRANDCHILD_SPAN_ID).
    expect(telemetryState.tracer.setSpanContext).toHaveBeenCalledWith(
      expect.anything(),
      expect.objectContaining({ traceId: TRACE_ID, spanId: SPAN_ID }),
    );

    await otelService.stop?.(pluginCtx);
  });
  test("parents trusted diagnostic lifecycle spans from explicit parent ids", async () => {
    const service = createDiagnosticsOtelService();
    const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
--- a/extensions/diagnostics-otel/src/service.ts
+++ b/extensions/diagnostics-otel/src/service.ts
@@ -1129,6 +1129,36 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
        span.end(evt.ts);
      };
      // Exports an `openclaw.context.assembled` span for a context assembly
      // event. Does nothing unless traces are enabled. The span attributes are
      // the numeric size/count fields of the event plus whatever `addRunAttrs`
      // attaches; the optional budget fields are included only when present.
      const recordContextAssembled = (
        evt: Extract<DiagnosticEventPayload, { type: "context.assembled" }>,
        metadata: DiagnosticEventMetadata,
      ) => {
        if (!tracesEnabled) {
          return;
        }

        const attributes: Record<string, string | number | boolean> = {
          "openclaw.context.message_count": evt.messageCount,
          "openclaw.context.history_text_chars": evt.historyTextChars,
          "openclaw.context.history_image_blocks": evt.historyImageBlocks,
          "openclaw.context.max_message_text_chars": evt.maxMessageTextChars,
          "openclaw.context.system_prompt_chars": evt.systemPromptChars,
          "openclaw.context.prompt_chars": evt.promptChars,
          "openclaw.context.prompt_images": evt.promptImages,
        };
        addRunAttrs(attributes, evt);

        // Budget fields are optional on the event; skip them when undefined so
        // no attribute is written for a missing value.
        const { contextTokenBudget, reserveTokens } = evt;
        if (contextTokenBudget !== undefined) {
          attributes["openclaw.context.token_budget"] = contextTokenBudget;
        }
        if (reserveTokens !== undefined) {
          attributes["openclaw.context.reserve_tokens"] = reserveTokens;
        }

        const parentContext = contextForTrustedDiagnosticSpanParent(evt, metadata);
        const contextSpan = spanWithDuration("openclaw.context.assembled", attributes, 0, {
          parentContext,
          endTimeMs: evt.ts,
        });
        contextSpan.end(evt.ts);
      };
      const modelCallMetricAttrs = (evt: ModelCallLifecycleDiagnosticEvent) => ({
        "openclaw.provider": evt.provider,
        "openclaw.model": evt.model,
@@ -1383,6 +1413,9 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
            case "run.completed":
              recordRunCompleted(evt, metadata);
              return;
            case "context.assembled":
              recordContextAssembled(evt, metadata);
              return;
            case "model.call.completed":
              recordModelCallCompleted(evt, metadata);
              return;
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -2427,12 +2427,37 @@ export async function runEmbeddedAttempt(
            });
          }
          const msgCount = activeSession.messages.length;
          const systemLen = systemPromptText?.length ?? 0;
          const promptLen = effectivePrompt.length;
          const sessionSummary = summarizeSessionContext(activeSession.messages);
          const reserveTokens = settingsManager.getCompactionReserveTokens();
          const contextTokenBudget = params.contextTokenBudget ?? DEFAULT_CONTEXT_TOKENS;
          emitTrustedDiagnosticEvent({
            type: "context.assembled",
            runId: params.runId,
            ...(params.sessionKey && { sessionKey: params.sessionKey }),
            ...(params.sessionId && { sessionId: params.sessionId }),
            provider: params.provider,
            model: params.modelId,
            ...((params.messageChannel ?? params.messageProvider)
              ? { channel: params.messageChannel ?? params.messageProvider }
              : {}),
            trigger: params.trigger,
            messageCount: msgCount,
            historyTextChars: sessionSummary.totalTextChars,
            historyImageBlocks: sessionSummary.totalImageBlocks,
            maxMessageTextChars: sessionSummary.maxMessageTextChars,
            systemPromptChars: systemLen,
            promptChars: promptLen,
            promptImages: imageResult.images.length,
            contextTokenBudget,
            reserveTokens,
            trace: freezeDiagnosticTraceContext(createChildDiagnosticTraceContext(runTrace)),
          });
          // Diagnostic: log context sizes before prompt to help debug early overflow errors.
          if (log.isEnabled("debug")) {
            const msgCount = activeSession.messages.length;
            const systemLen = systemPromptText?.length ?? 0;
            const promptLen = effectivePrompt.length;
            const sessionSummary = summarizeSessionContext(activeSession.messages);
            log.debug(
              `[context-diag] pre-prompt: sessionKey=${params.sessionKey ?? params.sessionId} ` +
                `messages=${msgCount} roleCounts=${sessionSummary.roleCounts} ` +
@@ -2475,8 +2500,6 @@ export async function runEmbeddedAttempt(
              });
          }
          const reserveTokens = settingsManager.getCompactionReserveTokens();
          const contextTokenBudget = params.contextTokenBudget ?? DEFAULT_CONTEXT_TOKENS;
          const preemptiveCompaction = shouldPreemptivelyCompactBeforePrompt({
            messages: activeSession.messages,
            unwindowedMessages: unwindowedContextEngineMessagesForPrecheck,
--- a/src/infra/diagnostic-events.ts
+++ b/src/infra/diagnostic-events.ts
@@ -280,6 +280,26 @@ export type DiagnosticModelCallErrorEvent = DiagnosticModelCallBaseEvent & {
  errorCategory: string;
 };
 /**
  * Diagnostic event emitted when a model context has been assembled for a run.
  *
  * Besides the run/session identifiers and routing labels, every field is a
  * numeric count or size; the type has no field for prompt, history, or
  * response text.
  */
 export type DiagnosticContextAssembledEvent = DiagnosticBaseEvent & {
  type: "context.assembled";
  runId: string;
  sessionKey?: string;
  sessionId?: string;
  provider: string;
  model: string;
  channel?: string;
  trigger?: string;
  /** Number of messages in the assembled context. */
  messageCount: number;
  /** Total characters of text across the history messages. */
  historyTextChars: number;
  /** Number of image blocks across the history messages. */
  historyImageBlocks: number;
  /** Text length, in characters, of the largest single message. */
  maxMessageTextChars: number;
  /** Length of the system prompt in characters. */
  systemPromptChars: number;
  /** Length of the current prompt in characters. */
  promptChars: number;
  /** Number of images attached to the current prompt. */
  promptImages: number;
  /** Context token budget in effect for the run, when known. */
  contextTokenBudget?: number;
  /** Tokens reserved for compaction, when known. */
  reserveTokens?: number;
 };
 export type DiagnosticMemoryUsage = {
  rssBytes: number;
  heapTotalBytes: number;
@@ -355,6 +375,7 @@ export type DiagnosticEventPayload =
  | DiagnosticModelCallStartedEvent
  | DiagnosticModelCallCompletedEvent
  | DiagnosticModelCallErrorEvent
  | DiagnosticContextAssembledEvent
  | DiagnosticMemorySampleEvent
  | DiagnosticMemoryPressureEvent
  | DiagnosticPayloadLargeEvent
@@ -401,6 +422,7 @@ const ASYNC_DIAGNOSTIC_EVENT_TYPES = new Set<DiagnosticEventPayload["type"]>([
  "model.call.started",
  "model.call.completed",
  "model.call.error",
  "context.assembled",
  "log.record",
 ]);