From 82ddcf24f5875224d2afe559147a0a621ce8c383 Mon Sep 17 00:00:00 2001
From: Vincent Koc <vincentkoc@ieee.org>
Date: Sat, 25 Apr 2026 23:34:03 -0700
Subject: [PATCH] feat(diagnostics): add harness lifecycle telemetry

---
 CHANGELOG.md                                  |   1 +
 docs/concepts/qa-e2e-automation.md            |   6 +-
 .../diagnostics-otel/src/service.test.ts      |  78 +++++++++-
 extensions/diagnostics-otel/src/service.ts    |  91 +++++++++++
 qa/scenarios/runtime/otel-trace-smoke.md      |   2 +
 scripts/qa-otel-smoke.ts                      |   1 +
 src/agents/harness/v2.test.ts                 | 147 +++++++++++++++++-
 src/agents/harness/v2.ts                      | 121 +++++++++++++-
 src/infra/diagnostic-events.ts                |  47 ++++++
 src/logging/diagnostic-stability.ts           |  28 ++++
 10 files changed, 516 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4512e18e12f..7d14dff836d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
 - Diagnostics/OTEL: align model-call GenAI span attributes with OpenTelemetry stability opt-in semantics, keeping legacy `gen_ai.system` by default while emitting `gen_ai.provider.name` under `OTEL_SEMCONV_STABILITY_OPT_IN=gen_ai_latest_experimental`. Thanks @vincentkoc.
 - Diagnostics/OTEL: support signal-specific OTLP endpoint overrides for traces, metrics, and logs via config or standard OTEL environment variables. Thanks @vincentkoc.
 - Diagnostics/OTEL: emit bounded telemetry exporter health diagnostics for startup and log-export failures without exporting raw error text. Thanks @vincentkoc.
+- Diagnostics/OTEL: export agent harness lifecycle telemetry as bounded `openclaw.harness.run` spans and `openclaw.harness.duration_ms` metrics so QA-lab, Codex, and future harnesses share one trace shape. Thanks @vincentkoc.
 - Plugins/CLI: add `openclaw plugins registry` for explicit persisted-registry inspection and `--refresh` repair without making normal startup rescan plugin locations. Thanks @vincentkoc.
 - Plugins/CLI: make `openclaw plugins list` read the cold persisted registry snapshot by default, leaving module-aware diagnostics to `plugins doctor` and `plugins inspect`. Thanks @vincentkoc.
 - Plugins/startup: move gateway startup plugin planning onto the versioned cold registry index, with postinstall repair for older registry files that predate startup metadata. Thanks @vincentkoc.
diff --git a/docs/concepts/qa-e2e-automation.md b/docs/concepts/qa-e2e-automation.md
index 272d16a185a..d56e546b6f1 100644
--- a/docs/concepts/qa-e2e-automation.md
+++ b/docs/concepts/qa-e2e-automation.md
@@ -59,9 +59,9 @@ pnpm qa:otel:smoke
 That script starts a local OTLP/HTTP trace receiver, runs the
 `otel-trace-smoke` QA scenario with the `diagnostics-otel` plugin enabled, then
 decodes the exported protobuf spans and asserts the release-critical shape:
-`openclaw.run`, `openclaw.model.call`, `openclaw.context.assembled`, and
-`openclaw.message.delivery` must be present; model calls must not export
-`StreamAbandoned` on successful turns; raw diagnostic IDs and
+`openclaw.run`, `openclaw.harness.run`, `openclaw.model.call`,
+`openclaw.context.assembled`, and `openclaw.message.delivery` must be present;
+model calls must not export `StreamAbandoned` on successful turns; raw diagnostic IDs and
 `openclaw.content.*` attributes must stay out of the trace. It writes
 `otel-smoke-summary.json` next to the QA suite artifacts.
 
diff --git a/extensions/diagnostics-otel/src/service.test.ts b/extensions/diagnostics-otel/src/service.test.ts
index ff88364d756..0cf3b373e71 100644
--- a/extensions/diagnostics-otel/src/service.test.ts
+++ b/extensions/diagnostics-otel/src/service.test.ts
@@ -1140,6 +1140,28 @@ describe("diagnostics-otel service", () => {
         traceFlags: "01",
       },
     });
+    emitDiagnosticEvent({
+      type: "harness.run.completed",
+      runId: "run-1",
+      sessionKey: "session-key",
+      sessionId: "session-1",
+      provider: "codex",
+      model: "gpt-5.4",
+      channel: "qa",
+      harnessId: "codex",
+      pluginId: "codex-plugin",
+      outcome: "completed",
+      durationMs: 90,
+      resultClassification: "reasoning-only",
+      yieldDetected: true,
+      itemLifecycle: { startedCount: 3, completedCount: 2, activeCount: 1 },
+      trace: {
+        traceId: TRACE_ID,
+        spanId: GRANDCHILD_SPAN_ID,
+        parentSpanId: CHILD_SPAN_ID,
+        traceFlags: "01",
+      },
+    });
     emitDiagnosticEvent({
       type: "tool.execution.error",
       runId: "run-1",
@@ -1160,7 +1182,12 @@ describe("diagnostics-otel service", () => {
 
     const spanNames = telemetryState.tracer.startSpan.mock.calls.map((call) => call[0]);
     expect(spanNames).toEqual(
-      expect.arrayContaining(["openclaw.run", "openclaw.model.call", "openclaw.tool.execution"]),
+      expect.arrayContaining([
+        "openclaw.run",
+        "openclaw.model.call",
+        "openclaw.harness.run",
+        "openclaw.tool.execution",
+      ]),
     );
 
     const runCall = telemetryState.tracer.startSpan.mock.calls.find(
@@ -1207,6 +1234,36 @@ describe("diagnostics-otel service", () => {
     });
     expect(modelCall?.[2]).toBeUndefined();
 
+    const harnessCall = telemetryState.tracer.startSpan.mock.calls.find(
+      (call) => call[0] === "openclaw.harness.run",
+    );
+    expect(harnessCall?.[1]).toMatchObject({
+      attributes: {
+        "openclaw.harness.id": "codex",
+        "openclaw.harness.plugin": "codex-plugin",
+        "openclaw.outcome": "completed",
+        "openclaw.provider": "codex",
+        "openclaw.model": "gpt-5.4",
+        "openclaw.channel": "qa",
+        "openclaw.harness.result_classification": "reasoning-only",
+        "openclaw.harness.yield_detected": true,
+        "openclaw.harness.items.started": 3,
+        "openclaw.harness.items.completed": 2,
+        "openclaw.harness.items.active": 1,
+      },
+      startTime: expect.any(Number),
+    });
+    expect(harnessCall?.[1]).toEqual({
+      attributes: expect.not.objectContaining({
+        "openclaw.runId": expect.anything(),
+        "openclaw.sessionId": expect.anything(),
+        "openclaw.sessionKey": expect.anything(),
+        "openclaw.traceId": expect.anything(),
+      }),
+      startTime: expect.any(Number),
+    });
+    expect(harnessCall?.[2]).toBeUndefined();
+
     const toolCall = telemetryState.tracer.startSpan.mock.calls.find(
       (call) => call[0] === "openclaw.tool.execution",
     );
@@ -1244,6 +1301,25 @@ describe("diagnostics-otel service", () => {
         "openclaw.runId": expect.anything(),
       }),
     );
+    expect(
+      telemetryState.histograms.get("openclaw.harness.duration_ms")?.record,
+    ).toHaveBeenCalledWith(
+      90,
+      expect.objectContaining({
+        "openclaw.harness.id": "codex",
+        "openclaw.harness.plugin": "codex-plugin",
+        "openclaw.outcome": "completed",
+      }),
+    );
+    expect(
+      telemetryState.histograms.get("openclaw.harness.duration_ms")?.record,
+    ).toHaveBeenCalledWith(
+      90,
+      expect.not.objectContaining({
+        "openclaw.runId": expect.anything(),
+        "openclaw.sessionKey": expect.anything(),
+      }),
+    );
     expect(
       telemetryState.histograms.get("openclaw.tool.execution.duration_ms")?.record,
     ).toHaveBeenCalledWith(
diff --git a/extensions/diagnostics-otel/src/service.ts b/extensions/diagnostics-otel/src/service.ts
index 1fa67c694dc..c34ab6f1c6c 100644
--- a/extensions/diagnostics-otel/src/service.ts
+++ b/extensions/diagnostics-otel/src/service.ts
@@ -81,6 +81,10 @@ type ModelCallLifecycleDiagnosticEvent = Extract<
   DiagnosticEventPayload,
   { type: "model.call.completed" | "model.call.error" }
 >;
+type HarnessRunLifecycleDiagnosticEvent = Extract<
+  DiagnosticEventPayload,
+  { type: "harness.run.completed" | "harness.run.error" }
+>;
 type TelemetryExporterDiagnosticEvent = Extract<
   DiagnosticEventPayload,
   { type: "telemetry.exporter" }
@@ -720,6 +724,10 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
         unit: "ms",
         description: "Agent run duration",
       });
+      const harnessDurationHistogram = meter.createHistogram("openclaw.harness.duration_ms", {
+        unit: "ms",
+        description: "Agent harness lifecycle duration",
+      });
       const contextHistogram = meter.createHistogram("openclaw.context.tokens", {
         unit: "1",
         description: "Context window size and usage",
@@ -1426,6 +1434,82 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
         span.end(evt.ts);
       };
 
+      const harnessRunMetricAttrs = (evt: HarnessRunLifecycleDiagnosticEvent) => ({
+        "openclaw.harness.id": lowCardinalityAttr(evt.harnessId, "unknown"),
+        "openclaw.harness.plugin": lowCardinalityAttr(evt.pluginId),
+        "openclaw.outcome": evt.type === "harness.run.error" ? "error" : evt.outcome,
+        "openclaw.provider": lowCardinalityAttr(evt.provider, "unknown"),
+        "openclaw.model": lowCardinalityAttr(evt.model, "unknown"),
+        ...(evt.channel ? { "openclaw.channel": lowCardinalityAttr(evt.channel) } : {}),
+      });
+
+      const recordHarnessRunCompleted = (
+        evt: Extract<DiagnosticEventPayload, { type: "harness.run.completed" }>,
+        metadata: DiagnosticEventMetadata,
+      ) => {
+        harnessDurationHistogram.record(evt.durationMs, harnessRunMetricAttrs(evt));
+        if (!tracesEnabled) {
+          return;
+        }
+        const spanAttrs: Record<string, string | number | boolean> = {
+          ...harnessRunMetricAttrs(evt),
+        };
+        if (evt.resultClassification) {
+          spanAttrs["openclaw.harness.result_classification"] = lowCardinalityAttr(
+            evt.resultClassification,
+          );
+        }
+        if (typeof evt.yieldDetected === "boolean") {
+          spanAttrs["openclaw.harness.yield_detected"] = evt.yieldDetected;
+        }
+        if (evt.itemLifecycle) {
+          spanAttrs["openclaw.harness.items.started"] = evt.itemLifecycle.startedCount;
+          spanAttrs["openclaw.harness.items.completed"] = evt.itemLifecycle.completedCount;
+          spanAttrs["openclaw.harness.items.active"] = evt.itemLifecycle.activeCount;
+        }
+        const span = spanWithDuration("openclaw.harness.run", spanAttrs, evt.durationMs, {
+          parentContext: contextForTrustedDiagnosticSpanParent(evt, metadata),
+          endTimeMs: evt.ts,
+        });
+        if (evt.outcome === "error") {
+          span.setStatus({
+            code: SpanStatusCode.ERROR,
+            message: "error",
+          });
+        }
+        span.end(evt.ts);
+      };
+
+      const recordHarnessRunError = (
+        evt: Extract<DiagnosticEventPayload, { type: "harness.run.error" }>,
+        metadata: DiagnosticEventMetadata,
+      ) => {
+        const errorType = lowCardinalityAttr(evt.errorCategory, "other");
+        const attrs = {
+          ...harnessRunMetricAttrs(evt),
+          "openclaw.harness.phase": evt.phase,
+          "openclaw.errorCategory": errorType,
+        };
+        harnessDurationHistogram.record(evt.durationMs, attrs);
+        if (!tracesEnabled) {
+          return;
+        }
+        const spanAttrs: Record<string, string | number | boolean> = {
+          ...attrs,
+          "error.type": errorType,
+          ...(evt.cleanupFailed ? { "openclaw.harness.cleanup_failed": true } : {}),
+        };
+        const span = spanWithDuration("openclaw.harness.run", spanAttrs, evt.durationMs, {
+          parentContext: contextForTrustedDiagnosticSpanParent(evt, metadata),
+          endTimeMs: evt.ts,
+        });
+        span.setStatus({
+          code: SpanStatusCode.ERROR,
+          message: errorType,
+        });
+        span.end(evt.ts);
+      };
+
       const recordContextAssembled = (
         evt: Extract<DiagnosticEventPayload, { type: "context.assembled" }>,
         metadata: DiagnosticEventMetadata,
@@ -1746,6 +1830,12 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
             case "run.completed":
               recordRunCompleted(evt, metadata);
               return;
+            case "harness.run.completed":
+              recordHarnessRunCompleted(evt, metadata);
+              return;
+            case "harness.run.error":
+              recordHarnessRunError(evt, metadata);
+              return;
             case "context.assembled":
               recordContextAssembled(evt, metadata);
               return;
@@ -1781,6 +1871,7 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
               return;
             case "tool.execution.started":
             case "run.started":
+            case "harness.run.started":
             case "model.call.started":
             case "payload.large":
               return;
diff --git a/qa/scenarios/runtime/otel-trace-smoke.md b/qa/scenarios/runtime/otel-trace-smoke.md
index bafd3b39f86..f4a94e70b3f 100644
--- a/qa/scenarios/runtime/otel-trace-smoke.md
+++ b/qa/scenarios/runtime/otel-trace-smoke.md
@@ -13,6 +13,7 @@ objective: Verify a QA-lab gateway run emits bounded OpenTelemetry trace spans t
 successCriteria:
   - The diagnostics-otel plugin starts with trace export enabled.
   - A minimal QA-channel agent turn completes.
+  - The trace includes the selected agent harness lifecycle span.
   - The run emits low-cardinality OpenTelemetry trace spans without content or raw diagnostic identifiers.
 plugins:
   - diagnostics-otel
@@ -33,6 +34,7 @@ docsRefs:
   - docs/concepts/qa-e2e-automation.md
 codeRefs:
   - extensions/diagnostics-otel/src/service.ts
+  - src/agents/harness/v2.ts
   - extensions/qa-lab/src/suite.ts
 execution:
   kind: flow
diff --git a/scripts/qa-otel-smoke.ts b/scripts/qa-otel-smoke.ts
index d5444ba9562..a1660e8fedc 100644
--- a/scripts/qa-otel-smoke.ts
+++ b/scripts/qa-otel-smoke.ts
@@ -80,6 +80,7 @@ type CapturedSpan = {
 const DEFAULT_SCENARIO_ID = "otel-trace-smoke";
 const REQUIRED_SPAN_NAMES = [
   "openclaw.run",
+  "openclaw.harness.run",
   "openclaw.model.call",
   "openclaw.context.assembled",
   "openclaw.message.delivery",
diff --git a/src/agents/harness/v2.test.ts b/src/agents/harness/v2.test.ts
index dc6f196715c..e8ea68a9897 100644
--- a/src/agents/harness/v2.test.ts
+++ b/src/agents/harness/v2.test.ts
@@ -1,5 +1,11 @@
 import type { Api, Model } from "@mariozechner/pi-ai";
-import { describe, expect, it, vi } from "vitest";
+import { afterEach, describe, expect, it, vi } from "vitest";
+import {
+  onInternalDiagnosticEvent,
+  resetDiagnosticEventsForTest,
+  type DiagnosticEventMetadata,
+  type DiagnosticEventPayload,
+} from "../../infra/diagnostic-events.js";
 import type { EmbeddedRunAttemptResult } from "../pi-embedded-runner/run/types.js";
 import type { AgentHarness, AgentHarnessAttemptParams } from "./types.js";
 import type { AgentHarnessV2 } from "./v2.js";
@@ -9,6 +15,7 @@ function createAttemptParams(): AgentHarnessAttemptParams {
   return {
     prompt: "hello",
     sessionId: "session-1",
+    sessionKey: "session-key",
     runId: "run-1",
     sessionFile: "/tmp/session.jsonl",
     workspaceDir: "/tmp/workspace",
@@ -19,9 +26,19 @@ function createAttemptParams(): AgentHarnessAttemptParams {
     authStorage: {} as never,
     modelRegistry: {} as never,
     thinkLevel: "low",
+    messageChannel: "qa",
+    trigger: "manual",
   } as AgentHarnessAttemptParams;
 }
 
+function createDiagnosticTrace() {
+  return {
+    traceId: "11111111111111111111111111111111",
+    spanId: "2222222222222222",
+    traceFlags: "01",
+  };
+}
+
 function createAttemptResult(): EmbeddedRunAttemptResult {
   return {
     aborted: false,
@@ -32,6 +49,7 @@ function createAttemptResult(): EmbeddedRunAttemptResult {
     promptError: null,
     promptErrorSource: null,
     sessionIdUsed: "session-1",
+    diagnosticTrace: createDiagnosticTrace(),
     messagesSnapshot: [],
     assistantTexts: ["ok"],
     toolMetas: [],
@@ -46,7 +64,28 @@ function createAttemptResult(): EmbeddedRunAttemptResult {
   };
 }
 
+async function flushDiagnosticEvents(): Promise<void> {
+  await new Promise<void>((resolve) => setImmediate(resolve));
+}
+
+function captureDiagnosticEvents(): {
+  events: Array<{ event: DiagnosticEventPayload; metadata: DiagnosticEventMetadata }>;
+  unsubscribe: () => void;
+} {
+  const events: Array<{ event: DiagnosticEventPayload; metadata: DiagnosticEventMetadata }> = [];
+  const unsubscribe = onInternalDiagnosticEvent((event, metadata) => {
+    if (event.type.startsWith("harness.run.")) {
+      events.push({ event, metadata });
+    }
+  });
+  return { events, unsubscribe };
+}
+
 describe("AgentHarness V2 compatibility adapter", () => {
+  afterEach(() => {
+    resetDiagnosticEventsForTest();
+  });
+
   it("executes prepare/start/send/outcome/cleanup as one bounded lifecycle", async () => {
     const params = createAttemptParams();
     const result = createAttemptResult();
@@ -102,6 +141,112 @@ describe("AgentHarness V2 compatibility adapter", () => {
     ]);
   });
 
+  it("emits trusted harness lifecycle diagnostics for successful attempts", async () => {
+    resetDiagnosticEventsForTest();
+    const params = createAttemptParams();
+    const result = {
+      ...createAttemptResult(),
+      agentHarnessResultClassification: "reasoning-only",
+      yieldDetected: true,
+      itemLifecycle: { startedCount: 3, completedCount: 2, activeCount: 1 },
+    } as EmbeddedRunAttemptResult;
+    const harness: AgentHarnessV2 = {
+      id: "codex",
+      label: "Codex",
+      pluginId: "codex-plugin",
+      supports: () => ({ supported: true }),
+      prepare: async () => ({
+        harnessId: "codex",
+        label: "Codex",
+        pluginId: "codex-plugin",
+        params,
+        lifecycleState: "prepared",
+      }),
+      start: async (prepared) => ({ ...prepared, lifecycleState: "started" }),
+      send: async () => result,
+      resolveOutcome: async (_session, rawResult) => rawResult,
+      cleanup: async () => {},
+    };
+    const diagnostics = captureDiagnosticEvents();
+    try {
+      await runAgentHarnessV2LifecycleAttempt(harness, params);
+      await flushDiagnosticEvents();
+    } finally {
+      diagnostics.unsubscribe();
+    }
+
+    expect(diagnostics.events.map(({ event }) => event.type)).toEqual([
+      "harness.run.started",
+      "harness.run.completed",
+    ]);
+    expect(diagnostics.events.every(({ metadata }) => metadata.trusted)).toBe(true);
+    expect(diagnostics.events[1]?.event).toMatchObject({
+      type: "harness.run.completed",
+      runId: "run-1",
+      sessionKey: "session-key",
+      sessionId: "session-1",
+      provider: "codex",
+      model: "gpt-5.4",
+      channel: "qa",
+      trigger: "manual",
+      harnessId: "codex",
+      pluginId: "codex-plugin",
+      outcome: "completed",
+      resultClassification: "reasoning-only",
+      yieldDetected: true,
+      itemLifecycle: { startedCount: 3, completedCount: 2, activeCount: 1 },
+      durationMs: expect.any(Number),
+    });
+  });
+
+  it("emits trusted harness error diagnostics with the failing lifecycle phase", async () => {
+    resetDiagnosticEventsForTest();
+    const params = createAttemptParams();
+    const sendError = new Error("codex app-server send failed");
+    const harness: AgentHarnessV2 = {
+      id: "codex",
+      label: "Codex",
+      supports: () => ({ supported: true }),
+      prepare: async () => ({
+        harnessId: "codex",
+        label: "Codex",
+        params,
+        lifecycleState: "prepared",
+      }),
+      start: async (prepared) => ({ ...prepared, lifecycleState: "started" }),
+      send: async () => {
+        throw sendError;
+      },
+      resolveOutcome: async (_session, rawResult) => rawResult,
+      cleanup: async () => {
+        throw new Error("cleanup failed");
+      },
+    };
+    const diagnostics = captureDiagnosticEvents();
+    try {
+      await expect(runAgentHarnessV2LifecycleAttempt(harness, params)).rejects.toThrow(
+        "codex app-server send failed",
+      );
+      await flushDiagnosticEvents();
+    } finally {
+      diagnostics.unsubscribe();
+    }
+
+    expect(diagnostics.events.map(({ event }) => event.type)).toEqual([
+      "harness.run.started",
+      "harness.run.error",
+    ]);
+    expect(diagnostics.events.every(({ metadata }) => metadata.trusted)).toBe(true);
+    expect(diagnostics.events[1]?.event).toMatchObject({
+      type: "harness.run.error",
+      phase: "send",
+      errorCategory: "Error",
+      cleanupFailed: true,
+      harnessId: "codex",
+      durationMs: expect.any(Number),
+    });
+  });
+
   it("runs cleanup with the original failure and preserves that failure", async () => {
     const params = createAttemptParams();
     const sendError = new Error("codex app-server send failed");
diff --git a/src/agents/harness/v2.ts b/src/agents/harness/v2.ts
index 0993bddda1f..bf14a314cdc 100644
--- a/src/agents/harness/v2.ts
+++ b/src/agents/harness/v2.ts
@@ -1,3 +1,10 @@
+import { diagnosticErrorCategory } from "../../infra/diagnostic-error-metadata.js";
+import {
+  emitTrustedDiagnosticEvent,
+  type DiagnosticHarnessRunErrorEvent,
+  type DiagnosticHarnessRunOutcome,
+} from "../../infra/diagnostic-events.js";
+import type { DiagnosticTraceContext } from "../../infra/diagnostic-trace-context.js";
 import { formatErrorMessage } from "../../infra/errors.js";
 import { createSubsystemLogger } from "../../logging/subsystem.js";
 import { applyAgentHarnessResultClassification } from "./result-classification.js";
@@ -13,6 +20,7 @@ import type {
 } from "./types.js";
 
 const log = createSubsystemLogger("agents/harness/v2");
+type AgentHarnessV2LifecyclePhase = DiagnosticHarnessRunErrorEvent["phase"];
 
 type AgentHarnessV2RunBase = {
   harnessId: string;
@@ -95,6 +103,87 @@ export function adaptAgentHarnessToV2(harness: AgentHarness): AgentHarnessV2 {
   };
 }
 
+function agentHarnessDiagnosticBase(
+  harness: AgentHarnessV2,
+  params: AgentHarnessAttemptParams,
+  trace?: DiagnosticTraceContext,
+) {
+  return {
+    runId: params.runId,
+    sessionId: params.sessionId,
+    provider: params.provider,
+    model: params.modelId,
+    harnessId: harness.id,
+    ...(harness.pluginId ? { pluginId: harness.pluginId } : {}),
+    ...(params.sessionKey ? { sessionKey: params.sessionKey } : {}),
+    ...(params.trigger ? { trigger: params.trigger } : {}),
+    ...(params.messageChannel ? { channel: params.messageChannel } : {}),
+    ...(trace ? { trace } : {}),
+  };
+}
+
+function agentHarnessRunOutcome(result: AgentHarnessAttemptResult): DiagnosticHarnessRunOutcome {
+  if (result.promptError) {
+    return "error";
+  }
+  if (result.externalAbort || result.aborted) {
+    return "aborted";
+  }
+  if (result.timedOut || result.idleTimedOut || result.timedOutDuringCompaction) {
+    return "timed_out";
+  }
+  return "completed";
+}
+
+function emitAgentHarnessRunStarted(
+  harness: AgentHarnessV2,
+  params: AgentHarnessAttemptParams,
+): void {
+  emitTrustedDiagnosticEvent({
+    type: "harness.run.started",
+    ...agentHarnessDiagnosticBase(harness, params),
+  });
+}
+
+function emitAgentHarnessRunCompleted(params: {
+  harness: AgentHarnessV2;
+  attemptParams: AgentHarnessAttemptParams;
+  result: AgentHarnessAttemptResult;
+  startedAt: number;
+}): void {
+  const { harness, attemptParams, result, startedAt } = params;
+  emitTrustedDiagnosticEvent({
+    type: "harness.run.completed",
+    ...agentHarnessDiagnosticBase(harness, attemptParams, result.diagnosticTrace),
+    durationMs: Date.now() - startedAt,
+    outcome: agentHarnessRunOutcome(result),
+    ...(result.agentHarnessResultClassification
+      ? { resultClassification: result.agentHarnessResultClassification }
+      : {}),
+    ...(typeof result.yieldDetected === "boolean" ? { yieldDetected: result.yieldDetected } : {}),
+    itemLifecycle: { ...result.itemLifecycle },
+  });
+}
+
+function emitAgentHarnessRunError(params: {
+  harness: AgentHarnessV2;
+  attemptParams: AgentHarnessAttemptParams;
+  startedAt: number;
+  phase: AgentHarnessV2LifecyclePhase;
+  error: unknown;
+  cleanupFailed?: boolean;
+}): void {
+  const { harness, attemptParams, startedAt, phase, error, cleanupFailed } = params;
+  emitTrustedDiagnosticEvent({
+    type: "harness.run.error",
+    ...agentHarnessDiagnosticBase(harness, attemptParams),
+    durationMs: Date.now() - startedAt,
+    phase,
+    errorCategory: diagnosticErrorCategory(error),
+    ...(cleanupFailed ? { cleanupFailed: true } : {}),
+  });
+}
+
 export async function runAgentHarnessV2LifecycleAttempt(
   harness: AgentHarnessV2,
   params: AgentHarnessAttemptParams,
@@ -103,13 +192,21 @@ export async function runAgentHarnessV2LifecycleAttempt(
   let session: AgentHarnessV2Session | undefined;
   let rawResult: AgentHarnessAttemptResult | undefined;
   let result: AgentHarnessAttemptResult;
+  let phase: AgentHarnessV2LifecyclePhase = "prepare";
+  const startedAt = Date.now();
 
+  emitAgentHarnessRunStarted(harness, params);
   try {
+    phase = "prepare";
     prepared = await harness.prepare(params);
+    phase = "start";
     session = await harness.start(prepared);
+    phase = "send";
     rawResult = await harness.send(session);
+    phase = "resolve";
     result = await harness.resolveOutcome(session, rawResult);
   } catch (error) {
+    let cleanupFailed = false;
     try {
       await harness.cleanup({
         prepared,
@@ -118,6 +215,7 @@ export async function runAgentHarnessV2LifecycleAttempt(
         ...(rawResult === undefined ? {} : { result: rawResult }),
       });
     } catch (cleanupError) {
+      cleanupFailed = true;
       // Preserve the user-visible harness failure. Cleanup errors after a
       // failed lifecycle stage must not mask the actionable runtime error.
       log.warn("agent harness cleanup failed after attempt failure", {
@@ -128,9 +226,30 @@ export async function runAgentHarnessV2LifecycleAttempt(
         originalError: formatErrorMessage(error),
       });
     }
+    emitAgentHarnessRunError({
+      harness,
+      attemptParams: params,
+      startedAt,
+      phase,
+      error,
+      cleanupFailed,
+    });
     throw error;
   }
 
-  await harness.cleanup({ prepared, session, result });
+  try {
+    phase = "cleanup";
+    await harness.cleanup({ prepared, session, result });
+  } catch (error) {
+    emitAgentHarnessRunError({
+      harness,
+      attemptParams: params,
+      startedAt,
+      phase,
+      error,
+    });
+    throw error;
+  }
+  emitAgentHarnessRunCompleted({ harness, attemptParams: params, result, startedAt });
   return result;
 }
diff --git a/src/infra/diagnostic-events.ts b/src/infra/diagnostic-events.ts
index b93b44d9b64..d6ed9ebd091 100644
--- a/src/infra/diagnostic-events.ts
+++ b/src/infra/diagnostic-events.ts
@@ -256,6 +256,47 @@ export type DiagnosticRunCompletedEvent = DiagnosticRunBaseEvent & {
   errorCategory?: string;
 };
 
+export type DiagnosticHarnessRunPhase = "prepare" | "start" | "send" | "resolve" | "cleanup";
+export type DiagnosticHarnessRunOutcome = "completed" | "aborted" | "timed_out" | "error";
+
+type DiagnosticHarnessRunBaseEvent = DiagnosticBaseEvent & {
+  type: "harness.run.started" | "harness.run.completed" | "harness.run.error";
+  runId: string;
+  sessionKey?: string;
+  sessionId?: string;
+  provider?: string;
+  model?: string;
+  trigger?: string;
+  channel?: string;
+  harnessId: string;
+  pluginId?: string;
+};
+
+export type DiagnosticHarnessRunStartedEvent = DiagnosticHarnessRunBaseEvent & {
+  type: "harness.run.started";
+};
+
+export type DiagnosticHarnessRunCompletedEvent = DiagnosticHarnessRunBaseEvent & {
+  type: "harness.run.completed";
+  durationMs: number;
+  outcome: DiagnosticHarnessRunOutcome;
+  resultClassification?: "empty" | "reasoning-only" | "planning-only";
+  yieldDetected?: boolean;
+  itemLifecycle?: {
+    startedCount: number;
+    completedCount: number;
+    activeCount: number;
+  };
+};
+
+export type DiagnosticHarnessRunErrorEvent = DiagnosticHarnessRunBaseEvent & {
+  type: "harness.run.error";
+  durationMs: number;
+  phase: DiagnosticHarnessRunPhase;
+  errorCategory: string;
+  cleanupFailed?: boolean;
+};
+
 type DiagnosticModelCallBaseEvent = DiagnosticBaseEvent & {
   type: "model.call.started" | "model.call.completed" | "model.call.error";
   runId: string;
@@ -392,6 +433,9 @@ export type DiagnosticEventPayload =
   | DiagnosticExecProcessCompletedEvent
   | DiagnosticRunStartedEvent
   | DiagnosticRunCompletedEvent
+  | DiagnosticHarnessRunStartedEvent
+  | DiagnosticHarnessRunCompletedEvent
+  | DiagnosticHarnessRunErrorEvent
   | DiagnosticModelCallStartedEvent
   | DiagnosticModelCallCompletedEvent
   | DiagnosticModelCallErrorEvent
@@ -446,6 +490,9 @@ const ASYNC_DIAGNOSTIC_EVENT_TYPES = new Set<DiagnosticEventPayload["type"]>([
   "model.call.started",
   "model.call.completed",
   "model.call.error",
+  "harness.run.started",
+  "harness.run.completed",
+  "harness.run.error",
   "context.assembled",
   "log.record",
 ]);
diff --git a/src/logging/diagnostic-stability.ts b/src/logging/diagnostic-stability.ts
index 50a2bbad2ca..086a353fa37 100644
--- a/src/logging/diagnostic-stability.ts
+++ b/src/logging/diagnostic-stability.ts
@@ -305,6 +305,34 @@ function sanitizeDiagnosticEvent(event: DiagnosticEventPayload): DiagnosticStabi
       record.outcome = event.outcome;
       assignReasonCode(record, event.errorCategory);
       break;
+    case "harness.run.started":
+      record.source = event.harnessId;
+      record.pluginId = event.pluginId;
+      record.provider = event.provider;
+      record.model = event.model;
+      record.channel = event.channel;
+      break;
+    case "harness.run.completed":
+      record.source = event.harnessId;
+      record.pluginId = event.pluginId;
+      record.provider = event.provider;
+      record.model = event.model;
+      record.channel = event.channel;
+      record.durationMs = event.durationMs;
+      record.outcome = event.outcome;
+      record.count = event.itemLifecycle?.completedCount;
+      break;
+    case "harness.run.error":
+      record.source = event.harnessId;
+      record.pluginId = event.pluginId;
+      record.provider = event.provider;
+      record.model = event.model;
+      record.channel = event.channel;
+      record.durationMs = event.durationMs;
+      record.outcome = "error";
+      record.action = event.phase;
+      assignReasonCode(record, event.errorCategory);
+      break;
     case "model.call.started":
       record.provider = event.provider;
       record.model = event.model;