feat(diagnostics): add harness lifecycle telemetry

This commit is contained in:
Vincent Koc
2026-04-25 23:34:03 -07:00
parent 8bbb143ab8
commit 82ddcf24f5
10 changed files with 516 additions and 6 deletions

View File

@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
- Diagnostics/OTEL: align model-call GenAI span attributes with OpenTelemetry stability opt-in semantics, keeping legacy `gen_ai.system` by default while emitting `gen_ai.provider.name` under `OTEL_SEMCONV_STABILITY_OPT_IN=gen_ai_latest_experimental`. Thanks @vincentkoc.
- Diagnostics/OTEL: support signal-specific OTLP endpoint overrides for traces, metrics, and logs via config or standard OTEL environment variables. Thanks @vincentkoc.
- Diagnostics/OTEL: emit bounded telemetry exporter health diagnostics for startup and log-export failures without exporting raw error text. Thanks @vincentkoc.
- Diagnostics/OTEL: export agent harness lifecycle telemetry as bounded `openclaw.harness.run` spans and `openclaw.harness.duration_ms` metrics so QA-lab, Codex, and future harnesses share one trace shape. Thanks @vincentkoc.
- Plugins/CLI: add `openclaw plugins registry` for explicit persisted-registry inspection and `--refresh` repair without making normal startup rescan plugin locations. Thanks @vincentkoc.
- Plugins/CLI: make `openclaw plugins list` read the cold persisted registry snapshot by default, leaving module-aware diagnostics to `plugins doctor` and `plugins inspect`. Thanks @vincentkoc.
- Plugins/startup: move gateway startup plugin planning onto the versioned cold registry index, with postinstall repair for older registry files that predate startup metadata. Thanks @vincentkoc.

View File

@@ -59,9 +59,9 @@ pnpm qa:otel:smoke
That script starts a local OTLP/HTTP trace receiver, runs the
`otel-trace-smoke` QA scenario with the `diagnostics-otel` plugin enabled, then
decodes the exported protobuf spans and asserts the release-critical shape:
`openclaw.run`, `openclaw.model.call`, `openclaw.context.assembled`, and
`openclaw.message.delivery` must be present; model calls must not export
`StreamAbandoned` on successful turns; raw diagnostic IDs and
`openclaw.run`, `openclaw.harness.run`, `openclaw.model.call`,
`openclaw.context.assembled`, and `openclaw.message.delivery` must be present;
model calls must not export `StreamAbandoned` on successful turns; raw diagnostic IDs and
`openclaw.content.*` attributes must stay out of the trace. It writes
`otel-smoke-summary.json` next to the QA suite artifacts.

View File

@@ -1140,6 +1140,28 @@ describe("diagnostics-otel service", () => {
traceFlags: "01",
},
});
emitDiagnosticEvent({
type: "harness.run.completed",
runId: "run-1",
sessionKey: "session-key",
sessionId: "session-1",
provider: "codex",
model: "gpt-5.4",
channel: "qa",
harnessId: "codex",
pluginId: "codex-plugin",
outcome: "completed",
durationMs: 90,
resultClassification: "reasoning-only",
yieldDetected: true,
itemLifecycle: { startedCount: 3, completedCount: 2, activeCount: 1 },
trace: {
traceId: TRACE_ID,
spanId: GRANDCHILD_SPAN_ID,
parentSpanId: CHILD_SPAN_ID,
traceFlags: "01",
},
});
emitDiagnosticEvent({
type: "tool.execution.error",
runId: "run-1",
@@ -1160,7 +1182,12 @@ describe("diagnostics-otel service", () => {
const spanNames = telemetryState.tracer.startSpan.mock.calls.map((call) => call[0]);
expect(spanNames).toEqual(
expect.arrayContaining(["openclaw.run", "openclaw.model.call", "openclaw.tool.execution"]),
expect.arrayContaining([
"openclaw.run",
"openclaw.model.call",
"openclaw.harness.run",
"openclaw.tool.execution",
]),
);
const runCall = telemetryState.tracer.startSpan.mock.calls.find(
@@ -1207,6 +1234,36 @@ describe("diagnostics-otel service", () => {
});
expect(modelCall?.[2]).toBeUndefined();
const harnessCall = telemetryState.tracer.startSpan.mock.calls.find(
(call) => call[0] === "openclaw.harness.run",
);
expect(harnessCall?.[1]).toMatchObject({
attributes: {
"openclaw.harness.id": "codex",
"openclaw.harness.plugin": "codex-plugin",
"openclaw.outcome": "completed",
"openclaw.provider": "codex",
"openclaw.model": "gpt-5.4",
"openclaw.channel": "qa",
"openclaw.harness.result_classification": "reasoning-only",
"openclaw.harness.yield_detected": true,
"openclaw.harness.items.started": 3,
"openclaw.harness.items.completed": 2,
"openclaw.harness.items.active": 1,
},
startTime: expect.any(Number),
});
expect(harnessCall?.[1]).toEqual({
attributes: expect.not.objectContaining({
"openclaw.runId": expect.anything(),
"openclaw.sessionId": expect.anything(),
"openclaw.sessionKey": expect.anything(),
"openclaw.traceId": expect.anything(),
}),
startTime: expect.any(Number),
});
expect(harnessCall?.[2]).toBeUndefined();
const toolCall = telemetryState.tracer.startSpan.mock.calls.find(
(call) => call[0] === "openclaw.tool.execution",
);
@@ -1244,6 +1301,25 @@ describe("diagnostics-otel service", () => {
"openclaw.runId": expect.anything(),
}),
);
expect(
telemetryState.histograms.get("openclaw.harness.duration_ms")?.record,
).toHaveBeenCalledWith(
90,
expect.objectContaining({
"openclaw.harness.id": "codex",
"openclaw.harness.plugin": "codex-plugin",
"openclaw.outcome": "completed",
}),
);
expect(
telemetryState.histograms.get("openclaw.harness.duration_ms")?.record,
).toHaveBeenCalledWith(
90,
expect.not.objectContaining({
"openclaw.runId": expect.anything(),
"openclaw.sessionKey": expect.anything(),
}),
);
expect(
telemetryState.histograms.get("openclaw.tool.execution.duration_ms")?.record,
).toHaveBeenCalledWith(

View File

@@ -81,6 +81,10 @@ type ModelCallLifecycleDiagnosticEvent = Extract<
DiagnosticEventPayload,
{ type: "model.call.completed" | "model.call.error" }
>;
/**
 * The two terminal harness lifecycle events (`harness.run.completed` and
 * `harness.run.error`). `harness.run.started` is deliberately excluded from
 * this union.
 */
type HarnessRunLifecycleDiagnosticEvent = Extract<
DiagnosticEventPayload,
{ type: "harness.run.completed" | "harness.run.error" }
>;
type TelemetryExporterDiagnosticEvent = Extract<
DiagnosticEventPayload,
{ type: "telemetry.exporter" }
@@ -720,6 +724,10 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
unit: "ms",
description: "Agent run duration",
});
// Duration histogram (milliseconds) for agent harness runs, exported as
// `openclaw.harness.duration_ms`.
const harnessDurationHistogram = meter.createHistogram("openclaw.harness.duration_ms", {
unit: "ms",
description: "Agent harness lifecycle duration",
});
const contextHistogram = meter.createHistogram("openclaw.context.tokens", {
unit: "1",
description: "Context window size and usage",
@@ -1426,6 +1434,82 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
span.end(evt.ts);
};
/**
 * Builds the attribute set shared by the harness duration metric and the
 * `openclaw.harness.run` span.
 *
 * Error events are reported with outcome "error"; completed events report
 * their own `outcome`. The channel attribute is only present when the event
 * carries a channel.
 */
const harnessRunMetricAttrs = (evt: HarnessRunLifecycleDiagnosticEvent) => {
  const sharedAttrs = {
    "openclaw.harness.id": lowCardinalityAttr(evt.harnessId, "unknown"),
    "openclaw.harness.plugin": lowCardinalityAttr(evt.pluginId),
    "openclaw.outcome": evt.type === "harness.run.error" ? "error" : evt.outcome,
    "openclaw.provider": lowCardinalityAttr(evt.provider, "unknown"),
    "openclaw.model": lowCardinalityAttr(evt.model, "unknown"),
  };
  if (!evt.channel) {
    return sharedAttrs;
  }
  return { ...sharedAttrs, "openclaw.channel": lowCardinalityAttr(evt.channel) };
};
/**
 * Records a `harness.run.completed` event.
 *
 * The duration metric is always recorded. A span named `openclaw.harness.run`
 * is only produced when trace export is enabled.
 *
 * @param evt - The completed harness run event.
 * @param metadata - Event metadata, passed to
 *   `contextForTrustedDiagnosticSpanParent` to choose the span's parent context.
 */
const recordHarnessRunCompleted = (
  evt: Extract<DiagnosticEventPayload, { type: "harness.run.completed" }>,
  metadata: DiagnosticEventMetadata,
) => {
  // The histogram is written before the traces check so metrics do not depend on tracing.
  harnessDurationHistogram.record(evt.durationMs, harnessRunMetricAttrs(evt));
  if (tracesEnabled) {
    const { resultClassification, yieldDetected, itemLifecycle } = evt;
    // Optional result details are attached to the span only, never to the metric.
    const spanAttrs: Record<string, string | number | boolean> = {
      ...harnessRunMetricAttrs(evt),
      ...(resultClassification
        ? { "openclaw.harness.result_classification": lowCardinalityAttr(resultClassification) }
        : {}),
      ...(typeof yieldDetected === "boolean"
        ? { "openclaw.harness.yield_detected": yieldDetected }
        : {}),
      ...(itemLifecycle
        ? {
            "openclaw.harness.items.started": itemLifecycle.startedCount,
            "openclaw.harness.items.completed": itemLifecycle.completedCount,
            "openclaw.harness.items.active": itemLifecycle.activeCount,
          }
        : {}),
    };
    const harnessSpan = spanWithDuration("openclaw.harness.run", spanAttrs, evt.durationMs, {
      parentContext: contextForTrustedDiagnosticSpanParent(evt, metadata),
      endTimeMs: evt.ts,
    });
    // A completed event can still describe a failed run (outcome "error").
    const runFailed = evt.outcome === "error";
    if (runFailed) {
      harnessSpan.setStatus({ code: SpanStatusCode.ERROR, message: "error" });
    }
    harnessSpan.end(evt.ts);
  }
};
/**
 * Records a `harness.run.error` event.
 *
 * The duration metric is always recorded, tagged with the failing phase and
 * the error category. When trace export is enabled, an `openclaw.harness.run`
 * span with ERROR status is produced as well.
 *
 * @param evt - The harness run error event.
 * @param metadata - Event metadata, passed to
 *   `contextForTrustedDiagnosticSpanParent` to choose the span's parent context.
 */
const recordHarnessRunError = (
  evt: Extract<DiagnosticEventPayload, { type: "harness.run.error" }>,
  metadata: DiagnosticEventMetadata,
) => {
  // A missing error category falls back to "other".
  const errorType = lowCardinalityAttr(evt.errorCategory, "other");
  const metricAttrs = {
    ...harnessRunMetricAttrs(evt),
    "openclaw.harness.phase": evt.phase,
    "openclaw.errorCategory": errorType,
  };
  harnessDurationHistogram.record(evt.durationMs, metricAttrs);
  if (tracesEnabled) {
    const spanAttrs: Record<string, string | number | boolean> = {
      ...metricAttrs,
      "error.type": errorType,
    };
    // The cleanup flag is only emitted when cleanup actually failed.
    if (evt.cleanupFailed) {
      spanAttrs["openclaw.harness.cleanup_failed"] = true;
    }
    const errorSpan = spanWithDuration("openclaw.harness.run", spanAttrs, evt.durationMs, {
      parentContext: contextForTrustedDiagnosticSpanParent(evt, metadata),
      endTimeMs: evt.ts,
    });
    errorSpan.setStatus({ code: SpanStatusCode.ERROR, message: errorType });
    errorSpan.end(evt.ts);
  }
};
const recordContextAssembled = (
evt: Extract<DiagnosticEventPayload, { type: "context.assembled" }>,
metadata: DiagnosticEventMetadata,
@@ -1746,6 +1830,12 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
case "run.completed":
recordRunCompleted(evt, metadata);
return;
case "harness.run.completed":
recordHarnessRunCompleted(evt, metadata);
return;
case "harness.run.error":
recordHarnessRunError(evt, metadata);
return;
case "context.assembled":
recordContextAssembled(evt, metadata);
return;
@@ -1781,6 +1871,7 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
return;
case "tool.execution.started":
case "run.started":
case "harness.run.started":
case "model.call.started":
case "payload.large":
return;

View File

@@ -13,6 +13,7 @@ objective: Verify a QA-lab gateway run emits bounded OpenTelemetry trace spans t
successCriteria:
- The diagnostics-otel plugin starts with trace export enabled.
- A minimal QA-channel agent turn completes.
- The trace includes the selected agent harness lifecycle span.
- The run emits low-cardinality OpenTelemetry trace spans without content or raw diagnostic identifiers.
plugins:
- diagnostics-otel
@@ -33,6 +34,7 @@ docsRefs:
- docs/concepts/qa-e2e-automation.md
codeRefs:
- extensions/diagnostics-otel/src/service.ts
- src/agents/harness/v2.ts
- extensions/qa-lab/src/suite.ts
execution:
kind: flow

View File

@@ -80,6 +80,7 @@ type CapturedSpan = {
const DEFAULT_SCENARIO_ID = "otel-trace-smoke";
const REQUIRED_SPAN_NAMES = [
"openclaw.run",
"openclaw.harness.run",
"openclaw.model.call",
"openclaw.context.assembled",
"openclaw.message.delivery",

View File

@@ -1,5 +1,11 @@
import type { Api, Model } from "@mariozechner/pi-ai";
import { describe, expect, it, vi } from "vitest";
import { afterEach, describe, expect, it, vi } from "vitest";
import {
onInternalDiagnosticEvent,
resetDiagnosticEventsForTest,
type DiagnosticEventMetadata,
type DiagnosticEventPayload,
} from "../../infra/diagnostic-events.js";
import type { EmbeddedRunAttemptResult } from "../pi-embedded-runner/run/types.js";
import type { AgentHarness, AgentHarnessAttemptParams } from "./types.js";
import type { AgentHarnessV2 } from "./v2.js";
@@ -9,6 +15,7 @@ function createAttemptParams(): AgentHarnessAttemptParams {
return {
prompt: "hello",
sessionId: "session-1",
sessionKey: "session-key",
runId: "run-1",
sessionFile: "/tmp/session.jsonl",
workspaceDir: "/tmp/workspace",
@@ -19,9 +26,19 @@ function createAttemptParams(): AgentHarnessAttemptParams {
authStorage: {} as never,
modelRegistry: {} as never,
thinkLevel: "low",
messageChannel: "qa",
trigger: "manual",
} as AgentHarnessAttemptParams;
}
/** Returns a new trace-context object with fixed ids for use in fake attempt results. */
function createDiagnosticTrace() {
  const fixedTrace = {
    traceId: "11111111111111111111111111111111",
    spanId: "2222222222222222",
    traceFlags: "01",
  };
  return fixedTrace;
}
function createAttemptResult(): EmbeddedRunAttemptResult {
return {
aborted: false,
@@ -32,6 +49,7 @@ function createAttemptResult(): EmbeddedRunAttemptResult {
promptError: null,
promptErrorSource: null,
sessionIdUsed: "session-1",
diagnosticTrace: createDiagnosticTrace(),
messagesSnapshot: [],
assistantTexts: ["ok"],
toolMetas: [],
@@ -46,7 +64,28 @@ function createAttemptResult(): EmbeddedRunAttemptResult {
};
}
/**
 * Resolves after one `setImmediate` turn. Tests await it before inspecting
 * the diagnostic events they captured.
 */
async function flushDiagnosticEvents(): Promise<void> {
  await new Promise<void>((done) => {
    setImmediate(done);
  });
}
/**
 * Subscribes to internal diagnostic events and collects every event whose type
 * starts with `harness.run.`, together with its metadata.
 *
 * @returns The live `events` array (it grows as events arrive) and the
 *   `unsubscribe` function returned by the subscription.
 */
function captureDiagnosticEvents(): {
  events: Array<{ event: DiagnosticEventPayload; metadata: DiagnosticEventMetadata }>;
  unsubscribe: () => void;
} {
  const captured: Array<{ event: DiagnosticEventPayload; metadata: DiagnosticEventMetadata }> = [];
  const stopListening = onInternalDiagnosticEvent((event, metadata) => {
    // Ignore everything that is not a harness lifecycle event.
    if (!event.type.startsWith("harness.run.")) {
      return;
    }
    captured.push({ event, metadata });
  });
  return { events: captured, unsubscribe: stopListening };
}
describe("AgentHarness V2 compatibility adapter", () => {
afterEach(() => {
resetDiagnosticEventsForTest();
});
it("executes prepare/start/send/outcome/cleanup as one bounded lifecycle", async () => {
const params = createAttemptParams();
const result = createAttemptResult();
@@ -102,6 +141,112 @@ describe("AgentHarness V2 compatibility adapter", () => {
]);
});
// Runs a stub V2 harness whose every lifecycle stage succeeds and checks that
// exactly one `harness.run.started` and one `harness.run.completed` event are
// emitted, both flagged as trusted.
it("emits trusted harness lifecycle diagnostics for successful attempts", async () => {
resetDiagnosticEventsForTest();
const params = createAttemptParams();
// Add the optional result fields that the completed event is expected to forward.
const result = {
...createAttemptResult(),
agentHarnessResultClassification: "reasoning-only",
yieldDetected: true,
itemLifecycle: { startedCount: 3, completedCount: 2, activeCount: 1 },
} as EmbeddedRunAttemptResult;
const harness: AgentHarnessV2 = {
id: "codex",
label: "Codex",
pluginId: "codex-plugin",
supports: () => ({ supported: true }),
prepare: async () => ({
harnessId: "codex",
label: "Codex",
pluginId: "codex-plugin",
params,
lifecycleState: "prepared",
}),
start: async (prepared) => ({ ...prepared, lifecycleState: "started" }),
send: async () => result,
resolveOutcome: async (_session, rawResult) => rawResult,
cleanup: async () => {},
};
const diagnostics = captureDiagnosticEvents();
try {
await runAgentHarnessV2LifecycleAttempt(harness, params);
// Wait one setImmediate turn before inspecting the captured events.
await flushDiagnosticEvents();
} finally {
// Always detach the listener, even if the attempt throws.
diagnostics.unsubscribe();
}
expect(diagnostics.events.map(({ event }) => event.type)).toEqual([
"harness.run.started",
"harness.run.completed",
]);
expect(diagnostics.events.every(({ metadata }) => metadata.trusted)).toBe(true);
// Index 1 is the completed event; the duration is only checked for its type.
expect(diagnostics.events[1]?.event).toMatchObject({
type: "harness.run.completed",
runId: "run-1",
sessionKey: "session-key",
sessionId: "session-1",
provider: "codex",
model: "gpt-5.4",
channel: "qa",
trigger: "manual",
harnessId: "codex",
pluginId: "codex-plugin",
outcome: "completed",
resultClassification: "reasoning-only",
yieldDetected: true,
itemLifecycle: { startedCount: 3, completedCount: 2, activeCount: 1 },
durationMs: expect.any(Number),
});
});
// Makes `send` throw and then makes `cleanup` throw as well. The attempt must
// reject with the original send error, and the emitted `harness.run.error`
// event must name the `send` phase and flag the failed cleanup.
it("emits trusted harness error diagnostics with the failing lifecycle phase", async () => {
resetDiagnosticEventsForTest();
const params = createAttemptParams();
const sendError = new Error("codex app-server send failed");
// This stub has no pluginId, unlike the one in the success test.
const harness: AgentHarnessV2 = {
id: "codex",
label: "Codex",
supports: () => ({ supported: true }),
prepare: async () => ({
harnessId: "codex",
label: "Codex",
params,
lifecycleState: "prepared",
}),
start: async (prepared) => ({ ...prepared, lifecycleState: "started" }),
send: async () => {
throw sendError;
},
resolveOutcome: async (_session, rawResult) => rawResult,
// A second failure during cleanup exercises the `cleanupFailed` flag.
cleanup: async () => {
throw new Error("cleanup failed");
},
};
const diagnostics = captureDiagnosticEvents();
try {
// The rejection must carry the send error, not the cleanup error.
await expect(runAgentHarnessV2LifecycleAttempt(harness, params)).rejects.toThrow(
"codex app-server send failed",
);
await flushDiagnosticEvents();
} finally {
diagnostics.unsubscribe();
}
expect(diagnostics.events.map(({ event }) => event.type)).toEqual([
"harness.run.started",
"harness.run.error",
]);
expect(diagnostics.events.every(({ metadata }) => metadata.trusted)).toBe(true);
expect(diagnostics.events[1]?.event).toMatchObject({
type: "harness.run.error",
phase: "send",
errorCategory: "Error",
cleanupFailed: true,
harnessId: "codex",
durationMs: expect.any(Number),
});
});
it("runs cleanup with the original failure and preserves that failure", async () => {
const params = createAttemptParams();
const sendError = new Error("codex app-server send failed");

View File

@@ -1,3 +1,10 @@
import { diagnosticErrorCategory } from "../../infra/diagnostic-error-metadata.js";
import {
emitTrustedDiagnosticEvent,
type DiagnosticHarnessRunErrorEvent,
type DiagnosticHarnessRunOutcome,
} from "../../infra/diagnostic-events.js";
import type { DiagnosticTraceContext } from "../../infra/diagnostic-trace-context.js";
import { formatErrorMessage } from "../../infra/errors.js";
import { createSubsystemLogger } from "../../logging/subsystem.js";
import { applyAgentHarnessResultClassification } from "./result-classification.js";
@@ -13,6 +20,7 @@ import type {
} from "./types.js";
const log = createSubsystemLogger("agents/harness/v2");
// Lifecycle phase names are taken from the `phase` field of the harness error
// diagnostic event, so the two cannot drift apart.
type AgentHarnessV2LifecyclePhase = DiagnosticHarnessRunErrorEvent["phase"];
type AgentHarnessV2RunBase = {
harnessId: string;
@@ -95,6 +103,87 @@ export function adaptAgentHarnessToV2(harness: AgentHarness): AgentHarnessV2 {
};
}
/**
 * Builds the fields shared by every harness run diagnostic event.
 *
 * `runId`, `sessionId`, `provider`, `model` and `harnessId` are always
 * included. `pluginId`, `sessionKey`, `trigger`, `channel` and `trace` are only
 * added when their source value is truthy, so absent values do not appear as
 * explicit `undefined` keys.
 *
 * @param harness - Harness supplying `id` and the optional `pluginId`.
 * @param params - Attempt parameters supplying run, session and model details.
 * @param trace - Optional trace context to attach to the event.
 */
function agentHarnessDiagnosticBase(
  harness: AgentHarnessV2,
  params: AgentHarnessAttemptParams,
  trace?: DiagnosticTraceContext,
) {
  const { id: harnessId, pluginId } = harness;
  const { runId, sessionId, provider, modelId, sessionKey, trigger, messageChannel } = params;
  return {
    runId,
    sessionId,
    provider,
    model: modelId,
    harnessId,
    ...(pluginId ? { pluginId } : {}),
    ...(sessionKey ? { sessionKey } : {}),
    ...(trigger ? { trigger } : {}),
    ...(messageChannel ? { channel: messageChannel } : {}),
    ...(trace ? { trace } : {}),
  };
}
/**
 * Maps a harness attempt result onto the bounded outcome label reported on
 * `harness.run.completed` events.
 *
 * The result flags are not mutually exclusive, so they are checked from most
 * to least specific:
 *   1. `promptError` -> "error"
 *   2. `externalAbort` -> "aborted" (an explicit external cancellation wins)
 *   3. any timeout flag -> "timed_out"
 *   4. `aborted` -> "aborted"
 *   5. otherwise -> "completed"
 *
 * Timeouts are checked before the generic `aborted` flag. If a result carries
 * both, the previous ordering reported "aborted" and the timeout was lost.
 * NOTE(review): this assumes a timeout may also set `aborted` — confirm
 * against the harness runners.
 *
 * @param result - The resolved harness attempt result.
 * @returns The outcome label for the run.
 */
function agentHarnessRunOutcome(result: AgentHarnessAttemptResult): DiagnosticHarnessRunOutcome {
  if (result.promptError) {
    return "error";
  }
  if (result.externalAbort) {
    return "aborted";
  }
  if (result.timedOut || result.idleTimedOut || result.timedOutDuringCompaction) {
    return "timed_out";
  }
  if (result.aborted) {
    return "aborted";
  }
  return "completed";
}
/**
 * Emits the trusted `harness.run.started` diagnostic event for an attempt.
 * No trace context is attached to the started event.
 *
 * @param harness - The harness about to run the attempt.
 * @param params - The attempt parameters describing the run.
 */
function emitAgentHarnessRunStarted(
  harness: AgentHarnessV2,
  params: AgentHarnessAttemptParams,
): void {
  const sharedFields = agentHarnessDiagnosticBase(harness, params);
  emitTrustedDiagnosticEvent({ type: "harness.run.started", ...sharedFields });
}
/**
 * Emits the trusted `harness.run.completed` diagnostic event for an attempt
 * that reached a resolved result.
 *
 * The event carries the result's trace context, the elapsed time since
 * `startedAt`, the derived outcome label, and a shallow copy of the item
 * lifecycle counters. The result classification and yield flag are only
 * included when present on the result.
 */
function emitAgentHarnessRunCompleted(params: {
  harness: AgentHarnessV2;
  attemptParams: AgentHarnessAttemptParams;
  result: AgentHarnessAttemptResult;
  startedAt: number;
}): void {
  const { result } = params;
  const sharedFields = agentHarnessDiagnosticBase(
    params.harness,
    params.attemptParams,
    result.diagnosticTrace,
  );
  const {
    agentHarnessResultClassification: resultClassification,
    yieldDetected,
    itemLifecycle,
  } = result;
  emitTrustedDiagnosticEvent({
    type: "harness.run.completed",
    ...sharedFields,
    durationMs: Date.now() - params.startedAt,
    outcome: agentHarnessRunOutcome(result),
    ...(resultClassification ? { resultClassification } : {}),
    ...(typeof yieldDetected === "boolean" ? { yieldDetected } : {}),
    // Copy the counters so the event does not alias the result object.
    itemLifecycle: { ...itemLifecycle },
  });
}
/**
 * Emits the trusted `harness.run.error` diagnostic event for an attempt that
 * failed in one of its lifecycle phases.
 *
 * The event records the elapsed time since `startedAt`, the failing phase, and
 * the category derived from the thrown value by `diagnosticErrorCategory`.
 * `cleanupFailed` is only included when it is truthy. No trace context is
 * attached to error events.
 */
function emitAgentHarnessRunError(params: {
  harness: AgentHarnessV2;
  attemptParams: AgentHarnessAttemptParams;
  startedAt: number;
  phase: AgentHarnessV2LifecyclePhase;
  error: unknown;
  cleanupFailed?: boolean;
}): void {
  const sharedFields = agentHarnessDiagnosticBase(params.harness, params.attemptParams);
  emitTrustedDiagnosticEvent({
    type: "harness.run.error",
    ...sharedFields,
    durationMs: Date.now() - params.startedAt,
    phase: params.phase,
    errorCategory: diagnosticErrorCategory(params.error),
    ...(params.cleanupFailed ? { cleanupFailed: true } : {}),
  });
}
export async function runAgentHarnessV2LifecycleAttempt(
harness: AgentHarnessV2,
params: AgentHarnessAttemptParams,
@@ -103,13 +192,21 @@ export async function runAgentHarnessV2LifecycleAttempt(
let session: AgentHarnessV2Session | undefined;
let rawResult: AgentHarnessAttemptResult | undefined;
let result: AgentHarnessAttemptResult;
let phase: AgentHarnessV2LifecyclePhase = "prepare";
const startedAt = Date.now();
emitAgentHarnessRunStarted(harness, params);
try {
phase = "prepare";
prepared = await harness.prepare(params);
phase = "start";
session = await harness.start(prepared);
phase = "send";
rawResult = await harness.send(session);
phase = "resolve";
result = await harness.resolveOutcome(session, rawResult);
} catch (error) {
let cleanupFailed = false;
try {
await harness.cleanup({
prepared,
@@ -118,6 +215,7 @@ export async function runAgentHarnessV2LifecycleAttempt(
...(rawResult === undefined ? {} : { result: rawResult }),
});
} catch (cleanupError) {
cleanupFailed = true;
// Preserve the user-visible harness failure. Cleanup errors after a
// failed lifecycle stage must not mask the actionable runtime error.
log.warn("agent harness cleanup failed after attempt failure", {
@@ -128,9 +226,30 @@ export async function runAgentHarnessV2LifecycleAttempt(
originalError: formatErrorMessage(error),
});
}
emitAgentHarnessRunError({
harness,
attemptParams: params,
startedAt,
phase,
error,
cleanupFailed,
});
throw error;
}
await harness.cleanup({ prepared, session, result });
try {
phase = "cleanup";
await harness.cleanup({ prepared, session, result });
} catch (error) {
emitAgentHarnessRunError({
harness,
attemptParams: params,
startedAt,
phase,
error,
});
throw error;
}
emitAgentHarnessRunCompleted({ harness, attemptParams: params, result, startedAt });
return result;
}

View File

@@ -256,6 +256,47 @@ export type DiagnosticRunCompletedEvent = DiagnosticRunBaseEvent & {
errorCategory?: string;
};
/** Harness lifecycle stage, reported as `phase` on `harness.run.error` events. */
export type DiagnosticHarnessRunPhase = "prepare" | "start" | "send" | "resolve" | "cleanup";
/** Bounded outcome label reported as `outcome` on `harness.run.completed` events. */
export type DiagnosticHarnessRunOutcome = "completed" | "aborted" | "timed_out" | "error";
/**
 * Fields shared by every harness run lifecycle event. Only `runId` and
 * `harnessId` are required; the remaining identifiers are optional.
 */
type DiagnosticHarnessRunBaseEvent = DiagnosticBaseEvent & {
type: "harness.run.started" | "harness.run.completed" | "harness.run.error";
runId: string;
sessionKey?: string;
sessionId?: string;
provider?: string;
model?: string;
trigger?: string;
channel?: string;
// Identifier of the harness that ran the attempt.
harnessId: string;
// Identifier of the plugin associated with the harness, when there is one.
pluginId?: string;
};
/** Harness run started event; it adds no fields beyond the shared base. */
export type DiagnosticHarnessRunStartedEvent = DiagnosticHarnessRunBaseEvent & {
type: "harness.run.started";
};
/**
 * Harness run completed event. It carries the run duration and a bounded
 * outcome label, plus optional details about the result.
 */
export type DiagnosticHarnessRunCompletedEvent = DiagnosticHarnessRunBaseEvent & {
type: "harness.run.completed";
durationMs: number;
// May be "error": a run can complete its lifecycle and still report a failure.
outcome: DiagnosticHarnessRunOutcome;
resultClassification?: "empty" | "reasoning-only" | "planning-only";
yieldDetected?: boolean;
// Item counters reported by the harness result.
itemLifecycle?: {
startedCount: number;
completedCount: number;
activeCount: number;
};
};
/**
 * Harness run error event. It carries the run duration, the lifecycle phase
 * recorded for the failure, and an error category string.
 */
export type DiagnosticHarnessRunErrorEvent = DiagnosticHarnessRunBaseEvent & {
type: "harness.run.error";
durationMs: number;
phase: DiagnosticHarnessRunPhase;
errorCategory: string;
// Optional flag marking that harness cleanup also failed for this run.
cleanupFailed?: boolean;
};
type DiagnosticModelCallBaseEvent = DiagnosticBaseEvent & {
type: "model.call.started" | "model.call.completed" | "model.call.error";
runId: string;
@@ -392,6 +433,9 @@ export type DiagnosticEventPayload =
| DiagnosticExecProcessCompletedEvent
| DiagnosticRunStartedEvent
| DiagnosticRunCompletedEvent
| DiagnosticHarnessRunStartedEvent
| DiagnosticHarnessRunCompletedEvent
| DiagnosticHarnessRunErrorEvent
| DiagnosticModelCallStartedEvent
| DiagnosticModelCallCompletedEvent
| DiagnosticModelCallErrorEvent
@@ -446,6 +490,9 @@ const ASYNC_DIAGNOSTIC_EVENT_TYPES = new Set<DiagnosticEventPayload["type"]>([
"model.call.started",
"model.call.completed",
"model.call.error",
"harness.run.started",
"harness.run.completed",
"harness.run.error",
"context.assembled",
"log.record",
]);

View File

@@ -305,6 +305,34 @@ function sanitizeDiagnosticEvent(event: DiagnosticEventPayload): DiagnosticStabi
record.outcome = event.outcome;
assignReasonCode(record, event.errorCategory);
break;
case "harness.run.started":
record.source = event.harnessId;
record.pluginId = event.pluginId;
record.provider = event.provider;
record.model = event.model;
record.channel = event.channel;
break;
case "harness.run.completed":
record.source = event.harnessId;
record.pluginId = event.pluginId;
record.provider = event.provider;
record.model = event.model;
record.channel = event.channel;
record.durationMs = event.durationMs;
record.outcome = event.outcome;
record.count = event.itemLifecycle?.completedCount;
break;
case "harness.run.error":
record.source = event.harnessId;
record.pluginId = event.pluginId;
record.provider = event.provider;
record.model = event.model;
record.channel = event.channel;
record.durationMs = event.durationMs;
record.outcome = "error";
record.action = event.phase;
assignReasonCode(record, event.errorCategory);
break;
case "model.call.started":
record.provider = event.provider;
record.model = event.model;