mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 06:50:43 +00:00
feat(diagnostics): add harness lifecycle telemetry
This commit is contained in:
@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Diagnostics/OTEL: align model-call GenAI span attributes with OpenTelemetry stability opt-in semantics, keeping legacy `gen_ai.system` by default while emitting `gen_ai.provider.name` under `OTEL_SEMCONV_STABILITY_OPT_IN=gen_ai_latest_experimental`. Thanks @vincentkoc.
|
||||
- Diagnostics/OTEL: support signal-specific OTLP endpoint overrides for traces, metrics, and logs via config or standard OTEL environment variables. Thanks @vincentkoc.
|
||||
- Diagnostics/OTEL: emit bounded telemetry exporter health diagnostics for startup and log-export failures without exporting raw error text. Thanks @vincentkoc.
|
||||
- Diagnostics/OTEL: export agent harness lifecycle telemetry as bounded `openclaw.harness.run` spans and `openclaw.harness.duration_ms` metrics so QA-lab, Codex, and future harnesses share one trace shape. Thanks @vincentkoc.
|
||||
- Plugins/CLI: add `openclaw plugins registry` for explicit persisted-registry inspection and `--refresh` repair without making normal startup rescan plugin locations. Thanks @vincentkoc.
|
||||
- Plugins/CLI: make `openclaw plugins list` read the cold persisted registry snapshot by default, leaving module-aware diagnostics to `plugins doctor` and `plugins inspect`. Thanks @vincentkoc.
|
||||
- Plugins/startup: move gateway startup plugin planning onto the versioned cold registry index, with postinstall repair for older registry files that predate startup metadata. Thanks @vincentkoc.
|
||||
|
||||
@@ -59,9 +59,9 @@ pnpm qa:otel:smoke
|
||||
That script starts a local OTLP/HTTP trace receiver, runs the
|
||||
`otel-trace-smoke` QA scenario with the `diagnostics-otel` plugin enabled, then
|
||||
decodes the exported protobuf spans and asserts the release-critical shape:
|
||||
`openclaw.run`, `openclaw.model.call`, `openclaw.context.assembled`, and
|
||||
`openclaw.message.delivery` must be present; model calls must not export
|
||||
`StreamAbandoned` on successful turns; raw diagnostic IDs and
|
||||
`openclaw.run`, `openclaw.harness.run`, `openclaw.model.call`,
|
||||
`openclaw.context.assembled`, and `openclaw.message.delivery` must be present;
|
||||
model calls must not export `StreamAbandoned` on successful turns; raw diagnostic IDs and
|
||||
`openclaw.content.*` attributes must stay out of the trace. It writes
|
||||
`otel-smoke-summary.json` next to the QA suite artifacts.
|
||||
|
||||
|
||||
@@ -1140,6 +1140,28 @@ describe("diagnostics-otel service", () => {
|
||||
traceFlags: "01",
|
||||
},
|
||||
});
|
||||
emitDiagnosticEvent({
|
||||
type: "harness.run.completed",
|
||||
runId: "run-1",
|
||||
sessionKey: "session-key",
|
||||
sessionId: "session-1",
|
||||
provider: "codex",
|
||||
model: "gpt-5.4",
|
||||
channel: "qa",
|
||||
harnessId: "codex",
|
||||
pluginId: "codex-plugin",
|
||||
outcome: "completed",
|
||||
durationMs: 90,
|
||||
resultClassification: "reasoning-only",
|
||||
yieldDetected: true,
|
||||
itemLifecycle: { startedCount: 3, completedCount: 2, activeCount: 1 },
|
||||
trace: {
|
||||
traceId: TRACE_ID,
|
||||
spanId: GRANDCHILD_SPAN_ID,
|
||||
parentSpanId: CHILD_SPAN_ID,
|
||||
traceFlags: "01",
|
||||
},
|
||||
});
|
||||
emitDiagnosticEvent({
|
||||
type: "tool.execution.error",
|
||||
runId: "run-1",
|
||||
@@ -1160,7 +1182,12 @@ describe("diagnostics-otel service", () => {
|
||||
|
||||
const spanNames = telemetryState.tracer.startSpan.mock.calls.map((call) => call[0]);
|
||||
expect(spanNames).toEqual(
|
||||
expect.arrayContaining(["openclaw.run", "openclaw.model.call", "openclaw.tool.execution"]),
|
||||
expect.arrayContaining([
|
||||
"openclaw.run",
|
||||
"openclaw.model.call",
|
||||
"openclaw.harness.run",
|
||||
"openclaw.tool.execution",
|
||||
]),
|
||||
);
|
||||
|
||||
const runCall = telemetryState.tracer.startSpan.mock.calls.find(
|
||||
@@ -1207,6 +1234,36 @@ describe("diagnostics-otel service", () => {
|
||||
});
|
||||
expect(modelCall?.[2]).toBeUndefined();
|
||||
|
||||
const harnessCall = telemetryState.tracer.startSpan.mock.calls.find(
|
||||
(call) => call[0] === "openclaw.harness.run",
|
||||
);
|
||||
expect(harnessCall?.[1]).toMatchObject({
|
||||
attributes: {
|
||||
"openclaw.harness.id": "codex",
|
||||
"openclaw.harness.plugin": "codex-plugin",
|
||||
"openclaw.outcome": "completed",
|
||||
"openclaw.provider": "codex",
|
||||
"openclaw.model": "gpt-5.4",
|
||||
"openclaw.channel": "qa",
|
||||
"openclaw.harness.result_classification": "reasoning-only",
|
||||
"openclaw.harness.yield_detected": true,
|
||||
"openclaw.harness.items.started": 3,
|
||||
"openclaw.harness.items.completed": 2,
|
||||
"openclaw.harness.items.active": 1,
|
||||
},
|
||||
startTime: expect.any(Number),
|
||||
});
|
||||
expect(harnessCall?.[1]).toEqual({
|
||||
attributes: expect.not.objectContaining({
|
||||
"openclaw.runId": expect.anything(),
|
||||
"openclaw.sessionId": expect.anything(),
|
||||
"openclaw.sessionKey": expect.anything(),
|
||||
"openclaw.traceId": expect.anything(),
|
||||
}),
|
||||
startTime: expect.any(Number),
|
||||
});
|
||||
expect(harnessCall?.[2]).toBeUndefined();
|
||||
|
||||
const toolCall = telemetryState.tracer.startSpan.mock.calls.find(
|
||||
(call) => call[0] === "openclaw.tool.execution",
|
||||
);
|
||||
@@ -1244,6 +1301,25 @@ describe("diagnostics-otel service", () => {
|
||||
"openclaw.runId": expect.anything(),
|
||||
}),
|
||||
);
|
||||
expect(
|
||||
telemetryState.histograms.get("openclaw.harness.duration_ms")?.record,
|
||||
).toHaveBeenCalledWith(
|
||||
90,
|
||||
expect.objectContaining({
|
||||
"openclaw.harness.id": "codex",
|
||||
"openclaw.harness.plugin": "codex-plugin",
|
||||
"openclaw.outcome": "completed",
|
||||
}),
|
||||
);
|
||||
expect(
|
||||
telemetryState.histograms.get("openclaw.harness.duration_ms")?.record,
|
||||
).toHaveBeenCalledWith(
|
||||
90,
|
||||
expect.not.objectContaining({
|
||||
"openclaw.runId": expect.anything(),
|
||||
"openclaw.sessionKey": expect.anything(),
|
||||
}),
|
||||
);
|
||||
expect(
|
||||
telemetryState.histograms.get("openclaw.tool.execution.duration_ms")?.record,
|
||||
).toHaveBeenCalledWith(
|
||||
|
||||
@@ -81,6 +81,10 @@ type ModelCallLifecycleDiagnosticEvent = Extract<
|
||||
DiagnosticEventPayload,
|
||||
{ type: "model.call.completed" | "model.call.error" }
|
||||
>;
|
||||
type HarnessRunLifecycleDiagnosticEvent = Extract<
|
||||
DiagnosticEventPayload,
|
||||
{ type: "harness.run.completed" | "harness.run.error" }
|
||||
>;
|
||||
type TelemetryExporterDiagnosticEvent = Extract<
|
||||
DiagnosticEventPayload,
|
||||
{ type: "telemetry.exporter" }
|
||||
@@ -720,6 +724,10 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
unit: "ms",
|
||||
description: "Agent run duration",
|
||||
});
|
||||
const harnessDurationHistogram = meter.createHistogram("openclaw.harness.duration_ms", {
|
||||
unit: "ms",
|
||||
description: "Agent harness lifecycle duration",
|
||||
});
|
||||
const contextHistogram = meter.createHistogram("openclaw.context.tokens", {
|
||||
unit: "1",
|
||||
description: "Context window size and usage",
|
||||
@@ -1426,6 +1434,82 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
span.end(evt.ts);
|
||||
};
|
||||
|
||||
/**
 * Builds the attribute set shared by the harness duration metric and the
 * `openclaw.harness.run` span.
 *
 * - `harness.run.error` events always report the outcome `"error"`; completed
 *   events report the outcome carried on the event.
 * - `openclaw.channel` is only present when the event has a channel.
 *
 * String values are passed through `lowCardinalityAttr` (defined elsewhere in
 * this file; presumably it bounds attribute cardinality — confirm there).
 */
const harnessRunMetricAttrs = (evt: HarnessRunLifecycleDiagnosticEvent) => {
  const outcome = evt.type === "harness.run.error" ? "error" : evt.outcome;
  return {
    "openclaw.harness.id": lowCardinalityAttr(evt.harnessId, "unknown"),
    "openclaw.harness.plugin": lowCardinalityAttr(evt.pluginId),
    "openclaw.outcome": outcome,
    "openclaw.provider": lowCardinalityAttr(evt.provider, "unknown"),
    "openclaw.model": lowCardinalityAttr(evt.model, "unknown"),
    ...(evt.channel ? { "openclaw.channel": lowCardinalityAttr(evt.channel) } : {}),
  };
};
|
||||
|
||||
/**
 * Handles a `harness.run.completed` diagnostic event.
 *
 * Always records the harness duration histogram. When trace export is enabled
 * it additionally emits an `openclaw.harness.run` span that carries the metric
 * attributes plus the optional result classification, yield flag and item
 * lifecycle counts, and marks the span as ERROR when the outcome is `"error"`.
 *
 * @param evt - The completed harness run event.
 * @param metadata - Event metadata used to resolve the parent span context.
 */
const recordHarnessRunCompleted = (
  evt: Extract<DiagnosticEventPayload, { type: "harness.run.completed" }>,
  metadata: DiagnosticEventMetadata,
) => {
  // The duration metric is recorded whether or not traces are exported.
  harnessDurationHistogram.record(evt.durationMs, harnessRunMetricAttrs(evt));
  if (!tracesEnabled) {
    return;
  }

  const { resultClassification, yieldDetected, itemLifecycle } = evt;
  const attributes: Record<string, string | number | boolean> = {
    ...harnessRunMetricAttrs(evt),
    ...(resultClassification
      ? {
          "openclaw.harness.result_classification": lowCardinalityAttr(resultClassification),
        }
      : {}),
    // `false` is a meaningful value here, so test the type rather than truthiness.
    ...(typeof yieldDetected === "boolean"
      ? { "openclaw.harness.yield_detected": yieldDetected }
      : {}),
    ...(itemLifecycle
      ? {
          "openclaw.harness.items.started": itemLifecycle.startedCount,
          "openclaw.harness.items.completed": itemLifecycle.completedCount,
          "openclaw.harness.items.active": itemLifecycle.activeCount,
        }
      : {}),
  };

  const harnessSpan = spanWithDuration("openclaw.harness.run", attributes, evt.durationMs, {
    parentContext: contextForTrustedDiagnosticSpanParent(evt, metadata),
    endTimeMs: evt.ts,
  });
  if (evt.outcome === "error") {
    harnessSpan.setStatus({ code: SpanStatusCode.ERROR, message: "error" });
  }
  harnessSpan.end(evt.ts);
};
|
||||
|
||||
/**
 * Handles a `harness.run.error` diagnostic event.
 *
 * Records the harness duration histogram with the shared metric attributes
 * plus the failing lifecycle phase and error category. When trace export is
 * enabled it also emits an `openclaw.harness.run` span with `error.type`, an
 * optional cleanup-failure flag, and an ERROR status whose message is the
 * error category.
 *
 * @param evt - The harness run error event.
 * @param metadata - Event metadata used to resolve the parent span context.
 */
const recordHarnessRunError = (
  evt: Extract<DiagnosticEventPayload, { type: "harness.run.error" }>,
  metadata: DiagnosticEventMetadata,
) => {
  // A missing category falls back to "other".
  const errorCategory = lowCardinalityAttr(evt.errorCategory, "other");
  const metricAttrs = {
    ...harnessRunMetricAttrs(evt),
    "openclaw.harness.phase": evt.phase,
    "openclaw.errorCategory": errorCategory,
  };
  harnessDurationHistogram.record(evt.durationMs, metricAttrs);
  if (!tracesEnabled) {
    return;
  }

  const attributes: Record<string, string | number | boolean> = {
    ...metricAttrs,
    "error.type": errorCategory,
  };
  if (evt.cleanupFailed) {
    // Only set on failure; the attribute is omitted entirely otherwise.
    attributes["openclaw.harness.cleanup_failed"] = true;
  }

  const harnessSpan = spanWithDuration("openclaw.harness.run", attributes, evt.durationMs, {
    parentContext: contextForTrustedDiagnosticSpanParent(evt, metadata),
    endTimeMs: evt.ts,
  });
  harnessSpan.setStatus({ code: SpanStatusCode.ERROR, message: errorCategory });
  harnessSpan.end(evt.ts);
};
|
||||
|
||||
const recordContextAssembled = (
|
||||
evt: Extract<DiagnosticEventPayload, { type: "context.assembled" }>,
|
||||
metadata: DiagnosticEventMetadata,
|
||||
@@ -1746,6 +1830,12 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
case "run.completed":
|
||||
recordRunCompleted(evt, metadata);
|
||||
return;
|
||||
case "harness.run.completed":
|
||||
recordHarnessRunCompleted(evt, metadata);
|
||||
return;
|
||||
case "harness.run.error":
|
||||
recordHarnessRunError(evt, metadata);
|
||||
return;
|
||||
case "context.assembled":
|
||||
recordContextAssembled(evt, metadata);
|
||||
return;
|
||||
@@ -1781,6 +1871,7 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
return;
|
||||
case "tool.execution.started":
|
||||
case "run.started":
|
||||
case "harness.run.started":
|
||||
case "model.call.started":
|
||||
case "payload.large":
|
||||
return;
|
||||
|
||||
@@ -13,6 +13,7 @@ objective: Verify a QA-lab gateway run emits bounded OpenTelemetry trace spans t
|
||||
successCriteria:
|
||||
- The diagnostics-otel plugin starts with trace export enabled.
|
||||
- A minimal QA-channel agent turn completes.
|
||||
- The trace includes the selected agent harness lifecycle span.
|
||||
- The run emits low-cardinality OpenTelemetry trace spans without content or raw diagnostic identifiers.
|
||||
plugins:
|
||||
- diagnostics-otel
|
||||
@@ -33,6 +34,7 @@ docsRefs:
|
||||
- docs/concepts/qa-e2e-automation.md
|
||||
codeRefs:
|
||||
- extensions/diagnostics-otel/src/service.ts
|
||||
- src/agents/harness/v2.ts
|
||||
- extensions/qa-lab/src/suite.ts
|
||||
execution:
|
||||
kind: flow
|
||||
|
||||
@@ -80,6 +80,7 @@ type CapturedSpan = {
|
||||
const DEFAULT_SCENARIO_ID = "otel-trace-smoke";
|
||||
const REQUIRED_SPAN_NAMES = [
|
||||
"openclaw.run",
|
||||
"openclaw.harness.run",
|
||||
"openclaw.model.call",
|
||||
"openclaw.context.assembled",
|
||||
"openclaw.message.delivery",
|
||||
|
||||
@@ -1,5 +1,11 @@
|
||||
import type { Api, Model } from "@mariozechner/pi-ai";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
onInternalDiagnosticEvent,
|
||||
resetDiagnosticEventsForTest,
|
||||
type DiagnosticEventMetadata,
|
||||
type DiagnosticEventPayload,
|
||||
} from "../../infra/diagnostic-events.js";
|
||||
import type { EmbeddedRunAttemptResult } from "../pi-embedded-runner/run/types.js";
|
||||
import type { AgentHarness, AgentHarnessAttemptParams } from "./types.js";
|
||||
import type { AgentHarnessV2 } from "./v2.js";
|
||||
@@ -9,6 +15,7 @@ function createAttemptParams(): AgentHarnessAttemptParams {
|
||||
return {
|
||||
prompt: "hello",
|
||||
sessionId: "session-1",
|
||||
sessionKey: "session-key",
|
||||
runId: "run-1",
|
||||
sessionFile: "/tmp/session.jsonl",
|
||||
workspaceDir: "/tmp/workspace",
|
||||
@@ -19,9 +26,19 @@ function createAttemptParams(): AgentHarnessAttemptParams {
|
||||
authStorage: {} as never,
|
||||
modelRegistry: {} as never,
|
||||
thinkLevel: "low",
|
||||
messageChannel: "qa",
|
||||
trigger: "manual",
|
||||
} as AgentHarnessAttemptParams;
|
||||
}
|
||||
|
||||
/**
 * Returns a fresh, fixed diagnostic trace fixture (trace id, span id and
 * trace flags) for attaching to attempt results in these tests.
 */
function createDiagnosticTrace() {
  const trace = {
    traceId: "11111111111111111111111111111111",
    spanId: "2222222222222222",
    traceFlags: "01",
  };
  return trace;
}
|
||||
|
||||
function createAttemptResult(): EmbeddedRunAttemptResult {
|
||||
return {
|
||||
aborted: false,
|
||||
@@ -32,6 +49,7 @@ function createAttemptResult(): EmbeddedRunAttemptResult {
|
||||
promptError: null,
|
||||
promptErrorSource: null,
|
||||
sessionIdUsed: "session-1",
|
||||
diagnosticTrace: createDiagnosticTrace(),
|
||||
messagesSnapshot: [],
|
||||
assistantTexts: ["ok"],
|
||||
toolMetas: [],
|
||||
@@ -46,7 +64,28 @@ function createAttemptResult(): EmbeddedRunAttemptResult {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Resolves after a `setImmediate` callback has fired, giving asynchronously
 * dispatched diagnostic events a chance to reach their listeners before the
 * test asserts on them.
 */
async function flushDiagnosticEvents(): Promise<void> {
  const immediateFired = new Promise<void>((done) => {
    setImmediate(done);
  });
  await immediateFired;
}
|
||||
|
||||
/**
 * Subscribes to internal diagnostic events and collects only those whose type
 * starts with `harness.run.`, together with their metadata.
 *
 * @returns The live `events` array (appended to as events arrive) and the
 *   `unsubscribe` function returned by `onInternalDiagnosticEvent`.
 */
function captureDiagnosticEvents(): {
  events: Array<{ event: DiagnosticEventPayload; metadata: DiagnosticEventMetadata }>;
  unsubscribe: () => void;
} {
  const captured: Array<{ event: DiagnosticEventPayload; metadata: DiagnosticEventMetadata }> =
    [];
  const stopListening = onInternalDiagnosticEvent((event, metadata) => {
    // Ignore everything that is not part of the harness run lifecycle.
    if (!event.type.startsWith("harness.run.")) {
      return;
    }
    captured.push({ event, metadata });
  });
  return { events: captured, unsubscribe: stopListening };
}
|
||||
|
||||
describe("AgentHarness V2 compatibility adapter", () => {
|
||||
afterEach(() => {
|
||||
resetDiagnosticEventsForTest();
|
||||
});
|
||||
|
||||
it("executes prepare/start/send/outcome/cleanup as one bounded lifecycle", async () => {
|
||||
const params = createAttemptParams();
|
||||
const result = createAttemptResult();
|
||||
@@ -102,6 +141,112 @@ describe("AgentHarness V2 compatibility adapter", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
it("emits trusted harness lifecycle diagnostics for successful attempts", async () => {
|
||||
resetDiagnosticEventsForTest();
|
||||
const params = createAttemptParams();
|
||||
const result = {
|
||||
...createAttemptResult(),
|
||||
agentHarnessResultClassification: "reasoning-only",
|
||||
yieldDetected: true,
|
||||
itemLifecycle: { startedCount: 3, completedCount: 2, activeCount: 1 },
|
||||
} as EmbeddedRunAttemptResult;
|
||||
const harness: AgentHarnessV2 = {
|
||||
id: "codex",
|
||||
label: "Codex",
|
||||
pluginId: "codex-plugin",
|
||||
supports: () => ({ supported: true }),
|
||||
prepare: async () => ({
|
||||
harnessId: "codex",
|
||||
label: "Codex",
|
||||
pluginId: "codex-plugin",
|
||||
params,
|
||||
lifecycleState: "prepared",
|
||||
}),
|
||||
start: async (prepared) => ({ ...prepared, lifecycleState: "started" }),
|
||||
send: async () => result,
|
||||
resolveOutcome: async (_session, rawResult) => rawResult,
|
||||
cleanup: async () => {},
|
||||
};
|
||||
const diagnostics = captureDiagnosticEvents();
|
||||
try {
|
||||
await runAgentHarnessV2LifecycleAttempt(harness, params);
|
||||
await flushDiagnosticEvents();
|
||||
} finally {
|
||||
diagnostics.unsubscribe();
|
||||
}
|
||||
|
||||
expect(diagnostics.events.map(({ event }) => event.type)).toEqual([
|
||||
"harness.run.started",
|
||||
"harness.run.completed",
|
||||
]);
|
||||
expect(diagnostics.events.every(({ metadata }) => metadata.trusted)).toBe(true);
|
||||
expect(diagnostics.events[1]?.event).toMatchObject({
|
||||
type: "harness.run.completed",
|
||||
runId: "run-1",
|
||||
sessionKey: "session-key",
|
||||
sessionId: "session-1",
|
||||
provider: "codex",
|
||||
model: "gpt-5.4",
|
||||
channel: "qa",
|
||||
trigger: "manual",
|
||||
harnessId: "codex",
|
||||
pluginId: "codex-plugin",
|
||||
outcome: "completed",
|
||||
resultClassification: "reasoning-only",
|
||||
yieldDetected: true,
|
||||
itemLifecycle: { startedCount: 3, completedCount: 2, activeCount: 1 },
|
||||
durationMs: expect.any(Number),
|
||||
});
|
||||
});
|
||||
|
||||
it("emits trusted harness error diagnostics with the failing lifecycle phase", async () => {
|
||||
resetDiagnosticEventsForTest();
|
||||
const params = createAttemptParams();
|
||||
const sendError = new Error("codex app-server send failed");
|
||||
const harness: AgentHarnessV2 = {
|
||||
id: "codex",
|
||||
label: "Codex",
|
||||
supports: () => ({ supported: true }),
|
||||
prepare: async () => ({
|
||||
harnessId: "codex",
|
||||
label: "Codex",
|
||||
params,
|
||||
lifecycleState: "prepared",
|
||||
}),
|
||||
start: async (prepared) => ({ ...prepared, lifecycleState: "started" }),
|
||||
send: async () => {
|
||||
throw sendError;
|
||||
},
|
||||
resolveOutcome: async (_session, rawResult) => rawResult,
|
||||
cleanup: async () => {
|
||||
throw new Error("cleanup failed");
|
||||
},
|
||||
};
|
||||
const diagnostics = captureDiagnosticEvents();
|
||||
try {
|
||||
await expect(runAgentHarnessV2LifecycleAttempt(harness, params)).rejects.toThrow(
|
||||
"codex app-server send failed",
|
||||
);
|
||||
await flushDiagnosticEvents();
|
||||
} finally {
|
||||
diagnostics.unsubscribe();
|
||||
}
|
||||
|
||||
expect(diagnostics.events.map(({ event }) => event.type)).toEqual([
|
||||
"harness.run.started",
|
||||
"harness.run.error",
|
||||
]);
|
||||
expect(diagnostics.events.every(({ metadata }) => metadata.trusted)).toBe(true);
|
||||
expect(diagnostics.events[1]?.event).toMatchObject({
|
||||
type: "harness.run.error",
|
||||
phase: "send",
|
||||
errorCategory: "Error",
|
||||
cleanupFailed: true,
|
||||
harnessId: "codex",
|
||||
durationMs: expect.any(Number),
|
||||
});
|
||||
});
|
||||
|
||||
it("runs cleanup with the original failure and preserves that failure", async () => {
|
||||
const params = createAttemptParams();
|
||||
const sendError = new Error("codex app-server send failed");
|
||||
|
||||
@@ -1,3 +1,10 @@
|
||||
import { diagnosticErrorCategory } from "../../infra/diagnostic-error-metadata.js";
|
||||
import {
|
||||
emitTrustedDiagnosticEvent,
|
||||
type DiagnosticHarnessRunErrorEvent,
|
||||
type DiagnosticHarnessRunOutcome,
|
||||
} from "../../infra/diagnostic-events.js";
|
||||
import type { DiagnosticTraceContext } from "../../infra/diagnostic-trace-context.js";
|
||||
import { formatErrorMessage } from "../../infra/errors.js";
|
||||
import { createSubsystemLogger } from "../../logging/subsystem.js";
|
||||
import { applyAgentHarnessResultClassification } from "./result-classification.js";
|
||||
@@ -13,6 +20,7 @@ import type {
|
||||
} from "./types.js";
|
||||
|
||||
const log = createSubsystemLogger("agents/harness/v2");
|
||||
type AgentHarnessV2LifecyclePhase = DiagnosticHarnessRunErrorEvent["phase"];
|
||||
|
||||
type AgentHarnessV2RunBase = {
|
||||
harnessId: string;
|
||||
@@ -95,6 +103,87 @@ export function adaptAgentHarnessToV2(harness: AgentHarness): AgentHarnessV2 {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Builds the fields shared by every harness lifecycle diagnostic event.
 *
 * Run id, session id, provider, model and harness id are always present.
 * Plugin id, session key, trigger, channel and trace are only added when they
 * have a truthy value, so absent inputs never appear as `undefined` keys.
 *
 * @param harness - Harness whose `id` / `pluginId` identify the event source.
 * @param params - Attempt parameters supplying run/session/model context.
 * @param trace - Optional trace context to attach to the event.
 */
function agentHarnessDiagnosticBase(
  harness: AgentHarnessV2,
  params: AgentHarnessAttemptParams,
  trace?: DiagnosticTraceContext,
) {
  const { runId, sessionId, provider, modelId, sessionKey, trigger, messageChannel } = params;

  const pluginPart = harness.pluginId ? { pluginId: harness.pluginId } : {};
  const sessionKeyPart = sessionKey ? { sessionKey } : {};
  const triggerPart = trigger ? { trigger } : {};
  const channelPart = messageChannel ? { channel: messageChannel } : {};
  const tracePart = trace ? { trace } : {};

  return {
    runId,
    sessionId,
    provider,
    model: modelId,
    harnessId: harness.id,
    ...pluginPart,
    ...sessionKeyPart,
    ...triggerPart,
    ...channelPart,
    ...tracePart,
  };
}
|
||||
|
||||
/**
 * Maps a finished harness attempt result to its diagnostic outcome.
 *
 * Precedence, most specific first:
 * 1. `promptError` present → `"error"`
 * 2. any timeout flag (`timedOut`, `idleTimedOut`, `timedOutDuringCompaction`)
 *    → `"timed_out"`
 * 3. `externalAbort` or `aborted` → `"aborted"`
 * 4. otherwise → `"completed"`
 *
 * Timeouts are checked before the generic abort flags so that a result which
 * carries both is reported with the more specific `"timed_out"` outcome.
 * NOTE(review): this assumes a timeout may be surfaced by aborting the attempt
 * (setting `aborted` as well) — confirm against the runner. If the flags are
 * never set together this ordering is equivalent to checking aborts first.
 *
 * @param result - The resolved attempt result.
 * @returns The outcome label emitted on `harness.run.completed`.
 */
function agentHarnessRunOutcome(result: AgentHarnessAttemptResult): DiagnosticHarnessRunOutcome {
  if (result.promptError) {
    return "error";
  }
  if (result.timedOut || result.idleTimedOut || result.timedOutDuringCompaction) {
    return "timed_out";
  }
  if (result.externalAbort || result.aborted) {
    return "aborted";
  }
  return "completed";
}
|
||||
|
||||
/**
 * Emits the trusted `harness.run.started` diagnostic event for an attempt.
 * No trace context is attached at this point.
 *
 * @param harness - Harness about to run the attempt.
 * @param params - Attempt parameters supplying run/session/model context.
 */
function emitAgentHarnessRunStarted(
  harness: AgentHarnessV2,
  params: AgentHarnessAttemptParams,
): void {
  const base = agentHarnessDiagnosticBase(harness, params);
  emitTrustedDiagnosticEvent({ type: "harness.run.started", ...base });
}
|
||||
|
||||
/**
 * Emits the trusted `harness.run.completed` diagnostic event for an attempt
 * whose lifecycle finished without throwing.
 *
 * The event carries the shared base fields (including the result's diagnostic
 * trace), the elapsed time since `startedAt`, the derived outcome, and — only
 * when present on the result — the result classification, the yield flag and
 * a copy of the item lifecycle counts.
 *
 * @param params.harness - Harness that ran the attempt.
 * @param params.attemptParams - Parameters the attempt was started with.
 * @param params.result - The resolved attempt result.
 * @param params.startedAt - `Date.now()` timestamp taken when the attempt began.
 */
function emitAgentHarnessRunCompleted(params: {
  harness: AgentHarnessV2;
  attemptParams: AgentHarnessAttemptParams;
  result: AgentHarnessAttemptResult;
  startedAt: number;
}): void {
  const { harness, attemptParams, result, startedAt } = params;
  emitTrustedDiagnosticEvent({
    type: "harness.run.completed",
    ...agentHarnessDiagnosticBase(harness, attemptParams, result.diagnosticTrace),
    durationMs: Date.now() - startedAt,
    outcome: agentHarnessRunOutcome(result),
    ...(result.agentHarnessResultClassification
      ? { resultClassification: result.agentHarnessResultClassification }
      : {}),
    // `false` is meaningful, so test the type rather than truthiness.
    ...(typeof result.yieldDetected === "boolean" ? { yieldDetected: result.yieldDetected } : {}),
    // Only attach lifecycle counts when the harness actually reported them.
    // Spreading a missing value would emit an empty `{}`, which consumers that
    // test `if (evt.itemLifecycle)` would read as real counts of `undefined`.
    ...(result.itemLifecycle ? { itemLifecycle: { ...result.itemLifecycle } } : {}),
  });
}
|
||||
|
||||
/**
 * Emits the trusted `harness.run.error` diagnostic event for an attempt whose
 * lifecycle threw.
 *
 * The event carries the shared base fields (no trace context), the elapsed
 * time since `startedAt`, the lifecycle phase that failed, and the category
 * derived from the thrown value by `diagnosticErrorCategory`.
 * `cleanupFailed: true` is added only when cleanup also failed.
 *
 * @param params.harness - Harness that ran the attempt.
 * @param params.attemptParams - Parameters the attempt was started with.
 * @param params.startedAt - `Date.now()` timestamp taken when the attempt began.
 * @param params.phase - Lifecycle phase that was active when the error occurred.
 * @param params.error - The thrown value.
 * @param params.cleanupFailed - Whether the follow-up cleanup also failed.
 */
function emitAgentHarnessRunError(params: {
  harness: AgentHarnessV2;
  attemptParams: AgentHarnessAttemptParams;
  startedAt: number;
  phase: AgentHarnessV2LifecyclePhase;
  error: unknown;
  cleanupFailed?: boolean;
}): void {
  const base = agentHarnessDiagnosticBase(params.harness, params.attemptParams);
  const durationMs = Date.now() - params.startedAt;
  const errorCategory = diagnosticErrorCategory(params.error);
  emitTrustedDiagnosticEvent({
    type: "harness.run.error",
    ...base,
    durationMs,
    phase: params.phase,
    errorCategory,
    ...(params.cleanupFailed ? { cleanupFailed: true } : {}),
  });
}
|
||||
|
||||
export async function runAgentHarnessV2LifecycleAttempt(
|
||||
harness: AgentHarnessV2,
|
||||
params: AgentHarnessAttemptParams,
|
||||
@@ -103,13 +192,21 @@ export async function runAgentHarnessV2LifecycleAttempt(
|
||||
let session: AgentHarnessV2Session | undefined;
|
||||
let rawResult: AgentHarnessAttemptResult | undefined;
|
||||
let result: AgentHarnessAttemptResult;
|
||||
let phase: AgentHarnessV2LifecyclePhase = "prepare";
|
||||
const startedAt = Date.now();
|
||||
|
||||
emitAgentHarnessRunStarted(harness, params);
|
||||
try {
|
||||
phase = "prepare";
|
||||
prepared = await harness.prepare(params);
|
||||
phase = "start";
|
||||
session = await harness.start(prepared);
|
||||
phase = "send";
|
||||
rawResult = await harness.send(session);
|
||||
phase = "resolve";
|
||||
result = await harness.resolveOutcome(session, rawResult);
|
||||
} catch (error) {
|
||||
let cleanupFailed = false;
|
||||
try {
|
||||
await harness.cleanup({
|
||||
prepared,
|
||||
@@ -118,6 +215,7 @@ export async function runAgentHarnessV2LifecycleAttempt(
|
||||
...(rawResult === undefined ? {} : { result: rawResult }),
|
||||
});
|
||||
} catch (cleanupError) {
|
||||
cleanupFailed = true;
|
||||
// Preserve the user-visible harness failure. Cleanup errors after a
|
||||
// failed lifecycle stage must not mask the actionable runtime error.
|
||||
log.warn("agent harness cleanup failed after attempt failure", {
|
||||
@@ -128,9 +226,30 @@ export async function runAgentHarnessV2LifecycleAttempt(
|
||||
originalError: formatErrorMessage(error),
|
||||
});
|
||||
}
|
||||
emitAgentHarnessRunError({
|
||||
harness,
|
||||
attemptParams: params,
|
||||
startedAt,
|
||||
phase,
|
||||
error,
|
||||
cleanupFailed,
|
||||
});
|
||||
throw error;
|
||||
}
|
||||
|
||||
await harness.cleanup({ prepared, session, result });
|
||||
try {
|
||||
phase = "cleanup";
|
||||
await harness.cleanup({ prepared, session, result });
|
||||
} catch (error) {
|
||||
emitAgentHarnessRunError({
|
||||
harness,
|
||||
attemptParams: params,
|
||||
startedAt,
|
||||
phase,
|
||||
error,
|
||||
});
|
||||
throw error;
|
||||
}
|
||||
emitAgentHarnessRunCompleted({ harness, attemptParams: params, result, startedAt });
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -256,6 +256,47 @@ export type DiagnosticRunCompletedEvent = DiagnosticRunBaseEvent & {
|
||||
errorCategory?: string;
|
||||
};
|
||||
|
||||
export type DiagnosticHarnessRunPhase = "prepare" | "start" | "send" | "resolve" | "cleanup";
|
||||
export type DiagnosticHarnessRunOutcome = "completed" | "aborted" | "timed_out" | "error";
|
||||
|
||||
type DiagnosticHarnessRunBaseEvent = DiagnosticBaseEvent & {
|
||||
type: "harness.run.started" | "harness.run.completed" | "harness.run.error";
|
||||
runId: string;
|
||||
sessionKey?: string;
|
||||
sessionId?: string;
|
||||
provider?: string;
|
||||
model?: string;
|
||||
trigger?: string;
|
||||
channel?: string;
|
||||
harnessId: string;
|
||||
pluginId?: string;
|
||||
};
|
||||
|
||||
export type DiagnosticHarnessRunStartedEvent = DiagnosticHarnessRunBaseEvent & {
|
||||
type: "harness.run.started";
|
||||
};
|
||||
|
||||
export type DiagnosticHarnessRunCompletedEvent = DiagnosticHarnessRunBaseEvent & {
|
||||
type: "harness.run.completed";
|
||||
durationMs: number;
|
||||
outcome: DiagnosticHarnessRunOutcome;
|
||||
resultClassification?: "empty" | "reasoning-only" | "planning-only";
|
||||
yieldDetected?: boolean;
|
||||
itemLifecycle?: {
|
||||
startedCount: number;
|
||||
completedCount: number;
|
||||
activeCount: number;
|
||||
};
|
||||
};
|
||||
|
||||
export type DiagnosticHarnessRunErrorEvent = DiagnosticHarnessRunBaseEvent & {
|
||||
type: "harness.run.error";
|
||||
durationMs: number;
|
||||
phase: DiagnosticHarnessRunPhase;
|
||||
errorCategory: string;
|
||||
cleanupFailed?: boolean;
|
||||
};
|
||||
|
||||
type DiagnosticModelCallBaseEvent = DiagnosticBaseEvent & {
|
||||
type: "model.call.started" | "model.call.completed" | "model.call.error";
|
||||
runId: string;
|
||||
@@ -392,6 +433,9 @@ export type DiagnosticEventPayload =
|
||||
| DiagnosticExecProcessCompletedEvent
|
||||
| DiagnosticRunStartedEvent
|
||||
| DiagnosticRunCompletedEvent
|
||||
| DiagnosticHarnessRunStartedEvent
|
||||
| DiagnosticHarnessRunCompletedEvent
|
||||
| DiagnosticHarnessRunErrorEvent
|
||||
| DiagnosticModelCallStartedEvent
|
||||
| DiagnosticModelCallCompletedEvent
|
||||
| DiagnosticModelCallErrorEvent
|
||||
@@ -446,6 +490,9 @@ const ASYNC_DIAGNOSTIC_EVENT_TYPES = new Set<DiagnosticEventPayload["type"]>([
|
||||
"model.call.started",
|
||||
"model.call.completed",
|
||||
"model.call.error",
|
||||
"harness.run.started",
|
||||
"harness.run.completed",
|
||||
"harness.run.error",
|
||||
"context.assembled",
|
||||
"log.record",
|
||||
]);
|
||||
|
||||
@@ -305,6 +305,34 @@ function sanitizeDiagnosticEvent(event: DiagnosticEventPayload): DiagnosticStabi
|
||||
record.outcome = event.outcome;
|
||||
assignReasonCode(record, event.errorCategory);
|
||||
break;
|
||||
case "harness.run.started":
|
||||
record.source = event.harnessId;
|
||||
record.pluginId = event.pluginId;
|
||||
record.provider = event.provider;
|
||||
record.model = event.model;
|
||||
record.channel = event.channel;
|
||||
break;
|
||||
case "harness.run.completed":
|
||||
record.source = event.harnessId;
|
||||
record.pluginId = event.pluginId;
|
||||
record.provider = event.provider;
|
||||
record.model = event.model;
|
||||
record.channel = event.channel;
|
||||
record.durationMs = event.durationMs;
|
||||
record.outcome = event.outcome;
|
||||
record.count = event.itemLifecycle?.completedCount;
|
||||
break;
|
||||
case "harness.run.error":
|
||||
record.source = event.harnessId;
|
||||
record.pluginId = event.pluginId;
|
||||
record.provider = event.provider;
|
||||
record.model = event.model;
|
||||
record.channel = event.channel;
|
||||
record.durationMs = event.durationMs;
|
||||
record.outcome = "error";
|
||||
record.action = event.phase;
|
||||
assignReasonCode(record, event.errorCategory);
|
||||
break;
|
||||
case "model.call.started":
|
||||
record.provider = event.provider;
|
||||
record.model = event.model;
|
||||
|
||||
Reference in New Issue
Block a user