[codex] Consolidate RuntimePlan and Harness V2 package (#71722)

* refactor: centralize runtime plan policy surface

* refactor: route embedded attempts through runtime plan

* feat: add agent harness v2 lifecycle adapter

* docs: document agent harness runtime plan

---------

Co-authored-by: Eva <eva@100yen.org>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
EVA
2026-04-26 06:07:04 +07:00
committed by GitHub
parent 114c9a2f3e
commit 2c35a6e599
22 changed files with 1685 additions and 149 deletions

View File

@@ -46,6 +46,23 @@ Before a harness is selected, OpenClaw has already resolved:
That split is intentional. A harness runs a prepared attempt; it does not pick
providers, replace channel delivery, or silently switch models.
The prepared attempt also includes `params.runtimePlan`, an OpenClaw-owned
policy bundle for runtime decisions that must stay shared across PI and native
harnesses:
- `runtimePlan.tools.normalize(...)` and
`runtimePlan.tools.logDiagnostics(...)` for provider-aware tool schema policy
- `runtimePlan.transcript.resolvePolicy(...)` for transcript sanitization and
tool-call repair policy
- `runtimePlan.delivery.isSilentPayload(...)` for shared `NO_REPLY` and media
delivery suppression
- `runtimePlan.outcome.classifyRunResult(...)` for model fallback classification
- `runtimePlan.observability` for resolved provider/model/harness metadata
Harnesses may use the plan for decisions that need to match PI behavior, but
should still treat it as host-owned attempt state. Do not mutate it or use it to
switch providers/models inside a turn.
## Register a harness
**Import:** `openclaw/plugin-sdk/agent-harness`
@@ -162,6 +179,16 @@ middleware, but new result transforms should use the runtime-neutral API.
The Pi-only `api.registerEmbeddedExtensionFactory(...)` hook has been removed;
Pi tool-result transforms must use runtime-neutral middleware.
### Terminal outcome classification
Native harnesses that own their own protocol projection can use
`classifyAgentHarnessTerminalOutcome(...)` from
`openclaw/plugin-sdk/agent-harness-runtime` when a completed turn produced no
visible assistant text. The helper returns `empty`, `reasoning-only`, or
`planning-only` so OpenClaw's fallback policy can decide whether to retry on a
different model. It intentionally leaves prompt errors, in-flight turns, and
intentional silent replies such as `NO_REPLY` unclassified.
### Native Codex harness mode
The bundled `codex` harness is the native Codex mode for embedded OpenClaw

View File

@@ -191,7 +191,7 @@ For the plugin authoring guide, see [Plugin SDK overview](/plugins/sdk-overview)
| `plugin-sdk/models-provider-runtime` | `/models` command/provider reply helpers |
| `plugin-sdk/skill-commands-runtime` | Skill command listing helpers |
| `plugin-sdk/native-command-registry` | Native command registry/build/serialize helpers |
| `plugin-sdk/agent-harness` | Experimental trusted-plugin surface for low-level agent harnesses: harness types, active-run steer/abort helpers, OpenClaw tool bridge helpers, tool progress formatting/detail helpers, and attempt result utilities |
| `plugin-sdk/agent-harness` | Experimental trusted-plugin surface for low-level agent harnesses: harness types, active-run steer/abort helpers, OpenClaw tool bridge helpers, runtime-plan tool policy helpers, terminal outcome classification, tool progress formatting/detail helpers, and attempt result utilities |
| `plugin-sdk/provider-zai-endpoint` | Z.AI endpoint detection helpers |
| `plugin-sdk/infra-runtime` | System event/heartbeat helpers |
| `plugin-sdk/collection-runtime` | Small bounded cache helpers |

View File

@@ -2,18 +2,19 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { AssistantMessage, Usage } from "@mariozechner/pi-ai";
import { SessionManager } from "@mariozechner/pi-coding-agent";
import {
classifyAgentHarnessTerminalOutcome,
embeddedAgentLog,
emitAgentEvent as emitGlobalAgentEvent,
formatErrorMessage,
formatToolAggregate,
formatToolProgressOutput,
inferToolMetaFromArgs,
normalizeUsage,
runAgentHarnessAfterCompactionHook,
runAgentHarnessBeforeCompactionHook,
TOOL_PROGRESS_OUTPUT_MAX_CHARS,
type EmbeddedRunAttemptParams,
type EmbeddedRunAttemptResult,
TOOL_PROGRESS_OUTPUT_MAX_CHARS,
formatToolAggregate,
type MessagingToolSend,
} from "openclaw/plugin-sdk/agent-harness-runtime";
import { readCodexTurn } from "./protocol-validators.js";
@@ -36,10 +37,6 @@ export type CodexAppServerToolTelemetry = {
successfulCronAdds?: number;
};
type AgentHarnessResultClassification = NonNullable<
EmbeddedRunAttemptResult["agentHarnessResultClassification"]
>;
const ZERO_USAGE: Usage = {
input: 0,
output: 0,
@@ -66,25 +63,6 @@ const CURRENT_TOKEN_USAGE_KEYS = [
const MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM = 20;
function classifyTerminalResult(params: {
assistantTexts: string[];
reasoningText: string;
planText: string;
promptError: unknown;
turnCompleted: boolean;
}): AgentHarnessResultClassification | undefined {
if (!params.turnCompleted || params.promptError || params.assistantTexts.length > 0) {
return undefined;
}
if (params.planText.trim()) {
return "planning-only";
}
if (params.reasoningText.trim()) {
return "reasoning-only";
}
return "empty";
}
export class CodexAppServerEventProjector {
private readonly assistantTextByItem = new Map<string, string>();
private readonly assistantItemOrder: string[] = [];
@@ -217,7 +195,7 @@ export class CodexAppServerEventProjector {
const promptError =
this.promptError ??
(turnFailed ? (this.completedTurn?.error?.message ?? "codex app-server turn failed") : null);
const agentHarnessResultClassification = classifyTerminalResult({
const agentHarnessResultClassification = classifyAgentHarnessTerminalOutcome({
assistantTexts,
reasoningText,
planText,

View File

@@ -9,6 +9,7 @@ import {
} from "openclaw/plugin-sdk/agent-harness";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { __testing as nativeHookRelayTesting } from "../../../../src/agents/harness/native-hook-relay.js";
import { buildAgentRuntimePlan } from "../../../../src/agents/runtime-plan/build.js";
import {
onAgentEvent,
resetAgentEventsForTest,
@@ -52,6 +53,28 @@ function createParams(sessionFile: string, workspaceDir: string): EmbeddedRunAtt
} as EmbeddedRunAttemptParams;
}
function createParamsWithRuntimePlan(
sessionFile: string,
workspaceDir: string,
): EmbeddedRunAttemptParams {
const params = createParams(sessionFile, workspaceDir);
return {
...params,
runtimePlan: buildAgentRuntimePlan({
provider: params.provider,
modelId: params.modelId,
model: params.model,
modelApi: params.model.api,
harnessId: "codex",
harnessRuntime: "codex",
config: params.config,
workspaceDir,
agentDir: tempDir,
thinkingLevel: params.thinkLevel,
}),
} as EmbeddedRunAttemptParams;
}
function threadStartResult(threadId = "thread-1") {
return {
thread: {
@@ -364,7 +387,7 @@ describe("runCodexAppServerAttempt", () => {
sessionManager.appendMessage(assistantMessage("existing context", Date.now()));
const harness = createStartedThreadHarness();
const params = createParams(sessionFile, workspaceDir);
const params = createParamsWithRuntimePlan(sessionFile, workspaceDir);
params.onAgentEvent = onRunAgentEvent;
const run = runCodexAppServerAttempt(params);
await harness.waitForMethod("turn/start");
@@ -460,6 +483,8 @@ describe("runCodexAppServerAttempt", () => {
sessionId: "session-1",
provider: "codex",
model: "gpt-5.4-codex",
resolvedRef: "codex/gpt-5.4-codex",
harnessId: "codex",
assistantTexts: ["hello back"],
lastAssistant: expect.objectContaining({
role: "assistant",
@@ -675,9 +700,9 @@ describe("runCodexAppServerAttempt", () => {
return undefined;
});
await expect(runCodexAppServerAttempt(createParams(sessionFile, workspaceDir))).rejects.toThrow(
"turn start exploded",
);
await expect(
runCodexAppServerAttempt(createParamsWithRuntimePlan(sessionFile, workspaceDir)),
).rejects.toThrow("turn start exploded");
await vi.waitFor(() => expect(llmInput).toHaveBeenCalledTimes(1), { interval: 1 });
await vi.waitFor(() => expect(llmOutput).toHaveBeenCalledTimes(1), { interval: 1 });
@@ -687,6 +712,8 @@ describe("runCodexAppServerAttempt", () => {
assistantTexts: [],
model: "gpt-5.4-codex",
provider: "codex",
resolvedRef: "codex/gpt-5.4-codex",
harnessId: "codex",
runId: "run-1",
sessionId: "session-1",
}),

View File

@@ -14,7 +14,7 @@ import {
formatErrorMessage,
isActiveHarnessContextEngine,
isSubagentSessionKey,
normalizeProviderToolSchemas,
normalizeAgentRuntimeTools,
resolveAttemptSpawnWorkspaceDir,
resolveAgentHarnessBeforePromptBuildResult,
resolveModelAuthMode,
@@ -906,23 +906,17 @@ async function buildDynamicTools(input: DynamicToolBuildParams) {
params.toolsAllow && params.toolsAllow.length > 0
? visionFilteredTools.filter((tool) => params.toolsAllow?.includes(tool.name))
: visionFilteredTools;
return (
params.runtimePlan?.tools.normalize(filteredTools, {
workspaceDir: input.effectiveWorkspace,
modelApi: params.model.api,
model: params.model,
}) ??
normalizeProviderToolSchemas({
tools: filteredTools,
provider: params.provider,
config: params.config,
workspaceDir: input.effectiveWorkspace,
env: process.env,
modelId: params.modelId,
modelApi: params.model.api,
model: params.model,
})
);
return normalizeAgentRuntimeTools({
runtimePlan: params.runtimePlan,
tools: filteredTools,
provider: params.provider,
config: params.config,
workspaceDir: input.effectiveWorkspace,
env: process.env,
modelId: params.modelId,
modelApi: params.model.api,
model: params.model,
});
}
async function withCodexStartupTimeout<T>(params: {

View File

@@ -0,0 +1,26 @@
import type {
AgentHarness,
AgentHarnessAttemptParams,
AgentHarnessAttemptResult,
} from "./types.js";
/**
 * Stamp an attempt result with the owning harness id and (re)run the
 * harness's optional `classify` hook.
 *
 * Without a hook, any classification already present on the result is
 * kept as-is. With a hook, the stale classification is stripped first so
 * the hook sees the bare result; an `"ok"`/empty verdict leaves the
 * result unclassified, and any other verdict is recorded.
 */
export function applyAgentHarnessResultClassification(
  harness: Pick<AgentHarness, "id" | "classify">,
  result: AgentHarnessAttemptResult,
  params: AgentHarnessAttemptParams,
): AgentHarnessAttemptResult {
  if (!harness.classify) {
    // No hook registered: pass the harness-supplied result through.
    return { ...result, agentHarnessId: harness.id };
  }
  // Remove any stale classification before re-classifying.
  const { agentHarnessResultClassification: _stale, ...bareResult } = result;
  const verdict = harness.classify(bareResult, params);
  const stamped = { ...bareResult, agentHarnessId: harness.id };
  return !verdict || verdict === "ok"
    ? stamped
    : { ...stamped, agentHarnessResultClassification: verdict };
}

View File

@@ -20,6 +20,7 @@ import type { EmbeddedPiCompactResult } from "../pi-embedded-runner/types.js";
import { createPiAgentHarness } from "./builtin-pi.js";
import { listRegisteredAgentHarnesses } from "./registry.js";
import type { AgentHarness, AgentHarnessSupport } from "./types.js";
import { adaptAgentHarnessToV2, runAgentHarnessV2LifecycleAttempt } from "./v2.js";
const log = createSubsystemLogger("agents/harness");
@@ -187,14 +188,13 @@ export async function runAgentHarnessAttemptWithFallback(
sessionKey: params.sessionKey,
agentId: params.agentId,
});
const v2Harness = adaptAgentHarnessToV2(harness);
if (harness.id === "pi") {
const result = await harness.runAttempt(params);
return applyHarnessResultClassification(harness, result, params);
return await runAgentHarnessV2LifecycleAttempt(v2Harness, params);
}
try {
const result = await harness.runAttempt(params);
return applyHarnessResultClassification(harness, result, params);
return await runAgentHarnessV2LifecycleAttempt(v2Harness, params);
} catch (error) {
log.warn(`${harness.label} failed; not falling back to embedded PI backend`, {
harnessId: harness.id,
@@ -263,22 +263,6 @@ function logAgentHarnessSelection(
});
}
function applyHarnessResultClassification(
harness: AgentHarness,
result: EmbeddedRunAttemptResult,
params: EmbeddedRunAttemptParams,
): EmbeddedRunAttemptResult {
const classification = harness.classify?.(result, params);
if (!classification || classification === "ok") {
return { ...result, agentHarnessId: harness.id };
}
return {
...result,
agentHarnessId: harness.id,
agentHarnessResultClassification: classification,
};
}
function resolvePinnedAgentHarnessPolicy(
agentHarnessId: string | undefined,
): AgentHarnessPolicy | undefined {

View File

@@ -0,0 +1,399 @@
import type { Api, Model } from "@mariozechner/pi-ai";
import { describe, expect, it, vi } from "vitest";
import type { EmbeddedRunAttemptResult } from "../pi-embedded-runner/run/types.js";
import type { AgentHarness, AgentHarnessAttemptParams } from "./types.js";
import type { AgentHarnessV2 } from "./v2.js";
import { adaptAgentHarnessToV2, runAgentHarnessV2LifecycleAttempt } from "./v2.js";
/** Minimal attempt-params fixture shared by the V2 lifecycle tests. */
function createAttemptParams(): AgentHarnessAttemptParams {
  const runIdentity = {
    sessionId: "session-1",
    runId: "run-1",
    sessionFile: "/tmp/session.jsonl",
    workspaceDir: "/tmp/workspace",
  };
  const modelSelection = {
    provider: "codex",
    modelId: "gpt-5.4",
    model: { id: "gpt-5.4", provider: "codex" } as Model<Api>,
    thinkLevel: "low",
  };
  return {
    prompt: "hello",
    timeoutMs: 5_000,
    ...runIdentity,
    ...modelSelection,
    // Never touched by the code under test, so bare stubs suffice.
    authStorage: {} as never,
    modelRegistry: {} as never,
  } as AgentHarnessAttemptParams;
}
/** Successful attempt-result fixture: nothing aborted, one assistant text. */
function createAttemptResult(): EmbeddedRunAttemptResult {
  const terminationFlags = {
    aborted: false,
    externalAbort: false,
    timedOut: false,
    idleTimedOut: false,
    timedOutDuringCompaction: false,
  };
  const messagingToolState = {
    didSendViaMessagingTool: false,
    messagingToolSentTexts: [],
    messagingToolSentMediaUrls: [],
    messagingToolSentTargets: [],
  };
  return {
    ...terminationFlags,
    promptError: null,
    promptErrorSource: null,
    sessionIdUsed: "session-1",
    messagesSnapshot: [],
    assistantTexts: ["ok"],
    toolMetas: [],
    // Explicitly present-but-undefined, matching the runner's shape.
    lastAssistant: undefined,
    ...messagingToolState,
    cloudCodeAssistFormatError: false,
    replayMetadata: { hadPotentialSideEffects: false, replaySafe: true },
    itemLifecycle: { startedCount: 0, completedCount: 0, activeCount: 0 },
  };
}
describe("AgentHarness V2 compatibility adapter", () => {
  // Happy path: every lifecycle stage runs exactly once, in order, and
  // cleanup observes the classified result with no error.
  it("executes prepare/start/send/outcome/cleanup as one bounded lifecycle", async () => {
    const params = createAttemptParams();
    const result = createAttemptResult();
    const events: string[] = [];
    const harness: AgentHarnessV2 = {
      id: "native-v2",
      label: "Native V2",
      supports: () => ({ supported: true }),
      prepare: async (attemptParams) => {
        events.push("prepare");
        expect(attemptParams).toBe(params);
        return {
          harnessId: "native-v2",
          label: "Native V2",
          params,
          lifecycleState: "prepared",
        };
      },
      start: async (prepared) => {
        events.push(`start:${prepared.lifecycleState}`);
        return { ...prepared, lifecycleState: "started" };
      },
      send: async (session) => {
        events.push(`send:${session.lifecycleState}`);
        return result;
      },
      resolveOutcome: async (session, rawResult) => {
        events.push(`outcome:${session.lifecycleState}`);
        return { ...rawResult, agentHarnessId: session.harnessId };
      },
      cleanup: async ({ prepared, session, result: cleanupResult, error }) => {
        expect(prepared?.lifecycleState).toBe("prepared");
        expect(session?.lifecycleState).toBe("started");
        if (!session) {
          throw new Error("expected started session during successful cleanup");
        }
        events.push(`cleanup:${session.lifecycleState}`);
        expect(cleanupResult).toMatchObject({ agentHarnessId: "native-v2" });
        expect(error).toBeUndefined();
      },
    };
    await expect(runAgentHarnessV2LifecycleAttempt(harness, params)).resolves.toMatchObject({
      agentHarnessId: "native-v2",
      sessionIdUsed: "session-1",
    });
    expect(events).toEqual([
      "prepare",
      "start:prepared",
      "send:started",
      "outcome:started",
      "cleanup:started",
    ]);
  });
  // A throwing cleanup hook must not mask the original send failure.
  it("runs cleanup with the original failure and preserves that failure", async () => {
    const params = createAttemptParams();
    const sendError = new Error("codex app-server send failed");
    const cleanup = vi.fn(async () => {
      throw new Error("cleanup should not mask send failure");
    });
    const harness: AgentHarnessV2 = {
      id: "native-v2",
      label: "Native V2",
      supports: () => ({ supported: true }),
      prepare: async () => ({
        harnessId: "native-v2",
        label: "Native V2",
        params,
        lifecycleState: "prepared",
      }),
      start: async (prepared) => ({ ...prepared, lifecycleState: "started" }),
      send: async () => {
        throw sendError;
      },
      resolveOutcome: async (_session, rawResult) => rawResult,
      cleanup,
    };
    await expect(runAgentHarnessV2LifecycleAttempt(harness, params)).rejects.toThrow(
      "codex app-server send failed",
    );
    expect(cleanup).toHaveBeenCalledWith(
      expect.objectContaining({
        error: sendError,
        prepared: expect.objectContaining({ lifecycleState: "prepared" }),
        session: expect.objectContaining({ lifecycleState: "started" }),
      }),
    );
  });
  // Failures before a session exists still run cleanup, with `session`
  // left undefined and no `result` key at all.
  it("runs cleanup for failed prepare/start lifecycle stages", async () => {
    const params = createAttemptParams();
    const startError = new Error("codex app-server start failed");
    const cleanup = vi.fn(async () => {});
    const harness: AgentHarnessV2 = {
      id: "native-v2",
      label: "Native V2",
      supports: () => ({ supported: true }),
      prepare: async () => ({
        harnessId: "native-v2",
        label: "Native V2",
        params,
        lifecycleState: "prepared",
      }),
      start: async () => {
        throw startError;
      },
      send: async () => createAttemptResult(),
      resolveOutcome: async (_session, rawResult) => rawResult,
      cleanup,
    };
    await expect(runAgentHarnessV2LifecycleAttempt(harness, params)).rejects.toThrow(
      "codex app-server start failed",
    );
    expect(cleanup).toHaveBeenCalledWith({
      error: startError,
      prepared: expect.objectContaining({ lifecycleState: "prepared" }),
      session: undefined,
    });
  });
  // When outcome resolution fails, cleanup still receives the raw send
  // result so the harness can release resources tied to it.
  it("passes raw send results to cleanup when outcome resolution fails", async () => {
    const params = createAttemptParams();
    const rawResult = createAttemptResult();
    const outcomeError = new Error("outcome classification failed");
    const cleanup = vi.fn(async () => {});
    const harness: AgentHarnessV2 = {
      id: "native-v2",
      label: "Native V2",
      supports: () => ({ supported: true }),
      prepare: async () => ({
        harnessId: "native-v2",
        label: "Native V2",
        params,
        lifecycleState: "prepared",
      }),
      start: async (prepared) => ({ ...prepared, lifecycleState: "started" }),
      send: async () => rawResult,
      resolveOutcome: async () => {
        throw outcomeError;
      },
      cleanup,
    };
    await expect(runAgentHarnessV2LifecycleAttempt(harness, params)).rejects.toThrow(
      "outcome classification failed",
    );
    expect(cleanup).toHaveBeenCalledWith(
      expect.objectContaining({
        error: outcomeError,
        result: rawResult,
        prepared: expect.objectContaining({ lifecycleState: "prepared" }),
        session: expect.objectContaining({ lifecycleState: "started" }),
      }),
    );
  });
  // After a successful outcome a cleanup failure is real and must surface.
  it("surfaces cleanup failures after successful outcomes", async () => {
    const params = createAttemptParams();
    const harness: AgentHarnessV2 = {
      id: "native-v2",
      label: "Native V2",
      supports: () => ({ supported: true }),
      prepare: async () => ({
        harnessId: "native-v2",
        label: "Native V2",
        params,
        lifecycleState: "prepared",
      }),
      start: async (prepared) => ({ ...prepared, lifecycleState: "started" }),
      send: async () => createAttemptResult(),
      resolveOutcome: async (_session, rawResult) => rawResult,
      cleanup: async () => {
        throw new Error("cleanup failed");
      },
    };
    await expect(runAgentHarnessV2LifecycleAttempt(harness, params)).rejects.toThrow(
      "cleanup failed",
    );
  });
  // Adapter: a V1 runAttempt is reached through prepare/start/send with
  // the exact original params object, untouched.
  it("runs a V1 harness through prepare/start/send without changing attempt params", async () => {
    const params = createAttemptParams();
    const result = createAttemptResult();
    const runAttempt = vi.fn(async () => result);
    const harness: AgentHarness = {
      id: "codex",
      label: "Codex",
      pluginId: "codex-plugin",
      supports: () => ({ supported: true, priority: 100 }),
      runAttempt,
    };
    const v2 = adaptAgentHarnessToV2(harness);
    const prepared = await v2.prepare(params);
    const session = await v2.start(prepared);
    expect(v2.resume).toBeUndefined();
    expect(await v2.send(session)).toBe(result);
    expect(runAttempt).toHaveBeenCalledWith(params);
    expect(session).toMatchObject({
      harnessId: "codex",
      label: "Codex",
      pluginId: "codex-plugin",
      params,
      lifecycleState: "started",
    });
    expect(prepared.lifecycleState).toBe("prepared");
  });
  // Adapter: classify() runs in resolveOutcome, not inside send.
  it("keeps result classification as an explicit outcome stage", async () => {
    const params = createAttemptParams();
    const result = createAttemptResult();
    const classify = vi.fn<NonNullable<AgentHarness["classify"]>>(() => "empty");
    const harness: AgentHarness = {
      id: "codex",
      label: "Codex",
      supports: () => ({ supported: true }),
      runAttempt: vi.fn(async () => result),
      classify,
    };
    const v2 = adaptAgentHarnessToV2(harness);
    const session = await v2.start(await v2.prepare(params));
    expect(await v2.resolveOutcome(session, result)).toMatchObject({
      agentHarnessId: "codex",
      agentHarnessResultClassification: "empty",
    });
    expect(harness.classify).toHaveBeenCalledWith(result, params);
  });
  // Without a classify hook, a classification the harness already set on
  // the result must pass through untouched.
  it("preserves harness-supplied classification when no classify hook is registered", async () => {
    const params = createAttemptParams();
    const result = {
      ...createAttemptResult(),
      agentHarnessResultClassification: "reasoning-only",
    } as EmbeddedRunAttemptResult;
    const harness: AgentHarness = {
      id: "codex",
      label: "Codex",
      supports: () => ({ supported: true }),
      runAttempt: vi.fn(async () => result),
    };
    const v2 = adaptAgentHarnessToV2(harness);
    const session = await v2.start(await v2.prepare(params));
    expect(await v2.resolveOutcome(session, result)).toMatchObject({
      agentHarnessId: "codex",
      agentHarnessResultClassification: "reasoning-only",
    });
  });
  // A classify hook returning "ok" must strip a stale non-ok
  // classification rather than leave it on the result.
  it("clears stale non-ok classification when classification resolves to ok", async () => {
    const params = createAttemptParams();
    const result = {
      ...createAttemptResult(),
      agentHarnessResultClassification: "empty",
    } as EmbeddedRunAttemptResult;
    const classify = vi.fn<NonNullable<AgentHarness["classify"]>>(() => "ok");
    const harness: AgentHarness = {
      id: "codex",
      label: "Codex",
      supports: () => ({ supported: true }),
      runAttempt: vi.fn(async () => result),
      classify,
    };
    const v2 = adaptAgentHarnessToV2(harness);
    const session = await v2.start(await v2.prepare(params));
    const classified = await v2.resolveOutcome(session, result);
    expect(classified).toMatchObject({ agentHarnessId: "codex" });
    expect(classified).not.toHaveProperty("agentHarnessResultClassification");
  });
  // Adapter must forward compact/reset/dispose as method calls so `this`
  // still points at the V1 harness object.
  it("preserves existing compact/reset/dispose hook this binding as compatibility methods", async () => {
    const harness: AgentHarness & {
      compactCalls: number;
      resetCalls: number;
      disposeCalls: number;
    } = {
      id: "custom",
      label: "Custom",
      compactCalls: 0,
      resetCalls: 0,
      disposeCalls: 0,
      supports: () => ({ supported: true }),
      runAttempt: vi.fn(async () => createAttemptResult()),
      async compact() {
        this.compactCalls += 1;
        return {
          ok: true,
          compacted: true,
          result: {
            summary: "done",
            firstKeptEntryId: "entry-1",
            tokensBefore: 100,
          },
        };
      },
      reset(params) {
        expect(params).toEqual({ reason: "reset" });
        this.resetCalls += 1;
      },
      dispose() {
        this.disposeCalls += 1;
      },
    };
    const v2 = adaptAgentHarnessToV2(harness);
    await expect(
      v2.compact?.({
        sessionId: "session-1",
        sessionFile: "/tmp/session.jsonl",
        workspaceDir: "/tmp/workspace",
      }),
    ).resolves.toMatchObject({
      compacted: true,
    });
    await v2.reset?.({ reason: "reset" });
    await v2.dispose?.();
    expect(harness.compactCalls).toBe(1);
    expect(harness.resetCalls).toBe(1);
    expect(harness.disposeCalls).toBe(1);
  });
  // Per-attempt cleanup must never invoke the global V1 dispose() hook.
  it("does not dispose V1 harnesses during per-attempt cleanup", async () => {
    const dispose = vi.fn();
    const harness: AgentHarness = {
      id: "custom",
      label: "Custom",
      supports: () => ({ supported: true }),
      runAttempt: vi.fn(async () => createAttemptResult()),
      dispose,
    };
    const v2 = adaptAgentHarnessToV2(harness);
    const session = await v2.start(await v2.prepare(createAttemptParams()));
    await v2.cleanup({ session, result: createAttemptResult() });
    expect(dispose).not.toHaveBeenCalled();
  });
});

136
src/agents/harness/v2.ts Normal file
View File

@@ -0,0 +1,136 @@
import { formatErrorMessage } from "../../infra/errors.js";
import { createSubsystemLogger } from "../../logging/subsystem.js";
import { applyAgentHarnessResultClassification } from "./result-classification.js";
import type {
AgentHarness,
AgentHarnessAttemptParams,
AgentHarnessAttemptResult,
AgentHarnessCompactParams,
AgentHarnessCompactResult,
AgentHarnessResetParams,
AgentHarnessSupport,
AgentHarnessSupportContext,
} from "./types.js";
const log = createSubsystemLogger("agents/harness/v2");

/** Identity and attempt params carried through every V2 lifecycle state. */
type AgentHarnessV2RunBase = {
  harnessId: string;
  label: string;
  pluginId?: string;
  params: AgentHarnessAttemptParams;
};

/** Result of `prepare`: the attempt is resolved but no session runs yet. */
export type AgentHarnessV2PreparedRun = AgentHarnessV2RunBase & {
  lifecycleState: "prepared";
};

/** Result of `start`/`resume`: a live session that can accept `send`. */
export type AgentHarnessV2Session = AgentHarnessV2RunBase & {
  lifecycleState: "started";
};

/** Tool invocation surfaced to the optional `handleToolCall` hook. */
export type AgentHarnessV2ToolCall = {
  id?: string;
  name: string;
  input?: unknown;
};

/**
 * Context handed to `cleanup`. Only the stages that actually completed
 * are populated; `error` is set when an earlier lifecycle stage failed.
 */
export type AgentHarnessV2CleanupParams = {
  prepared?: AgentHarnessV2PreparedRun;
  session?: AgentHarnessV2Session;
  result?: AgentHarnessAttemptResult;
  error?: unknown;
};

/**
 * Staged harness lifecycle surface: prepare -> start (or resume) ->
 * send -> resolveOutcome, with exactly one per-attempt cleanup.
 * `compact`, `reset` and `dispose` remain the session/global hooks
 * carried over from the V1 AgentHarness shape.
 */
export type AgentHarnessV2 = {
  id: string;
  label: string;
  pluginId?: string;
  supports(ctx: AgentHarnessSupportContext): AgentHarnessSupport;
  prepare(params: AgentHarnessAttemptParams): Promise<AgentHarnessV2PreparedRun>;
  start(prepared: AgentHarnessV2PreparedRun): Promise<AgentHarnessV2Session>;
  resume?(session: AgentHarnessV2Session): Promise<AgentHarnessV2Session>;
  send(session: AgentHarnessV2Session): Promise<AgentHarnessAttemptResult>;
  handleToolCall?(session: AgentHarnessV2Session, call: AgentHarnessV2ToolCall): Promise<unknown>;
  resolveOutcome(
    session: AgentHarnessV2Session,
    result: AgentHarnessAttemptResult,
  ): Promise<AgentHarnessAttemptResult>;
  cleanup(params: AgentHarnessV2CleanupParams): Promise<void>;
  compact?(params: AgentHarnessCompactParams): Promise<AgentHarnessCompactResult | undefined>;
  reset?(params: AgentHarnessResetParams): Promise<void> | void;
  dispose?(): Promise<void> | void;
};
/**
 * Wrap a V1 AgentHarness in the V2 lifecycle surface.
 *
 * A V1 harness executes an attempt as a single `runAttempt` call, so the
 * adapter models prepare/start as pure state transitions and maps `send`
 * straight onto `runAttempt`. Classification stays an explicit outcome
 * stage, and per-attempt cleanup is a deliberate no-op because V1 only
 * exposes the global `dispose` hook.
 */
export function adaptAgentHarnessToV2(harness: AgentHarness): AgentHarnessV2 {
  const identity = {
    id: harness.id,
    label: harness.label,
    pluginId: harness.pluginId,
  };
  return {
    ...identity,
    supports: (ctx) => harness.supports(ctx),
    prepare: async (params) => ({
      harnessId: harness.id,
      label: harness.label,
      pluginId: harness.pluginId,
      params,
      lifecycleState: "prepared",
    }),
    // Starting only flips the lifecycle state; the run itself happens in send.
    start: async (prepared) => ({ ...prepared, lifecycleState: "started" }),
    send: async (session) => await harness.runAttempt(session.params),
    resolveOutcome: async (session, result) =>
      applyAgentHarnessResultClassification(harness, result, session.params),
    cleanup: async () => {
      // V1 harnesses have no per-attempt cleanup hook. Global cleanup remains
      // on dispose(), which must not run after every attempt.
    },
    compact: harness.compact ? (params) => harness.compact!(params) : undefined,
    reset: harness.reset ? (params) => harness.reset!(params) : undefined,
    dispose: harness.dispose ? () => harness.dispose!() : undefined,
  };
}
export async function runAgentHarnessV2LifecycleAttempt(
harness: AgentHarnessV2,
params: AgentHarnessAttemptParams,
): Promise<AgentHarnessAttemptResult> {
let prepared: AgentHarnessV2PreparedRun | undefined;
let session: AgentHarnessV2Session | undefined;
let rawResult: AgentHarnessAttemptResult | undefined;
let result: AgentHarnessAttemptResult;
try {
prepared = await harness.prepare(params);
session = await harness.start(prepared);
rawResult = await harness.send(session);
result = await harness.resolveOutcome(session, rawResult);
} catch (error) {
try {
await harness.cleanup({
prepared,
session,
error,
...(rawResult === undefined ? {} : { result: rawResult }),
});
} catch (cleanupError) {
// Preserve the user-visible harness failure. Cleanup errors after a
// failed lifecycle stage must not mask the actionable runtime error.
log.warn("agent harness cleanup failed after attempt failure", {
harnessId: harness.id,
provider: params.provider,
modelId: params.modelId,
error: formatErrorMessage(cleanupError),
originalError: formatErrorMessage(error),
});
}
throw error;
}
await harness.cleanup({ prepared, session, result });
return result;
}

View File

@@ -1,4 +1,7 @@
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
import type { AgentHarness } from "../harness/types.js";
import type { AgentInternalEvent } from "../internal-events.js";
import type { AgentRuntimePlan } from "../runtime-plan/types.js";
import {
makeAttemptResult,
makeCompactionSuccess,
@@ -8,6 +11,7 @@ import {
} from "./run.overflow-compaction.fixture.js";
import {
loadRunOverflowCompactionHarness,
mockedBuildAgentRuntimePlan,
mockedBuildEmbeddedRunPayloads,
mockedCoerceToFailoverError,
mockedCompactDirect,
@@ -26,8 +30,111 @@ import {
overflowBaseRunParams,
resetRunOverflowCompactionHarnessMocks,
} from "./run.overflow-compaction.harness.js";
import type { RunEmbeddedPiAgentParams } from "./run/params.js";
import type { EmbeddedRunAttemptParams } from "./run/types.js";
let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent;
// Deep-partial override shape for the runtime-plan fixture: `auth` and
// `resolvedRef` merge field-by-field; every other section replaces wholesale.
type RuntimePlanOverrides = Partial<Omit<AgentRuntimePlan, "auth" | "resolvedRef">> & {
  auth?: Partial<AgentRuntimePlan["auth"]>;
  resolvedRef?: Partial<AgentRuntimePlan["resolvedRef"]>;
};

/**
 * Fixture case asserting that optional run params are forwarded verbatim
 * into a single embedded attempt call. `expected` mirrors `params` minus
 * `internalEvents`, which the test compares by identity instead.
 */
function makeForwardingCase(internalEvents: AgentInternalEvent[]) {
  return {
    runId: "forward-attempt-params",
    params: {
      toolsAllow: ["exec", "read"],
      bootstrapContextMode: "lightweight",
      bootstrapContextRunKind: "cron",
      disableMessageTool: true,
      forceMessageTool: true,
      requireExplicitMessageTarget: true,
      internalEvents,
    },
    expected: {
      toolsAllow: ["exec", "read"],
      bootstrapContextMode: "lightweight",
      bootstrapContextRunKind: "cron",
      disableMessageTool: true,
      forceMessageTool: true,
      requireExplicitMessageTarget: true,
    },
  } satisfies {
    runId: string;
    params: Partial<RunEmbeddedPiAgentParams>;
    expected: Record<string, unknown>;
  };
}
/**
 * Build a fully-populated AgentRuntimePlan test double. Defaults describe
 * an Anthropic/pi attempt; overrides replace whole sections except `auth`
 * and `resolvedRef`, which merge field-by-field (see RuntimePlanOverrides).
 */
function makeForwardedRuntimePlan(overrides: RuntimePlanOverrides = {}): AgentRuntimePlan {
  // Baseline transcript policy; resolvePolicy below derives variants from it.
  const transcriptPolicy = {
    sanitizeMode: "full",
    sanitizeToolCallIds: true,
    preserveNativeAnthropicToolUseIds: false,
    repairToolUseResultPairing: true,
    preserveSignatures: false,
    sanitizeThinkingSignatures: true,
    dropThinkingBlocks: false,
    applyGoogleTurnOrdering: false,
    validateGeminiTurns: false,
    validateAnthropicTurns: false,
    allowSyntheticToolResults: false,
  } satisfies AgentRuntimePlan["transcript"]["policy"];
  const basePlan: AgentRuntimePlan = {
    auth: {
      authProfileProviderForAuth: "anthropic",
      providerForAuth: "anthropic",
    },
    delivery: {
      isSilentPayload: vi.fn(() => false),
      resolveFollowupRoute: vi.fn(),
    },
    observability: {
      provider: "anthropic",
      resolvedRef: "anthropic/test-model",
      modelId: "test-model",
    },
    outcome: {
      classifyRunResult: vi.fn(() => undefined),
    },
    prompt: {
      provider: "anthropic",
      modelId: "test-model",
      resolveSystemPromptContribution: vi.fn(),
    },
    transcript: {
      policy: transcriptPolicy,
      // Only sanitizeMode varies by model API in this double.
      resolvePolicy: vi.fn((params): AgentRuntimePlan["transcript"]["policy"] => ({
        ...transcriptPolicy,
        sanitizeMode: params?.modelApi === "anthropic-messages" ? "full" : "images-only",
      })),
    },
    transport: {
      extraParams: {},
      resolveExtraParams: vi.fn(() => ({})),
    },
    resolvedRef: {
      provider: "anthropic",
      modelId: "test-model",
      harnessId: "pi",
    },
    tools: {
      // Identity normalize keeps assertions about forwarded tools simple.
      normalize: vi.fn((tools) => tools),
      logDiagnostics: vi.fn(),
    },
  };
  return {
    ...basePlan,
    ...overrides,
    auth: {
      ...basePlan.auth,
      ...overrides.auth,
    },
    resolvedRef: {
      ...basePlan.resolvedRef,
      ...overrides.resolvedRef,
    },
  };
}
describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
beforeAll(async () => {
@@ -83,9 +190,61 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
);
});
it("forwards optional attempt params and the runtime plan into one attempt call", async () => {
const internalEvents: AgentInternalEvent[] = [];
const forwardingCase = makeForwardingCase(internalEvents);
const runtimePlan = makeForwardedRuntimePlan();
mockedBuildAgentRuntimePlan.mockReturnValueOnce(runtimePlan);
mockedRunEmbeddedAttempt.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
await runEmbeddedPiAgent({
...overflowBaseRunParams,
...forwardingCase.params,
runId: forwardingCase.runId,
});
expect(mockedBuildAgentRuntimePlan).toHaveBeenCalledTimes(1);
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledWith(
expect.objectContaining({
...forwardingCase.expected,
runtimePlan: expect.objectContaining({
resolvedRef: expect.objectContaining({
provider: "anthropic",
modelId: "test-model",
}),
tools: expect.objectContaining({
normalize: expect.any(Function),
}),
transport: expect.objectContaining({
resolveExtraParams: expect.any(Function),
}),
}),
}),
);
const attemptParams = mockedRunEmbeddedAttempt.mock.calls[0]?.[0] as
| EmbeddedRunAttemptParams
| undefined;
expect(attemptParams?.runtimePlan).toBe(runtimePlan);
expect(attemptParams?.internalEvents).toBe(internalEvents);
});
it("forwards explicit OpenAI Codex auth profiles to codex plugin harnesses", async () => {
const { clearAgentHarnesses, registerAgentHarness } = await import("../harness/registry.js");
const pluginRunAttempt = vi.fn(async () => makeAttemptResult({ assistantTexts: ["ok"] }));
const pluginRunAttempt = vi.fn<AgentHarness["runAttempt"]>(async () =>
makeAttemptResult({ assistantTexts: ["ok"] }),
);
const runtimePlan = makeForwardedRuntimePlan({
resolvedRef: {
provider: "codex",
modelId: "gpt-5.4",
harnessId: "codex",
},
auth: {
harnessAuthProvider: "openai-codex",
forwardedAuthProfileId: "openai-codex:work",
},
});
clearAgentHarnesses();
registerAgentHarness({
id: "codex",
@@ -94,6 +253,7 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
ctx.provider === "codex" ? { supported: true, priority: 100 } : { supported: false },
runAttempt: pluginRunAttempt,
});
mockedBuildAgentRuntimePlan.mockReturnValueOnce(runtimePlan);
mockedGetApiKeyForModel.mockRejectedValueOnce(new Error("generic auth should be skipped"));
try {
@@ -117,18 +277,47 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
}
expect(mockedGetApiKeyForModel).not.toHaveBeenCalled();
expect(mockedBuildAgentRuntimePlan).toHaveBeenCalledTimes(1);
expect(pluginRunAttempt).toHaveBeenCalledTimes(1);
expect(pluginRunAttempt).toHaveBeenCalledWith(
expect.objectContaining({
provider: "codex",
authProfileId: "openai-codex:work",
authProfileIdSource: "user",
runtimePlan: expect.objectContaining({
resolvedRef: expect.objectContaining({
provider: "codex",
modelId: "gpt-5.4",
harnessId: "codex",
}),
auth: expect.objectContaining({
harnessAuthProvider: "openai-codex",
forwardedAuthProfileId: "openai-codex:work",
}),
}),
}),
);
const harnessParams = pluginRunAttempt.mock.calls[0]?.[0];
expect(harnessParams?.runtimePlan).toBe(runtimePlan);
});
it("forwards OpenAI Codex auth profiles when openai/* is forced through codex", async () => {
const { clearAgentHarnesses, registerAgentHarness } = await import("../harness/registry.js");
const pluginRunAttempt = vi.fn(async () => makeAttemptResult({ assistantTexts: ["ok"] }));
const pluginRunAttempt = vi.fn<AgentHarness["runAttempt"]>(async () =>
makeAttemptResult({ assistantTexts: ["ok"] }),
);
const runtimePlan = makeForwardedRuntimePlan({
resolvedRef: {
provider: "openai",
modelId: "gpt-5.4",
harnessId: "codex",
},
auth: {
providerForAuth: "openai",
harnessAuthProvider: "openai-codex",
forwardedAuthProfileId: "openai-codex:work",
},
});
clearAgentHarnesses();
registerAgentHarness({
id: "codex",
@@ -136,6 +325,7 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
supports: () => ({ supported: false }),
runAttempt: pluginRunAttempt,
});
mockedBuildAgentRuntimePlan.mockReturnValueOnce(runtimePlan);
mockedGetApiKeyForModel.mockRejectedValueOnce(new Error("generic auth should be skipped"));
try {
@@ -159,13 +349,29 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
}
expect(mockedGetApiKeyForModel).not.toHaveBeenCalled();
expect(mockedBuildAgentRuntimePlan).toHaveBeenCalledTimes(1);
expect(pluginRunAttempt).toHaveBeenCalledTimes(1);
expect(pluginRunAttempt).toHaveBeenCalledWith(
expect.objectContaining({
provider: "openai",
authProfileId: "openai-codex:work",
authProfileIdSource: "user",
runtimePlan: expect.objectContaining({
resolvedRef: expect.objectContaining({
provider: "openai",
modelId: "gpt-5.4",
harnessId: "codex",
}),
auth: expect.objectContaining({
providerForAuth: "openai",
harnessAuthProvider: "openai-codex",
forwardedAuthProfileId: "openai-codex:work",
}),
}),
}),
);
const harnessParams = pluginRunAttempt.mock.calls[0]?.[0];
expect(harnessParams?.runtimePlan).toBe(runtimePlan);
});
it("blocks undersized models before dispatching a provider attempt", async () => {

View File

@@ -0,0 +1,93 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import type { ProviderRuntimeModel } from "../../../plugins/provider-runtime-model.types.js";
import type { AgentRuntimePlan } from "../../runtime-plan/types.js";
import { resolveAttemptTranscriptPolicy } from "./attempt.transcript-policy.js";
const resolveProviderRuntimePluginMock = vi.hoisted(() => vi.fn());
vi.mock("../../../plugins/provider-hook-runtime.js", () => ({
resolveProviderRuntimePlugin: resolveProviderRuntimePluginMock,
}));
// Unit coverage for resolveAttemptTranscriptPolicy: verifies both the
// RuntimePlan-first path and the legacy provider fallback path.
describe("resolveAttemptTranscriptPolicy", () => {
  beforeEach(() => {
    // Reset the hoisted provider-runtime mock so each test only observes its own calls.
    resolveProviderRuntimePluginMock.mockReset();
    resolveProviderRuntimePluginMock.mockReturnValue(undefined);
  });
  it("uses RuntimePlan transcript policy when available", () => {
    // Fully-specified policy object returned verbatim by the plan's resolver;
    // the resolver output must win over any legacy fallback computation.
    const plannedPolicy = {
      sanitizeMode: "full",
      sanitizeToolCallIds: true,
      toolCallIdMode: "strict",
      preserveNativeAnthropicToolUseIds: false,
      repairToolUseResultPairing: true,
      preserveSignatures: true,
      sanitizeThinkingSignatures: false,
      dropThinkingBlocks: true,
      applyGoogleTurnOrdering: false,
      validateGeminiTurns: false,
      validateAnthropicTurns: true,
      allowSyntheticToolResults: true,
    } as const;
    const resolvePolicy = vi.fn(() => plannedPolicy);
    // Minimal plan stub: only the transcript surface is exercised here.
    const runtimePlan = {
      transcript: {
        resolvePolicy,
      },
    } as unknown as AgentRuntimePlan;
    const runtimePlanModelContext = {
      workspaceDir: "/tmp/openclaw-transcript-policy",
      modelApi: "anthropic-messages",
      model: {
        id: "claude-opus-4.6",
        name: "Claude Opus 4.6",
        api: "anthropic-messages",
        provider: "anthropic",
        baseUrl: "https://api.anthropic.com",
        reasoning: true,
        input: ["text"],
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
        contextWindow: 200_000,
        maxTokens: 8_192,
      } satisfies ProviderRuntimeModel,
    };
    // The exact planned object must be returned (identity, not a copy) …
    expect(
      resolveAttemptTranscriptPolicy({
        runtimePlan,
        runtimePlanModelContext,
        provider: "anthropic",
        modelId: "claude-opus-4.6",
      }),
    ).toBe(plannedPolicy);
    // … and the resolver must have received the untouched model context.
    expect(resolvePolicy).toHaveBeenCalledWith(runtimePlanModelContext);
  });
  it("keeps the legacy provider transcript fallback when no RuntimePlan is available", () => {
    const env = { OPENCLAW_TEST_TRANSCRIPT_POLICY: "1" } as NodeJS.ProcessEnv;
    // No runtimePlan supplied: the call must route through the legacy resolver.
    const policy = resolveAttemptTranscriptPolicy({
      runtimePlanModelContext: {
        workspaceDir: "/tmp/openclaw-transcript-policy",
        modelApi: "openai-responses",
      },
      provider: "custom-openai-compatible",
      modelId: "gpt-5.4",
      env,
    });
    expect(policy).toMatchObject({
      sanitizeMode: "images-only",
      sanitizeToolCallIds: true,
      toolCallIdMode: "strict",
      repairToolUseResultPairing: true,
      allowSyntheticToolResults: false,
    });
    // The legacy path consults the provider runtime plugin with the caller's env.
    expect(resolveProviderRuntimePluginMock).toHaveBeenCalledWith({
      provider: "custom-openai-compatible",
      config: undefined,
      workspaceDir: "/tmp/openclaw-transcript-policy",
      env,
    });
  });
});

View File

@@ -0,0 +1,36 @@
import type { OpenClawConfig } from "../../../config/types.openclaw.js";
import type { ProviderRuntimeModel } from "../../../plugins/provider-runtime-model.types.js";
import type { AgentRuntimePlan } from "../../runtime-plan/types.js";
import { resolveTranscriptPolicy, type TranscriptPolicy } from "../../transcript-policy.js";
export type AttemptRuntimeModelContext = NonNullable<
Parameters<AgentRuntimePlan["transcript"]["resolvePolicy"]>[0]
>;
/**
 * Narrows the runtime-plan model context entry to a concrete
 * ProviderRuntimeModel. Only values carrying a string `id` qualify; anything
 * else (undefined, null, or a partial shape) resolves to undefined.
 */
function asProviderRuntimeModel(
  model: AttemptRuntimeModelContext["model"],
): ProviderRuntimeModel | undefined {
  if (typeof model?.id !== "string") {
    return undefined;
  }
  return model as ProviderRuntimeModel;
}
/**
 * Resolves the transcript sanitization policy for an embedded attempt.
 *
 * The host-owned RuntimePlan policy wins when a plan was forwarded with the
 * attempt; otherwise this falls back to the legacy provider-level resolver so
 * callers that do not carry a plan yet keep their previous behavior.
 *
 * @param params.runtimePlan optional OpenClaw-owned runtime policy bundle
 * @param params.runtimePlanModelContext workspace/model context shared with plan hooks
 * @param params.env defaults to process.env for the legacy fallback
 * @returns the transcript policy to apply to this attempt
 */
export function resolveAttemptTranscriptPolicy(params: {
  runtimePlan?: AgentRuntimePlan;
  runtimePlanModelContext: AttemptRuntimeModelContext;
  provider: string;
  modelId: string;
  config?: OpenClawConfig;
  env?: NodeJS.ProcessEnv;
}): TranscriptPolicy {
  // Plan-owned policy takes precedence (matches the original `??` semantics:
  // both null and undefined fall through to the legacy resolver).
  const planned = params.runtimePlan?.transcript.resolvePolicy(params.runtimePlanModelContext);
  if (planned != null) {
    return planned;
  }
  const context = params.runtimePlanModelContext;
  return resolveTranscriptPolicy({
    modelApi: context.modelApi,
    provider: params.provider,
    modelId: params.modelId,
    config: params.config,
    workspaceDir: context.workspaceDir,
    env: params.env ?? process.env,
    model: asProviderRuntimeModel(context.model),
  });
}

View File

@@ -117,6 +117,10 @@ import {
import { wrapStreamFnTextTransforms } from "../../plugin-text-transforms.js";
import { describeProviderRequestRoutingSummary } from "../../provider-attribution.js";
import { registerProviderStreamForModel } from "../../provider-stream.js";
import {
logAgentRuntimeToolDiagnostics,
normalizeAgentRuntimeTools,
} from "../../runtime-plan/tools.js";
import { resolveSandboxContext } from "../../sandbox.js";
import { resolveSandboxRuntimeStatus } from "../../sandbox/runtime-status.js";
import { repairSessionFileIfNeeded } from "../../session-file-repair.js";
@@ -148,10 +152,7 @@ import {
collectExplicitToolAllowlistSources,
} from "../../tool-allowlist-guard.js";
import { UNKNOWN_TOOL_THRESHOLD } from "../../tool-loop-detection.js";
import {
resolveTranscriptPolicy,
shouldAllowProviderOwnedThinkingReplay,
} from "../../transcript-policy.js";
import { shouldAllowProviderOwnedThinkingReplay } from "../../transcript-policy.js";
import { normalizeUsage, type NormalizedUsage } from "../../usage.js";
import { DEFAULT_BOOTSTRAP_FILENAME } from "../../workspace.js";
import { isRunnerAbortError } from "../abort.js";
@@ -219,10 +220,6 @@ import {
resolveLiveToolResultMaxChars,
truncateOversizedToolResultsInSessionManager,
} from "../tool-result-truncation.js";
import {
logProviderToolSchemaDiagnostics,
normalizeProviderToolSchemas,
} from "../tool-schema-runtime.js";
import { splitSdkTools } from "../tool-split.js";
import { mapThinkingLevel } from "../utils.js";
import { flushPendingToolResultsAfterIdle } from "../wait-for-idle-before-flush.js";
@@ -290,6 +287,7 @@ import {
wrapStreamFnTrimToolCallNames,
} from "./attempt.tool-call-normalization.js";
import { buildEmbeddedAttemptToolRunContext } from "./attempt.tool-run-context.js";
import { resolveAttemptTranscriptPolicy } from "./attempt.transcript-policy.js";
import { waitForCompactionRetryWithAggregateTimeout } from "./compaction-retry-aggregate-timeout.js";
import {
resolveRunTimeoutDuringCompaction,
@@ -844,18 +842,17 @@ export async function runEmbeddedAttempt(
modelApi: params.model.api,
model: params.model,
};
const tools =
params.runtimePlan?.tools.normalize(toolsEnabled ? toolsRaw : [], runtimePlanModelContext) ??
normalizeProviderToolSchemas({
tools: toolsEnabled ? toolsRaw : [],
provider: params.provider,
config: params.config,
workspaceDir: effectiveWorkspace,
env: process.env,
modelId: params.modelId,
modelApi: params.model.api,
model: params.model,
});
const tools = normalizeAgentRuntimeTools({
runtimePlan: params.runtimePlan,
tools: toolsEnabled ? toolsRaw : [],
provider: params.provider,
config: params.config,
workspaceDir: effectiveWorkspace,
env: process.env,
modelId: params.modelId,
modelApi: params.model.api,
model: params.model,
});
const clientTools = toolsEnabled ? params.clientTools : undefined;
const bundleMcpEnabled = shouldCreateBundleMcpRuntimeForAttempt({
toolsEnabled,
@@ -942,20 +939,17 @@ export async function runEmbeddedAttempt(
toolsEnabled,
disableTools: params.disableTools,
});
if (params.runtimePlan) {
params.runtimePlan.tools.logDiagnostics(effectiveTools, runtimePlanModelContext);
} else {
logProviderToolSchemaDiagnostics({
tools: effectiveTools,
provider: params.provider,
config: params.config,
workspaceDir: effectiveWorkspace,
env: process.env,
modelId: params.modelId,
modelApi: params.model.api,
model: params.model,
});
}
logAgentRuntimeToolDiagnostics({
runtimePlan: params.runtimePlan,
tools: effectiveTools,
provider: params.provider,
config: params.config,
workspaceDir: effectiveWorkspace,
env: process.env,
modelId: params.modelId,
modelApi: params.model.api,
model: params.model,
});
const machineName = await getMachineDisplayName();
const runtimeChannel = normalizeMessageChannel(params.messageChannel ?? params.messageProvider);
@@ -1201,17 +1195,14 @@ export async function runEmbeddedAttempt(
.then(() => true)
.catch(() => false);
const transcriptPolicy =
params.runtimePlan?.transcript.resolvePolicy(runtimePlanModelContext) ??
resolveTranscriptPolicy({
modelApi: params.model?.api,
provider: params.provider,
modelId: params.modelId,
config: params.config,
workspaceDir: effectiveWorkspace,
env: process.env,
model: params.model,
});
const transcriptPolicy = resolveAttemptTranscriptPolicy({
runtimePlan: params.runtimePlan,
runtimePlanModelContext,
provider: params.provider,
modelId: params.modelId,
config: params.config,
env: process.env,
});
await prewarmSessionFile(params.sessionFile);
sessionManager = guardSessionManager(SessionManager.open(params.sessionFile), {

View File

@@ -1,7 +1,10 @@
import type { AgentTool } from "@mariozechner/pi-agent-core";
import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload";
import type { TSchema } from "typebox";
import type { ThinkLevel } from "../../auto-reply/thinking.js";
import { isSilentReplyPayloadText, SILENT_REPLY_TOKEN } from "../../auto-reply/tokens.js";
import type { OpenClawConfig } from "../../config/types.openclaw.js";
import type { ProviderRuntimeModel } from "../../plugins/provider-runtime-model.types.js";
import {
resolveProviderFollowupFallbackRoute,
resolveProviderSystemPromptContribution,
@@ -30,9 +33,26 @@ function hasMedia(payload: { mediaUrl?: string; mediaUrls?: string[] }): boolean
return resolveSendableOutboundReplyParts(payload).hasMedia;
}
/**
 * Treats an opaque runtime-plan config value as an OpenClawConfig when it is
 * a plain (non-array) object; every other shape resolves to undefined.
 */
function asOpenClawConfig(value: unknown): OpenClawConfig | undefined {
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return undefined;
  }
  return value as OpenClawConfig;
}
/**
 * Casts the structural plan-params model to the concrete ProviderRuntimeModel
 * shape. Pure pass-through: any defined value is accepted unchanged.
 */
function asProviderRuntimeModel(
  value: BuildAgentRuntimePlanParams["model"],
): ProviderRuntimeModel | undefined {
  if (value === undefined) {
    return undefined;
  }
  return value as ProviderRuntimeModel;
}
/**
 * Casts the structural thinking-level value to the concrete ThinkLevel union.
 * Pure pass-through: any defined value is accepted unchanged.
 */
function asThinkLevel(value: BuildAgentRuntimePlanParams["thinkingLevel"]): ThinkLevel | undefined {
  if (value === undefined) {
    return undefined;
  }
  return value as ThinkLevel;
}
export function buildAgentRuntimeDeliveryPlan(
params: BuildAgentRuntimeDeliveryPlanParams,
): AgentRuntimeDeliveryPlan {
const config = asOpenClawConfig(params.config);
return {
isSilentPayload(payload): boolean {
return isSilentReplyPayloadText(payload.text, SILENT_REPLY_TOKEN) && !hasMedia(payload);
@@ -40,10 +60,10 @@ export function buildAgentRuntimeDeliveryPlan(
resolveFollowupRoute(routeParams) {
return resolveProviderFollowupFallbackRoute({
provider: params.provider,
config: params.config,
config,
workspaceDir: params.workspaceDir,
context: {
config: params.config,
config,
agentDir: params.agentDir,
workspaceDir: params.workspaceDir,
provider: params.provider,
@@ -66,13 +86,15 @@ export function buildAgentRuntimeOutcomePlan(): AgentRuntimeOutcomePlan {
}
export function buildAgentRuntimePlan(params: BuildAgentRuntimePlanParams): AgentRuntimePlan {
const config = asOpenClawConfig(params.config);
const model = asProviderRuntimeModel(params.model);
const modelApi = params.modelApi ?? params.model?.api ?? undefined;
const transport = params.resolvedTransport;
const auth = buildAgentRuntimeAuthPlan({
provider: params.provider,
authProfileProvider: params.authProfileProvider,
sessionAuthProfileId: params.sessionAuthProfileId,
config: params.config,
config,
workspaceDir: params.workspaceDir,
harnessId: params.harnessId,
harnessRuntime: params.harnessRuntime,
@@ -87,12 +109,12 @@ export function buildAgentRuntimePlan(params: BuildAgentRuntimePlanParams): Agen
};
const toolContext = {
provider: params.provider,
config: params.config,
config,
workspaceDir: params.workspaceDir,
env: process.env,
modelId: params.modelId,
modelApi,
model: params.model,
model,
};
const resolveToolContext = (overrides?: {
workspaceDir?: string;
@@ -102,7 +124,7 @@ export function buildAgentRuntimePlan(params: BuildAgentRuntimePlanParams): Agen
...toolContext,
...(overrides?.workspaceDir !== undefined ? { workspaceDir: overrides.workspaceDir } : {}),
...(overrides?.modelApi !== undefined ? { modelApi: overrides.modelApi } : {}),
...(overrides?.model !== undefined ? { model: overrides.model } : {}),
...(overrides?.model !== undefined ? { model: asProviderRuntimeModel(overrides.model) } : {}),
});
const resolveTranscriptRuntimePolicy = (overrides?: {
workspaceDir?: string;
@@ -112,25 +134,25 @@ export function buildAgentRuntimePlan(params: BuildAgentRuntimePlanParams): Agen
resolveTranscriptPolicy({
provider: params.provider,
modelId: params.modelId,
config: params.config,
config,
workspaceDir: overrides?.workspaceDir ?? params.workspaceDir,
env: process.env,
modelApi: overrides?.modelApi ?? modelApi,
model: overrides?.model ?? params.model,
model: asProviderRuntimeModel(overrides?.model) ?? model,
});
const resolveTransportExtraParams = (
overrides: Parameters<AgentRuntimePlan["transport"]["resolveExtraParams"]>[0] = {},
) =>
resolvePreparedExtraParams({
cfg: params.config,
cfg: config,
provider: params.provider,
modelId: params.modelId,
agentDir: params.agentDir,
workspaceDir: overrides.workspaceDir ?? params.workspaceDir,
extraParamsOverride: overrides.extraParamsOverride ?? params.extraParamsOverride,
thinkingLevel: overrides.thinkingLevel ?? params.thinkingLevel,
thinkingLevel: asThinkLevel(overrides.thinkingLevel ?? params.thinkingLevel),
agentId: overrides.agentId ?? params.agentId,
model: overrides.model ?? params.model,
model: asProviderRuntimeModel(overrides.model) ?? model,
resolvedTransport: overrides.resolvedTransport ?? transport,
});
@@ -143,9 +165,12 @@ export function buildAgentRuntimePlan(params: BuildAgentRuntimePlanParams): Agen
resolveSystemPromptContribution(context) {
return resolveProviderSystemPromptContribution({
provider: params.provider,
config: params.config,
config,
workspaceDir: context.workspaceDir ?? params.workspaceDir,
context,
context: {
...context,
config: asOpenClawConfig(context.config),
},
});
},
},

View File

@@ -0,0 +1,37 @@
import { describe, expect, it, vi } from "vitest";
const mocks = vi.hoisted(() => ({
logProviderToolSchemaDiagnostics: vi.fn(),
normalizeProviderToolSchemas: vi.fn((params: { tools: unknown[] }) => params.tools),
}));
vi.mock("../pi-embedded-runner/tool-schema-runtime.js", () => ({
logProviderToolSchemaDiagnostics: mocks.logProviderToolSchemaDiagnostics,
normalizeProviderToolSchemas: mocks.normalizeProviderToolSchemas,
}));
const { logAgentRuntimeToolDiagnostics } = await import("./tools.js");
// Guards the legacy fallback: with no RuntimePlan supplied, diagnostics must
// still flow through the provider-level tool schema logger unchanged.
describe("AgentRuntimePlan tool diagnostics legacy fallback", () => {
  it("falls back to provider diagnostics when no RuntimePlan is available", () => {
    const tools = [{ name: "alpha" }] as never;
    // No runtimePlan in the params: forces the legacy branch.
    logAgentRuntimeToolDiagnostics({
      tools,
      provider: "openai",
      modelId: "gpt-5.4",
      modelApi: "openai-responses",
      workspaceDir: "/tmp/openclaw-runtime-plan-tools",
    });
    // The mocked provider logger should receive the caller's params untouched.
    expect(mocks.logProviderToolSchemaDiagnostics).toHaveBeenCalledWith(
      expect.objectContaining({
        tools,
        provider: "openai",
        modelId: "gpt-5.4",
        modelApi: "openai-responses",
        workspaceDir: "/tmp/openclaw-runtime-plan-tools",
      }),
    );
  });
});

View File

@@ -0,0 +1,107 @@
import type { AgentTool } from "@mariozechner/pi-agent-core";
import { describe, expect, it, vi } from "vitest";
import {
createNativeOpenAIResponsesModel,
createParameterFreeTool,
normalizedParameterFreeSchema,
} from "../../../test/helpers/agents/schema-normalization-runtime-contract.js";
import { logAgentRuntimeToolDiagnostics, normalizeAgentRuntimeTools } from "./tools.js";
import type { AgentRuntimePlan } from "./types.js";
// Coverage for the shared tool policy helpers: plan-owned normalization and
// diagnostics must win when a RuntimePlan is present, with the provider-level
// schema path kept as the legacy fallback.
describe("AgentRuntimePlan tool policy helpers", () => {
  it("uses RuntimePlan-owned tool normalization when a plan is available", () => {
    const tools = [createParameterFreeTool()] as AgentTool[];
    const normalized = [{ ...tools[0], name: "normalized" }] as AgentTool[];
    const model = createNativeOpenAIResponsesModel() as never;
    const normalize = vi.fn(() => normalized);
    // Minimal plan stub: only the tools surface is exercised here.
    const runtimePlan = {
      tools: {
        normalize,
        logDiagnostics: vi.fn(),
      },
    } as unknown as AgentRuntimePlan;
    // Plan output must be returned by identity, not rebuilt.
    expect(
      normalizeAgentRuntimeTools({
        runtimePlan,
        tools,
        provider: "openai",
        modelId: "gpt-5.4",
        modelApi: "openai-responses",
        workspaceDir: "/tmp/openclaw-runtime-plan-tools",
        model,
      }),
    ).toBe(normalized);
    // The plan normalizer receives the shared context shape.
    expect(normalize).toHaveBeenCalledWith(tools, {
      workspaceDir: "/tmp/openclaw-runtime-plan-tools",
      modelApi: "openai-responses",
      model,
    });
  });
  it("accepts legacy optional model fields while normalizing RuntimePlan context", () => {
    const tools = [createParameterFreeTool()] as AgentTool[];
    const normalize = vi.fn(() => tools);
    const runtimePlan = {
      tools: {
        normalize,
        logDiagnostics: vi.fn(),
      },
    } as unknown as AgentRuntimePlan;
    // Legacy callers may pass modelApi as null and omit workspace/model fields.
    expect(
      normalizeAgentRuntimeTools({
        runtimePlan,
        tools,
        provider: "openai",
        modelApi: null,
      }),
    ).toBe(tools);
    // null modelApi is normalized to undefined before reaching the plan hook.
    expect(normalize).toHaveBeenCalledWith(tools, {
      workspaceDir: undefined,
      modelApi: undefined,
      model: undefined,
    });
  });
  it("falls back to legacy provider schema normalization when no plan is available", () => {
    // No runtimePlan: the legacy provider schema normalizer must run.
    const normalized = normalizeAgentRuntimeTools({
      tools: [createParameterFreeTool()] as AgentTool[],
      provider: "openai",
      modelId: "gpt-5.4",
      modelApi: "openai-responses",
      workspaceDir: "/tmp/openclaw-runtime-plan-tools",
      model: createNativeOpenAIResponsesModel() as never,
    });
    expect(normalized[0]?.parameters).toEqual(normalizedParameterFreeSchema());
  });
  it("routes diagnostics through RuntimePlan when a plan is available", () => {
    const tools = [createParameterFreeTool()] as AgentTool[];
    const model = createNativeOpenAIResponsesModel() as never;
    const logDiagnostics = vi.fn();
    const runtimePlan = {
      tools: {
        normalize: vi.fn(),
        logDiagnostics,
      },
    } as unknown as AgentRuntimePlan;
    logAgentRuntimeToolDiagnostics({
      runtimePlan,
      tools,
      provider: "openai",
      modelId: "gpt-5.4",
      modelApi: "openai-responses",
      workspaceDir: "/tmp/openclaw-runtime-plan-tools",
      model,
    });
    // The plan logger receives the same shared context shape as normalization.
    expect(logDiagnostics).toHaveBeenCalledWith(tools, {
      workspaceDir: "/tmp/openclaw-runtime-plan-tools",
      modelApi: "openai-responses",
      model,
    });
  });
});

View File

@@ -0,0 +1,71 @@
import type { AgentTool } from "@mariozechner/pi-agent-core";
import type { TSchema } from "typebox";
import type { OpenClawConfig } from "../../config/types.openclaw.js";
import type { ProviderRuntimeModel } from "../../plugins/provider-runtime-model.types.js";
import {
logProviderToolSchemaDiagnostics,
normalizeProviderToolSchemas,
} from "../pi-embedded-runner/tool-schema-runtime.js";
import type { AgentRuntimePlan } from "./types.js";
type AgentRuntimeToolPolicyParams<TSchemaType extends TSchema = TSchema, TResult = unknown> = {
runtimePlan?: AgentRuntimePlan;
tools: AgentTool<TSchemaType, TResult>[];
provider: string;
config?: OpenClawConfig;
workspaceDir?: string;
env?: NodeJS.ProcessEnv;
modelId?: string;
modelApi?: string | null;
model?: ProviderRuntimeModel;
};
/**
 * Builds the context object handed to RuntimePlan tool hooks, collapsing a
 * legacy `null` modelApi to undefined so the plan surface sees one shape.
 */
function runtimePlanToolContext(params: {
  workspaceDir?: string;
  modelApi?: string | null;
  model?: ProviderRuntimeModel;
}) {
  const { workspaceDir, modelApi, model } = params;
  return {
    workspaceDir,
    modelApi: modelApi == null ? undefined : modelApi,
    model,
  };
}
/**
 * Normalizes attempt tool schemas through the shared policy surface.
 *
 * When the attempt carries an AgentRuntimePlan, its plan-owned normalizer is
 * authoritative; otherwise the legacy provider schema normalization runs so
 * callers without a plan keep their previous behavior.
 *
 * @param params.runtimePlan optional host-owned runtime policy bundle
 * @param params.tools raw tool list to normalize (may be empty)
 * @returns the normalized tool list
 */
export function normalizeAgentRuntimeTools<
  TSchemaType extends TSchema = TSchema,
  TResult = unknown,
>(params: AgentRuntimeToolPolicyParams<TSchemaType, TResult>): AgentTool<TSchemaType, TResult>[] {
  // Inline context build: legacy callers may pass modelApi as null, which the
  // plan surface expects collapsed to undefined.
  const planContext = {
    workspaceDir: params.workspaceDir,
    modelApi: params.modelApi ?? undefined,
    model: params.model,
  };
  const planned = params.runtimePlan?.tools.normalize(params.tools, planContext);
  if (planned != null) {
    return planned;
  }
  // Legacy fallback: provider-level schema normalization.
  return normalizeProviderToolSchemas({
    tools: params.tools,
    provider: params.provider,
    config: params.config,
    workspaceDir: params.workspaceDir,
    env: params.env ?? process.env,
    modelId: params.modelId,
    modelApi: params.modelApi,
    model: params.model,
  });
}
/**
 * Emits tool schema diagnostics through the shared policy surface.
 *
 * Routes through the plan-owned logger when an AgentRuntimePlan is present;
 * otherwise the legacy provider diagnostics path runs unchanged.
 *
 * @param params.runtimePlan optional host-owned runtime policy bundle
 * @param params.tools effective tool list to report on
 */
export function logAgentRuntimeToolDiagnostics(params: AgentRuntimeToolPolicyParams): void {
  const { runtimePlan } = params;
  if (runtimePlan) {
    // Plan-owned diagnostics; context mirrors the normalization hook's shape
    // (null modelApi collapsed to undefined).
    runtimePlan.tools.logDiagnostics(params.tools, {
      workspaceDir: params.workspaceDir,
      modelApi: params.modelApi ?? undefined,
      model: params.model,
    });
    return;
  }
  // Legacy fallback: provider-level schema diagnostics.
  logProviderToolSchemaDiagnostics({
    tools: params.tools,
    provider: params.provider,
    config: params.config,
    workspaceDir: params.workspaceDir,
    env: params.env ?? process.env,
    modelId: params.modelId,
    modelApi: params.modelApi,
    model: params.model,
  });
}

View File

@@ -0,0 +1,43 @@
import { describe, expectTypeOf, it } from "vitest";
import type { ReplyPayload } from "../../auto-reply/reply-payload.js";
import type { ThinkLevel } from "../../auto-reply/thinking.js";
import type { FailoverReason } from "../pi-embedded-helpers/types.js";
import type { PromptMode } from "../system-prompt.types.js";
import type { buildAgentRuntimeDeliveryPlan, buildAgentRuntimePlan } from "./build.js";
import type {
AgentRuntimeFailoverReason,
AgentRuntimePromptMode,
AgentRuntimeReplyPayload,
AgentRuntimeThinkLevel,
BuildAgentRuntimeDeliveryPlanParams,
BuildAgentRuntimePlanParams,
} from "./types.js";
type Equal<X, Y> = [X] extends [Y] ? ([Y] extends [X] ? true : false) : false;
type Assert<T extends true> = T;
// Compile-time-only suite: expectTypeOf assertions and the Equal/Assert helpers
// fail the build (not the runtime) if the runtime-plan leaf types drift from
// their concrete source contracts.
describe("AgentRuntimePlan structural type compatibility", () => {
  it("keeps copied scalar unions aligned with their source contracts", () => {
    expectTypeOf<AgentRuntimeThinkLevel>().toEqualTypeOf<ThinkLevel>();
    expectTypeOf<AgentRuntimeFailoverReason>().toEqualTypeOf<FailoverReason>();
    expectTypeOf<AgentRuntimePromptMode>().toEqualTypeOf<PromptMode>();
  });
  it("keeps reply payload shapes structurally compatible with the runtime leaf payload shape", () => {
    // Key-set equality catches a field added to one payload type but not the other.
    type _ReplyPayloadKeysStayInSync = Assert<
      Equal<keyof ReplyPayload, keyof AgentRuntimeReplyPayload>
    >;
    // Bidirectional assignability keeps the two shapes interchangeable.
    expectTypeOf<ReplyPayload>().toMatchTypeOf<AgentRuntimeReplyPayload>();
    expectTypeOf<AgentRuntimeReplyPayload>().toMatchTypeOf<ReplyPayload>();
  });
  it("keeps builder call signatures aligned with exported structural params", () => {
    expectTypeOf<
      Parameters<typeof buildAgentRuntimeDeliveryPlan>[0]
    >().toEqualTypeOf<BuildAgentRuntimeDeliveryPlanParams>();
    expectTypeOf<
      Parameters<typeof buildAgentRuntimePlan>[0]
    >().toEqualTypeOf<BuildAgentRuntimePlanParams>();
  });
});

View File

@@ -0,0 +1,37 @@
import fs from "node:fs/promises";
import { fileURLToPath } from "node:url";
import { describe, expect, it } from "vitest";
const TYPES_PATH = fileURLToPath(new URL("./types.ts", import.meta.url));
const concreteRuntimePolicyImportPatterns = [
/from\s+["'][^"']*auto-reply(?:\/|\.js|["'])/,
/from\s+["'](?:[^"']*\/)?config(?:\/|\.js|["'])/,
/from\s+["'](?:[^"']*\/)?plugins(?:\/|\.js|["'])/,
/from\s+["'][^"']*pi-embedded-/,
/from\s+["'][^"']*transcript-policy(?:\.[^/"']+)?(?:\/|\.js|["'])/,
/from\s+["'][^"']*system-prompt(?:\.[^/"']+)?(?:\/|\.js|["'])/,
];
// Source-level lint suite: scans types.ts for imports of concrete runtime
// policy modules so the runtime-plan contract stays a dependency-free leaf.
describe("AgentRuntimePlan leaf contracts", () => {
  it("keeps runtime plan type contracts independent from concrete runtime policy modules", async () => {
    const source = await fs.readFile(TYPES_PATH, "utf8");
    for (const pattern of concreteRuntimePolicyImportPatterns) {
      expect(source).not.toMatch(pattern);
    }
  });
  it("guards against policy type imports re-entering the leaf contract", () => {
    // Self-test of the patterns: each known-bad import line must be caught by
    // at least one pattern, so the first test cannot pass vacuously.
    const forbiddenImports = [
      'import type { PromptContribution } from "../system-prompt.types.js";',
      'import type { TranscriptPolicy } from "../transcript-policy.types.js";',
    ];
    for (const importStatement of forbiddenImports) {
      expect(
        concreteRuntimePolicyImportPatterns.some((pattern) => pattern.test(importStatement)),
      ).toBe(true);
    }
  });
});

View File

@@ -1,14 +1,155 @@
import type { AgentTool } from "@mariozechner/pi-agent-core";
import type { TSchema } from "typebox";
import type { ThinkLevel } from "../../auto-reply/thinking.js";
import type { ReplyPayload } from "../../auto-reply/types.js";
import type { OpenClawConfig } from "../../config/types.openclaw.js";
import type { ProviderRuntimeModel } from "../../plugins/provider-runtime-model.types.js";
import type { FailoverReason } from "../pi-embedded-helpers/types.js";
import type { PromptMode } from "../system-prompt.types.js";
export type AgentRuntimeTransport = "sse" | "websocket" | "auto";
export type AgentRuntimeThinkLevel =
| "off"
| "minimal"
| "low"
| "medium"
| "high"
| "xhigh"
| "adaptive"
| "max";
export type AgentRuntimePromptMode = "full" | "minimal" | "none";
export type AgentRuntimeFailoverReason =
| "auth"
| "auth_permanent"
| "format"
| "rate_limit"
| "overloaded"
| "billing"
| "timeout"
| "model_not_found"
| "session_expired"
| "unknown";
export type AgentRuntimeConfig = unknown;
export type AgentRuntimeModel = {
id?: string;
name?: string;
api?: string;
provider?: string;
baseUrl?: string;
reasoning?: boolean;
input?: string[];
cost?: {
input: number;
output: number;
cacheRead: number;
cacheWrite: number;
};
contextWindow?: number;
maxTokens?: number;
contextTokens?: number;
compat?: unknown;
};
export type AgentRuntimeInteractiveButtonStyle = "primary" | "secondary" | "success" | "danger";
export type AgentRuntimeInteractiveReplyButton = {
label: string;
value?: string;
url?: string;
style?: AgentRuntimeInteractiveButtonStyle;
};
export type AgentRuntimeInteractiveReplyOption = {
label: string;
value: string;
};
export type AgentRuntimeInteractiveReplyBlock =
| {
type: "text";
text: string;
}
| {
type: "buttons";
buttons: AgentRuntimeInteractiveReplyButton[];
}
| {
type: "select";
placeholder?: string;
options: AgentRuntimeInteractiveReplyOption[];
};
export type AgentRuntimeInteractiveReply = {
blocks: AgentRuntimeInteractiveReplyBlock[];
};
export type AgentRuntimeMessagePresentationTone =
| "info"
| "success"
| "warning"
| "danger"
| "neutral";
export type AgentRuntimeMessagePresentationBlock =
| {
type: "text";
text: string;
}
| {
type: "context";
text: string;
}
| {
type: "divider";
}
| {
type: "buttons";
buttons: AgentRuntimeInteractiveReplyButton[];
}
| {
type: "select";
placeholder?: string;
options: AgentRuntimeInteractiveReplyOption[];
};
export type AgentRuntimeMessagePresentation = {
title?: string;
tone?: AgentRuntimeMessagePresentationTone;
blocks: AgentRuntimeMessagePresentationBlock[];
};
export type AgentRuntimeReplyPayloadDeliveryPin = {
enabled: boolean;
notify?: boolean;
required?: boolean;
};
export type AgentRuntimeReplyPayloadDelivery = {
pin?: boolean | AgentRuntimeReplyPayloadDeliveryPin;
};
export type AgentRuntimeReplyPayload = {
text?: string;
mediaUrl?: string;
mediaUrls?: string[];
trustedLocalMedia?: boolean;
sensitiveMedia?: boolean;
presentation?: AgentRuntimeMessagePresentation;
delivery?: AgentRuntimeReplyPayloadDelivery;
interactive?: AgentRuntimeInteractiveReply;
btw?: {
question: string;
};
replyToId?: string;
replyToTag?: boolean;
replyToCurrent?: boolean;
audioAsVoice?: boolean;
spokenText?: string;
isError?: boolean;
isReasoning?: boolean;
isCompactionNotice?: boolean;
channelData?: Record<string, unknown>;
};
export type AgentRuntimeSystemPromptSectionId =
| "interaction_style"
| "tool_call_style"
@@ -21,12 +162,12 @@ export type AgentRuntimeSystemPromptContribution = {
};
export type AgentRuntimeSystemPromptContributionContext = {
config?: OpenClawConfig;
config?: AgentRuntimeConfig;
agentDir?: string;
workspaceDir?: string;
provider: string;
modelId: string;
promptMode: PromptMode;
promptMode: AgentRuntimePromptMode;
runtimeChannel?: string;
runtimeCapabilities?: string[];
agentId?: string;
@@ -61,7 +202,7 @@ export type AgentRuntimeTranscriptPolicy = {
export type AgentRuntimeOutcomeClassification =
| {
message: string;
reason?: FailoverReason;
reason?: AgentRuntimeFailoverReason;
status?: number;
code?: string;
rawError?: string;
@@ -109,7 +250,7 @@ export type AgentRuntimeToolPlan = {
params?: {
workspaceDir?: string;
modelApi?: string;
model?: ProviderRuntimeModel;
model?: AgentRuntimeModel;
},
): AgentTool<TSchemaType, TResult>[];
logDiagnostics(
@@ -117,15 +258,17 @@ export type AgentRuntimeToolPlan = {
params?: {
workspaceDir?: string;
modelApi?: string;
model?: ProviderRuntimeModel;
model?: AgentRuntimeModel;
},
): void;
};
export type AgentRuntimeDeliveryPlan = {
isSilentPayload(payload: Pick<ReplyPayload, "text" | "mediaUrl" | "mediaUrls">): boolean;
isSilentPayload(
payload: Pick<AgentRuntimeReplyPayload, "text" | "mediaUrl" | "mediaUrls">,
): boolean;
resolveFollowupRoute(params: {
payload: ReplyPayload;
payload: AgentRuntimeReplyPayload;
originatingChannel?: string;
originatingTo?: string;
originRoutable: boolean;
@@ -141,10 +284,10 @@ export type AgentRuntimeTransportPlan = {
extraParams: Record<string, unknown>;
resolveExtraParams(params?: {
extraParamsOverride?: Record<string, unknown>;
thinkingLevel?: ThinkLevel;
thinkingLevel?: AgentRuntimeThinkLevel;
agentId?: string;
workspaceDir?: string;
model?: ProviderRuntimeModel;
model?: AgentRuntimeModel;
resolvedTransport?: AgentRuntimeTransport;
}): Record<string, unknown>;
};
@@ -159,7 +302,7 @@ export type AgentRuntimePlan = {
resolvePolicy(params?: {
workspaceDir?: string;
modelApi?: string;
model?: ProviderRuntimeModel;
model?: AgentRuntimeModel;
}): AgentRuntimeTranscriptPolicy;
};
delivery: AgentRuntimeDeliveryPlan;
@@ -177,7 +320,7 @@ export type AgentRuntimePlan = {
};
export type BuildAgentRuntimeDeliveryPlanParams = {
config?: OpenClawConfig;
config?: AgentRuntimeConfig;
workspaceDir?: string;
agentDir?: string;
provider: string;
@@ -185,12 +328,12 @@ export type BuildAgentRuntimeDeliveryPlanParams = {
};
export type BuildAgentRuntimePlanParams = {
config?: OpenClawConfig;
config?: AgentRuntimeConfig;
workspaceDir?: string;
agentDir?: string;
provider: string;
modelId: string;
model?: ProviderRuntimeModel;
model?: AgentRuntimeModel;
modelApi?: string | null;
harnessId?: string;
harnessRuntime?: string;
@@ -198,7 +341,7 @@ export type BuildAgentRuntimePlanParams = {
authProfileProvider?: string;
sessionAuthProfileId?: string;
agentId?: string;
thinkingLevel?: ThinkLevel;
thinkingLevel?: AgentRuntimeThinkLevel;
extraParamsOverride?: Record<string, unknown>;
resolvedTransport?: AgentRuntimeTransport;
};

View File

@@ -0,0 +1,128 @@
import { describe, expect, it } from "vitest";
import {
classifyAgentHarnessTerminalOutcome,
type AgentHarnessTerminalOutcomeClassification,
} from "./agent-harness-runtime.js";
describe("classifyAgentHarnessTerminalOutcome", () => {
  // Baseline input models a cleanly completed turn with no visible output;
  // each case overrides only the fields it exercises.
  const classify = (
    overrides: Partial<Parameters<typeof classifyAgentHarnessTerminalOutcome>[0]> = {},
  ) =>
    classifyAgentHarnessTerminalOutcome({
      assistantTexts: [],
      reasoningText: "",
      planText: "",
      promptError: null,
      turnCompleted: true,
      ...overrides,
    });

  it("does not classify an in-flight turn", () => {
    expect(classify({ turnCompleted: false })).toBeUndefined();
  });

  it("does not classify prompt errors as terminal empty-output outcomes", () => {
    expect(classify({ promptError: new Error("turn failed") })).toBeUndefined();
  });

  it("does not classify deliberate silent replies such as NO_REPLY", () => {
    expect(classify({ assistantTexts: ["NO_REPLY"] })).toBeUndefined();
  });

  it("treats empty-string prompt errors as terminal errors", () => {
    // "" is falsy but still non-nullish, so it must count as an error and
    // suppress fallback classification.
    expect(classify({ promptError: "" })).toBeUndefined();
  });

  it("treats whitespace-only assistant text as not visible", () => {
    expect(classify({ assistantTexts: [" ", "\n\t"] })).toBe("empty");
  });

  it("classifies a completed turn with plan text only as planning-only", () => {
    expect(classify({ planText: "1. inspect\n2. patch\n3. test" })).toBe("planning-only");
  });

  it("prefers planning-only when both plan and reasoning text are present", () => {
    expect(
      classify({
        reasoningText: "I need to inspect the files.",
        planText: "I will inspect, patch, and test.",
      }),
    ).toBe("planning-only");
  });

  it("classifies a completed turn with reasoning text only as reasoning-only", () => {
    expect(
      classify({ reasoningText: "The answer depends on the current repository state." }),
    ).toBe("reasoning-only");
  });

  it("classifies a completed turn with no visible output as empty", () => {
    expect(classify({ reasoningText: " ", planText: "\n" })).toBe("empty");
  });

  it("returns only terminal fallback classifications, not ok", () => {
    const classification: AgentHarnessTerminalOutcomeClassification =
      classify() ?? "empty";
    expect(classification).toBe("empty");
  });
});

View File

@@ -2,6 +2,7 @@
// Keep heavyweight tool construction out of this module so harness imports can
// register quickly inside gateway startup and Docker e2e runs.
import type { EmbeddedRunAttemptResult } from "../agents/pi-embedded-runner/run/types.js";
import { formatToolDetail, resolveToolDisplay } from "../agents/tool-display.js";
import { redactToolDetail } from "../logging/redact.js";
import { truncateUtf16Safe } from "../utils.js";
@@ -81,6 +82,10 @@ export {
setActiveEmbeddedRun,
} from "../agents/pi-embedded-runner/runs.js";
export { disposeRegisteredAgentHarnesses } from "../agents/harness/registry.js";
export {
logAgentRuntimeToolDiagnostics,
normalizeAgentRuntimeTools,
} from "../agents/runtime-plan/tools.js";
export { normalizeProviderToolSchemas } from "../agents/pi-embedded-runner/tool-schema-runtime.js";
export { resolveSandboxContext } from "../agents/sandbox.js";
export { isSubagentSessionKey } from "../routing/session-key.js";
@@ -146,3 +151,46 @@ export function formatToolProgressOutput(
}
return `${truncateUtf16Safe(redacted, maxChars)}\n...(truncated)...`;
}
/**
 * Summary of a harness turn used to decide terminal outcome classification.
 */
export type AgentHarnessTerminalOutcomeInput = {
  // Raw assistant message texts emitted during the turn; whitespace-only
  // entries do not count as visible output.
  assistantTexts: readonly string[];
  // Accumulated reasoning text surfaced by the harness, if any.
  reasoningText?: string | null;
  // Accumulated plan text surfaced by the harness, if any.
  planText?: string | null;
  // Error raised while prompting; any non-nullish value (even "") counts as an
  // error and suppresses classification.
  promptError?: unknown;
  // True once the harness reports the turn as finished.
  turnCompleted: boolean;
};
/**
 * Terminal fallback classifications, derived from the embedded run attempt
 * result so plugin harnesses stay aligned with the built-in PI path. Includes
 * at least "planning-only", "reasoning-only", and "empty" (the values returned
 * by classifyAgentHarnessTerminalOutcome).
 */
export type AgentHarnessTerminalOutcomeClassification = NonNullable<
  EmbeddedRunAttemptResult["agentHarnessResultClassification"]
>;
/**
 * Decide whether a finished harness turn produced no assistant-visible output
 * and should therefore advance model fallback.
 *
 * A turn is classified only when it completed, ran without a prompt error, and
 * emitted no visible assistant text. Deliberate silent replies such as
 * NO_REPLY are visible text and are never classified, while whitespace-only
 * output remains fallback-eligible. This lives at the SDK level so plugin
 * harness adapters (e.g. Codex) share the exact OpenClaw-owned terminal-result
 * policy of the built-in PI path instead of re-implementing it.
 *
 * @returns "planning-only" when plan text is present, "reasoning-only" when
 *   only reasoning text is present, "empty" when nothing visible remains, or
 *   `undefined` when the turn should not be classified at all.
 */
export function classifyAgentHarnessTerminalOutcome(
  params: AgentHarnessTerminalOutcomeInput,
): AgentHarnessTerminalOutcomeClassification | undefined {
  const { turnCompleted, promptError, assistantTexts, planText, reasoningText } = params;
  // Any non-nullish promptError (including "") means the turn errored out;
  // error handling — not empty-output fallback — owns that path.
  const erroredOut = promptError !== undefined && promptError !== null;
  if (!turnCompleted || erroredOut || hasVisibleAssistantText(assistantTexts)) {
    return undefined;
  }
  if ((planText ?? "").trim().length > 0) {
    return "planning-only";
  }
  if ((reasoningText ?? "").trim().length > 0) {
    return "reasoning-only";
  }
  return "empty";
}
/** True when at least one assistant message contains non-whitespace text. */
function hasVisibleAssistantText(assistantTexts: readonly string[]): boolean {
  for (const text of assistantTexts) {
    if (text.trim().length > 0) {
      return true;
    }
  }
  return false;
}