[codex] Consolidate RuntimePlan and Harness V2 package (#71722)

* refactor: centralize runtime plan policy surface

* refactor: route embedded attempts through runtime plan

* feat: add agent harness v2 lifecycle adapter

* docs: document agent harness runtime plan

---------

Co-authored-by: Eva <eva@100yen.org>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
EVA
2026-04-26 06:07:04 +07:00
committed by GitHub
parent 114c9a2f3e
commit 2c35a6e599
22 changed files with 1685 additions and 149 deletions

View File

@@ -2,18 +2,19 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { AssistantMessage, Usage } from "@mariozechner/pi-ai";
import { SessionManager } from "@mariozechner/pi-coding-agent";
import {
classifyAgentHarnessTerminalOutcome,
embeddedAgentLog,
emitAgentEvent as emitGlobalAgentEvent,
formatErrorMessage,
formatToolAggregate,
formatToolProgressOutput,
inferToolMetaFromArgs,
normalizeUsage,
runAgentHarnessAfterCompactionHook,
runAgentHarnessBeforeCompactionHook,
TOOL_PROGRESS_OUTPUT_MAX_CHARS,
type EmbeddedRunAttemptParams,
type EmbeddedRunAttemptResult,
TOOL_PROGRESS_OUTPUT_MAX_CHARS,
formatToolAggregate,
type MessagingToolSend,
} from "openclaw/plugin-sdk/agent-harness-runtime";
import { readCodexTurn } from "./protocol-validators.js";
@@ -36,10 +37,6 @@ export type CodexAppServerToolTelemetry = {
successfulCronAdds?: number;
};
/**
 * The `agentHarnessResultClassification` field of `EmbeddedRunAttemptResult`
 * with `null` and `undefined` stripped out via `NonNullable`.
 */
type AgentHarnessResultClassification = NonNullable<
EmbeddedRunAttemptResult["agentHarnessResultClassification"]
>;
const ZERO_USAGE: Usage = {
input: 0,
output: 0,
@@ -66,25 +63,6 @@ const CURRENT_TOKEN_USAGE_KEYS = [
const MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM = 20;
/**
 * Labels a turn that finished without producing any assistant text.
 *
 * @param params.assistantTexts - Assistant texts collected for the turn.
 * @param params.reasoningText - Reasoning text collected for the turn.
 * @param params.planText - Plan text collected for the turn.
 * @param params.promptError - Any truthy value disables classification.
 * @param params.turnCompleted - Whether the turn completed.
 * @returns `undefined` when the turn did not complete, a prompt error is
 *   present, or at least one assistant text exists. Otherwise
 *   `"planning-only"` when the plan text is non-blank, `"reasoning-only"` when
 *   the reasoning text is non-blank, and `"empty"` when both are blank.
 */
function classifyTerminalResult(params: {
  assistantTexts: string[];
  reasoningText: string;
  planText: string;
  promptError: unknown;
  turnCompleted: boolean;
}): AgentHarnessResultClassification | undefined {
  const { assistantTexts, reasoningText, planText, promptError, turnCompleted } = params;

  // Guards run in the same order as a single short-circuiting condition would.
  if (!turnCompleted) {
    return undefined;
  }
  if (promptError) {
    return undefined;
  }
  if (assistantTexts.length > 0) {
    return undefined;
  }

  // Plan text takes precedence; reasoning text is only inspected when the plan is blank.
  return planText.trim() ? "planning-only" : reasoningText.trim() ? "reasoning-only" : "empty";
}
export class CodexAppServerEventProjector {
private readonly assistantTextByItem = new Map<string, string>();
private readonly assistantItemOrder: string[] = [];
@@ -217,7 +195,7 @@ export class CodexAppServerEventProjector {
const promptError =
this.promptError ??
(turnFailed ? (this.completedTurn?.error?.message ?? "codex app-server turn failed") : null);
const agentHarnessResultClassification = classifyTerminalResult({
const agentHarnessResultClassification = classifyAgentHarnessTerminalOutcome({
assistantTexts,
reasoningText,
planText,

View File

@@ -9,6 +9,7 @@ import {
} from "openclaw/plugin-sdk/agent-harness";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { __testing as nativeHookRelayTesting } from "../../../../src/agents/harness/native-hook-relay.js";
import { buildAgentRuntimePlan } from "../../../../src/agents/runtime-plan/build.js";
import {
onAgentEvent,
resetAgentEventsForTest,
@@ -52,6 +53,28 @@ function createParams(sessionFile: string, workspaceDir: string): EmbeddedRunAtt
} as EmbeddedRunAttemptParams;
}
/**
 * Builds attempt params via `createParams` and attaches a `runtimePlan`
 * produced by `buildAgentRuntimePlan` for the "codex" harness.
 *
 * The plan is built from the base params' provider, model and config, with
 * `workspaceDir` as given and `tempDir` as the agent directory.
 */
function createParamsWithRuntimePlan(
  sessionFile: string,
  workspaceDir: string,
): EmbeddedRunAttemptParams {
  const baseParams = createParams(sessionFile, workspaceDir);
  const codexRuntimePlan = buildAgentRuntimePlan({
    provider: baseParams.provider,
    modelId: baseParams.modelId,
    model: baseParams.model,
    modelApi: baseParams.model.api,
    harnessId: "codex",
    harnessRuntime: "codex",
    config: baseParams.config,
    workspaceDir,
    agentDir: tempDir,
    thinkingLevel: baseParams.thinkLevel,
  });
  return { ...baseParams, runtimePlan: codexRuntimePlan } as EmbeddedRunAttemptParams;
}
function threadStartResult(threadId = "thread-1") {
return {
thread: {
@@ -364,7 +387,7 @@ describe("runCodexAppServerAttempt", () => {
sessionManager.appendMessage(assistantMessage("existing context", Date.now()));
const harness = createStartedThreadHarness();
const params = createParams(sessionFile, workspaceDir);
const params = createParamsWithRuntimePlan(sessionFile, workspaceDir);
params.onAgentEvent = onRunAgentEvent;
const run = runCodexAppServerAttempt(params);
await harness.waitForMethod("turn/start");
@@ -460,6 +483,8 @@ describe("runCodexAppServerAttempt", () => {
sessionId: "session-1",
provider: "codex",
model: "gpt-5.4-codex",
resolvedRef: "codex/gpt-5.4-codex",
harnessId: "codex",
assistantTexts: ["hello back"],
lastAssistant: expect.objectContaining({
role: "assistant",
@@ -675,9 +700,9 @@ describe("runCodexAppServerAttempt", () => {
return undefined;
});
await expect(runCodexAppServerAttempt(createParams(sessionFile, workspaceDir))).rejects.toThrow(
"turn start exploded",
);
await expect(
runCodexAppServerAttempt(createParamsWithRuntimePlan(sessionFile, workspaceDir)),
).rejects.toThrow("turn start exploded");
await vi.waitFor(() => expect(llmInput).toHaveBeenCalledTimes(1), { interval: 1 });
await vi.waitFor(() => expect(llmOutput).toHaveBeenCalledTimes(1), { interval: 1 });
@@ -687,6 +712,8 @@ describe("runCodexAppServerAttempt", () => {
assistantTexts: [],
model: "gpt-5.4-codex",
provider: "codex",
resolvedRef: "codex/gpt-5.4-codex",
harnessId: "codex",
runId: "run-1",
sessionId: "session-1",
}),

View File

@@ -14,7 +14,7 @@ import {
formatErrorMessage,
isActiveHarnessContextEngine,
isSubagentSessionKey,
normalizeProviderToolSchemas,
normalizeAgentRuntimeTools,
resolveAttemptSpawnWorkspaceDir,
resolveAgentHarnessBeforePromptBuildResult,
resolveModelAuthMode,
@@ -906,23 +906,17 @@ async function buildDynamicTools(input: DynamicToolBuildParams) {
params.toolsAllow && params.toolsAllow.length > 0
? visionFilteredTools.filter((tool) => params.toolsAllow?.includes(tool.name))
: visionFilteredTools;
return (
params.runtimePlan?.tools.normalize(filteredTools, {
workspaceDir: input.effectiveWorkspace,
modelApi: params.model.api,
model: params.model,
}) ??
normalizeProviderToolSchemas({
tools: filteredTools,
provider: params.provider,
config: params.config,
workspaceDir: input.effectiveWorkspace,
env: process.env,
modelId: params.modelId,
modelApi: params.model.api,
model: params.model,
})
);
return normalizeAgentRuntimeTools({
runtimePlan: params.runtimePlan,
tools: filteredTools,
provider: params.provider,
config: params.config,
workspaceDir: input.effectiveWorkspace,
env: process.env,
modelId: params.modelId,
modelApi: params.model.api,
model: params.model,
});
}
async function withCodexStartupTimeout<T>(params: {