mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 07:30:43 +00:00
[codex] Consolidate RuntimePlan and Harness V2 package (#71722)
* refactor: centralize runtime plan policy surface * refactor: route embedded attempts through runtime plan * feat: add agent harness v2 lifecycle adapter * docs: document agent harness runtime plan --------- Co-authored-by: Eva <eva@100yen.org> Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
@@ -46,6 +46,23 @@ Before a harness is selected, OpenClaw has already resolved:
|
||||
That split is intentional. A harness runs a prepared attempt; it does not pick
providers, replace channel delivery, or silently switch models.
|
||||
|
||||
The prepared attempt also includes `params.runtimePlan`, an OpenClaw-owned
policy bundle for runtime decisions that must stay shared across PI and native
harnesses:
|
||||
|
||||
- `runtimePlan.tools.normalize(...)` and
  `runtimePlan.tools.logDiagnostics(...)` for provider-aware tool schema policy
- `runtimePlan.transcript.resolvePolicy(...)` for transcript sanitization and
  tool-call repair policy
- `runtimePlan.delivery.isSilentPayload(...)` for shared `NO_REPLY` and media
  delivery suppression
- `runtimePlan.outcome.classifyRunResult(...)` for model fallback classification
- `runtimePlan.observability` for resolved provider/model/harness metadata
|
||||
|
||||
Harnesses may use the plan for decisions that need to match PI behavior, but
should still treat it as host-owned attempt state. Do not mutate it or use it to
switch providers/models inside a turn.
|
||||
|
||||
## Register a harness
|
||||
|
||||
**Import:** `openclaw/plugin-sdk/agent-harness`
|
||||
@@ -162,6 +179,16 @@ middleware, but new result transforms should use the runtime-neutral API.
|
||||
The Pi-only `api.registerEmbeddedExtensionFactory(...)` hook has been removed;
Pi tool-result transforms must use runtime-neutral middleware.
|
||||
|
||||
### Terminal outcome classification
|
||||
|
||||
Native harnesses that own their own protocol projection can use
`classifyAgentHarnessTerminalOutcome(...)` from
`openclaw/plugin-sdk/agent-harness-runtime` when a completed turn produced no
visible assistant text. The helper returns `empty`, `reasoning-only`, or
`planning-only` so OpenClaw's fallback policy can decide whether to retry on a
different model. It deliberately leaves prompt errors, in-flight turns, and
intentional silent replies such as `NO_REPLY` unclassified.
|
||||
|
||||
### Native Codex harness mode
|
||||
|
||||
The bundled `codex` harness is the native Codex mode for embedded OpenClaw
|
||||
|
||||
@@ -191,7 +191,7 @@ For the plugin authoring guide, see [Plugin SDK overview](/plugins/sdk-overview)
|
||||
| `plugin-sdk/models-provider-runtime` | `/models` command/provider reply helpers |
|
||||
| `plugin-sdk/skill-commands-runtime` | Skill command listing helpers |
|
||||
| `plugin-sdk/native-command-registry` | Native command registry/build/serialize helpers |
|
||||
| `plugin-sdk/agent-harness` | Experimental trusted-plugin surface for low-level agent harnesses: harness types, active-run steer/abort helpers, OpenClaw tool bridge helpers, tool progress formatting/detail helpers, and attempt result utilities |
|
||||
| `plugin-sdk/agent-harness` | Experimental trusted-plugin surface for low-level agent harnesses: harness types, active-run steer/abort helpers, OpenClaw tool bridge helpers, runtime-plan tool policy helpers, terminal outcome classification, tool progress formatting/detail helpers, and attempt result utilities |
|
||||
| `plugin-sdk/provider-zai-endpoint` | Z.AI endpoint detection helpers |
|
||||
| `plugin-sdk/infra-runtime` | System event/heartbeat helpers |
|
||||
| `plugin-sdk/collection-runtime` | Small bounded cache helpers |
|
||||
|
||||
@@ -2,18 +2,19 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import type { AssistantMessage, Usage } from "@mariozechner/pi-ai";
|
||||
import { SessionManager } from "@mariozechner/pi-coding-agent";
|
||||
import {
|
||||
classifyAgentHarnessTerminalOutcome,
|
||||
embeddedAgentLog,
|
||||
emitAgentEvent as emitGlobalAgentEvent,
|
||||
formatErrorMessage,
|
||||
formatToolAggregate,
|
||||
formatToolProgressOutput,
|
||||
inferToolMetaFromArgs,
|
||||
normalizeUsage,
|
||||
runAgentHarnessAfterCompactionHook,
|
||||
runAgentHarnessBeforeCompactionHook,
|
||||
TOOL_PROGRESS_OUTPUT_MAX_CHARS,
|
||||
type EmbeddedRunAttemptParams,
|
||||
type EmbeddedRunAttemptResult,
|
||||
TOOL_PROGRESS_OUTPUT_MAX_CHARS,
|
||||
formatToolAggregate,
|
||||
type MessagingToolSend,
|
||||
} from "openclaw/plugin-sdk/agent-harness-runtime";
|
||||
import { readCodexTurn } from "./protocol-validators.js";
|
||||
@@ -36,10 +37,6 @@ export type CodexAppServerToolTelemetry = {
|
||||
successfulCronAdds?: number;
|
||||
};
|
||||
|
||||
type AgentHarnessResultClassification = NonNullable<
|
||||
EmbeddedRunAttemptResult["agentHarnessResultClassification"]
|
||||
>;
|
||||
|
||||
const ZERO_USAGE: Usage = {
|
||||
input: 0,
|
||||
output: 0,
|
||||
@@ -66,25 +63,6 @@ const CURRENT_TOKEN_USAGE_KEYS = [
|
||||
|
||||
const MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM = 20;
|
||||
|
||||
/**
 * Classifies a turn that finished without any visible assistant text.
 *
 * @param params.assistantTexts Visible assistant messages collected for the turn.
 * @param params.reasoningText Accumulated reasoning text for the turn.
 * @param params.planText Accumulated plan text for the turn.
 * @param params.promptError Error recorded for the turn; any truthy value disables classification.
 * @param params.turnCompleted Whether the turn reached completion.
 * @returns `"planning-only"`, `"reasoning-only"`, or `"empty"` for a completed,
 *   error-free turn with no assistant text; `undefined` otherwise.
 */
function classifyTerminalResult(params: {
  assistantTexts: string[];
  reasoningText: string;
  planText: string;
  promptError: unknown;
  turnCompleted: boolean;
}): AgentHarnessResultClassification | undefined {
  // In-flight turns, failed turns, and turns with visible output are left unclassified.
  if (!params.turnCompleted || params.promptError || params.assistantTexts.length > 0) {
    return undefined;
  }
  // Plan text wins over reasoning text when both are present.
  if (params.planText.trim()) {
    return "planning-only";
  }
  if (params.reasoningText.trim()) {
    return "reasoning-only";
  }
  return "empty";
}
|
||||
|
||||
export class CodexAppServerEventProjector {
|
||||
private readonly assistantTextByItem = new Map<string, string>();
|
||||
private readonly assistantItemOrder: string[] = [];
|
||||
@@ -217,7 +195,7 @@ export class CodexAppServerEventProjector {
|
||||
const promptError =
|
||||
this.promptError ??
|
||||
(turnFailed ? (this.completedTurn?.error?.message ?? "codex app-server turn failed") : null);
|
||||
const agentHarnessResultClassification = classifyTerminalResult({
|
||||
const agentHarnessResultClassification = classifyAgentHarnessTerminalOutcome({
|
||||
assistantTexts,
|
||||
reasoningText,
|
||||
planText,
|
||||
|
||||
@@ -9,6 +9,7 @@ import {
|
||||
} from "openclaw/plugin-sdk/agent-harness";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { __testing as nativeHookRelayTesting } from "../../../../src/agents/harness/native-hook-relay.js";
|
||||
import { buildAgentRuntimePlan } from "../../../../src/agents/runtime-plan/build.js";
|
||||
import {
|
||||
onAgentEvent,
|
||||
resetAgentEventsForTest,
|
||||
@@ -52,6 +53,28 @@ function createParams(sessionFile: string, workspaceDir: string): EmbeddedRunAtt
|
||||
} as EmbeddedRunAttemptParams;
|
||||
}
|
||||
|
||||
/**
 * Builds attempt params like `createParams(...)` and attaches a `runtimePlan`
 * produced by `buildAgentRuntimePlan(...)` for the "codex" harness.
 *
 * @param sessionFile Session file path forwarded to `createParams`.
 * @param workspaceDir Workspace directory forwarded to `createParams` and the plan.
 * @returns The base params with an added `runtimePlan` field.
 */
function createParamsWithRuntimePlan(
  sessionFile: string,
  workspaceDir: string,
): EmbeddedRunAttemptParams {
  const params = createParams(sessionFile, workspaceDir);
  return {
    ...params,
    runtimePlan: buildAgentRuntimePlan({
      provider: params.provider,
      modelId: params.modelId,
      model: params.model,
      modelApi: params.model.api,
      harnessId: "codex",
      harnessRuntime: "codex",
      config: params.config,
      workspaceDir,
      // NOTE(review): `tempDir` is declared outside this view — presumably the
      // suite's per-test temp directory; confirm.
      agentDir: tempDir,
      thinkingLevel: params.thinkLevel,
    }),
  } as EmbeddedRunAttemptParams;
}
|
||||
|
||||
function threadStartResult(threadId = "thread-1") {
|
||||
return {
|
||||
thread: {
|
||||
@@ -364,7 +387,7 @@ describe("runCodexAppServerAttempt", () => {
|
||||
sessionManager.appendMessage(assistantMessage("existing context", Date.now()));
|
||||
const harness = createStartedThreadHarness();
|
||||
|
||||
const params = createParams(sessionFile, workspaceDir);
|
||||
const params = createParamsWithRuntimePlan(sessionFile, workspaceDir);
|
||||
params.onAgentEvent = onRunAgentEvent;
|
||||
const run = runCodexAppServerAttempt(params);
|
||||
await harness.waitForMethod("turn/start");
|
||||
@@ -460,6 +483,8 @@ describe("runCodexAppServerAttempt", () => {
|
||||
sessionId: "session-1",
|
||||
provider: "codex",
|
||||
model: "gpt-5.4-codex",
|
||||
resolvedRef: "codex/gpt-5.4-codex",
|
||||
harnessId: "codex",
|
||||
assistantTexts: ["hello back"],
|
||||
lastAssistant: expect.objectContaining({
|
||||
role: "assistant",
|
||||
@@ -675,9 +700,9 @@ describe("runCodexAppServerAttempt", () => {
|
||||
return undefined;
|
||||
});
|
||||
|
||||
await expect(runCodexAppServerAttempt(createParams(sessionFile, workspaceDir))).rejects.toThrow(
|
||||
"turn start exploded",
|
||||
);
|
||||
await expect(
|
||||
runCodexAppServerAttempt(createParamsWithRuntimePlan(sessionFile, workspaceDir)),
|
||||
).rejects.toThrow("turn start exploded");
|
||||
|
||||
await vi.waitFor(() => expect(llmInput).toHaveBeenCalledTimes(1), { interval: 1 });
|
||||
await vi.waitFor(() => expect(llmOutput).toHaveBeenCalledTimes(1), { interval: 1 });
|
||||
@@ -687,6 +712,8 @@ describe("runCodexAppServerAttempt", () => {
|
||||
assistantTexts: [],
|
||||
model: "gpt-5.4-codex",
|
||||
provider: "codex",
|
||||
resolvedRef: "codex/gpt-5.4-codex",
|
||||
harnessId: "codex",
|
||||
runId: "run-1",
|
||||
sessionId: "session-1",
|
||||
}),
|
||||
|
||||
@@ -14,7 +14,7 @@ import {
|
||||
formatErrorMessage,
|
||||
isActiveHarnessContextEngine,
|
||||
isSubagentSessionKey,
|
||||
normalizeProviderToolSchemas,
|
||||
normalizeAgentRuntimeTools,
|
||||
resolveAttemptSpawnWorkspaceDir,
|
||||
resolveAgentHarnessBeforePromptBuildResult,
|
||||
resolveModelAuthMode,
|
||||
@@ -906,23 +906,17 @@ async function buildDynamicTools(input: DynamicToolBuildParams) {
|
||||
params.toolsAllow && params.toolsAllow.length > 0
|
||||
? visionFilteredTools.filter((tool) => params.toolsAllow?.includes(tool.name))
|
||||
: visionFilteredTools;
|
||||
return (
|
||||
params.runtimePlan?.tools.normalize(filteredTools, {
|
||||
workspaceDir: input.effectiveWorkspace,
|
||||
modelApi: params.model.api,
|
||||
model: params.model,
|
||||
}) ??
|
||||
normalizeProviderToolSchemas({
|
||||
tools: filteredTools,
|
||||
provider: params.provider,
|
||||
config: params.config,
|
||||
workspaceDir: input.effectiveWorkspace,
|
||||
env: process.env,
|
||||
modelId: params.modelId,
|
||||
modelApi: params.model.api,
|
||||
model: params.model,
|
||||
})
|
||||
);
|
||||
return normalizeAgentRuntimeTools({
|
||||
runtimePlan: params.runtimePlan,
|
||||
tools: filteredTools,
|
||||
provider: params.provider,
|
||||
config: params.config,
|
||||
workspaceDir: input.effectiveWorkspace,
|
||||
env: process.env,
|
||||
modelId: params.modelId,
|
||||
modelApi: params.model.api,
|
||||
model: params.model,
|
||||
});
|
||||
}
|
||||
|
||||
async function withCodexStartupTimeout<T>(params: {
|
||||
|
||||
26
src/agents/harness/result-classification.ts
Normal file
26
src/agents/harness/result-classification.ts
Normal file
@@ -0,0 +1,26 @@
|
||||
import type {
|
||||
AgentHarness,
|
||||
AgentHarnessAttemptParams,
|
||||
AgentHarnessAttemptResult,
|
||||
} from "./types.js";
|
||||
|
||||
/**
 * Stamps an attempt result with the harness id and, when the harness has a
 * `classify` hook, with that hook's verdict.
 *
 * - Without a `classify` hook the result is returned as-is (including any
 *   classification the harness already set) plus `agentHarnessId`.
 * - With a hook, any previous classification is stripped before the hook runs;
 *   a falsy or `"ok"` verdict leaves the result unclassified, any other verdict
 *   is stored in `agentHarnessResultClassification`.
 *
 * The input `result` is never mutated; a new object is returned.
 *
 * @param harness Harness identity and optional classify hook.
 * @param result Raw attempt result.
 * @param params Attempt params forwarded to the classify hook.
 * @returns A copy of the result carrying `agentHarnessId` and the resolved classification.
 */
export function applyAgentHarnessResultClassification(
  harness: Pick<AgentHarness, "id" | "classify">,
  result: AgentHarnessAttemptResult,
  params: AgentHarnessAttemptParams,
): AgentHarnessAttemptResult {
  if (!harness.classify) {
    return { ...result, agentHarnessId: harness.id };
  }
  // Drop the earlier verdict so the hook decides from scratch and a stale
  // value cannot survive an "ok" outcome.
  const { agentHarnessResultClassification: _previousClassification, ...resultWithoutPrevious } =
    result;
  const classification = harness.classify(resultWithoutPrevious, params);
  if (!classification || classification === "ok") {
    return { ...resultWithoutPrevious, agentHarnessId: harness.id };
  }
  return {
    ...resultWithoutPrevious,
    agentHarnessId: harness.id,
    agentHarnessResultClassification: classification,
  };
}
|
||||
@@ -20,6 +20,7 @@ import type { EmbeddedPiCompactResult } from "../pi-embedded-runner/types.js";
|
||||
import { createPiAgentHarness } from "./builtin-pi.js";
|
||||
import { listRegisteredAgentHarnesses } from "./registry.js";
|
||||
import type { AgentHarness, AgentHarnessSupport } from "./types.js";
|
||||
import { adaptAgentHarnessToV2, runAgentHarnessV2LifecycleAttempt } from "./v2.js";
|
||||
|
||||
const log = createSubsystemLogger("agents/harness");
|
||||
|
||||
@@ -187,14 +188,13 @@ export async function runAgentHarnessAttemptWithFallback(
|
||||
sessionKey: params.sessionKey,
|
||||
agentId: params.agentId,
|
||||
});
|
||||
const v2Harness = adaptAgentHarnessToV2(harness);
|
||||
if (harness.id === "pi") {
|
||||
const result = await harness.runAttempt(params);
|
||||
return applyHarnessResultClassification(harness, result, params);
|
||||
return await runAgentHarnessV2LifecycleAttempt(v2Harness, params);
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await harness.runAttempt(params);
|
||||
return applyHarnessResultClassification(harness, result, params);
|
||||
return await runAgentHarnessV2LifecycleAttempt(v2Harness, params);
|
||||
} catch (error) {
|
||||
log.warn(`${harness.label} failed; not falling back to embedded PI backend`, {
|
||||
harnessId: harness.id,
|
||||
@@ -263,22 +263,6 @@ function logAgentHarnessSelection(
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Stamps an attempt result with the harness id and the verdict of the
 * harness's optional `classify` hook.
 *
 * When a hook is present, any classification already on the result is removed
 * before the hook runs, so a falsy or `"ok"` verdict cannot leave a stale
 * non-ok classification behind. Without a hook the result is returned
 * unchanged apart from `agentHarnessId`.
 *
 * @param harness Harness whose id and classify hook are applied.
 * @param result Raw attempt result; not mutated.
 * @param params Attempt params forwarded to the classify hook.
 * @returns A copy of the result carrying `agentHarnessId` and the resolved classification.
 */
function applyHarnessResultClassification(
  harness: AgentHarness,
  result: EmbeddedRunAttemptResult,
  params: EmbeddedRunAttemptParams,
): EmbeddedRunAttemptResult {
  if (!harness.classify) {
    return { ...result, agentHarnessId: harness.id };
  }
  const { agentHarnessResultClassification: _staleClassification, ...unclassified } = result;
  const classification = harness.classify(unclassified, params);
  if (!classification || classification === "ok") {
    return { ...unclassified, agentHarnessId: harness.id };
  }
  return {
    ...unclassified,
    agentHarnessId: harness.id,
    agentHarnessResultClassification: classification,
  };
}
|
||||
|
||||
function resolvePinnedAgentHarnessPolicy(
|
||||
agentHarnessId: string | undefined,
|
||||
): AgentHarnessPolicy | undefined {
|
||||
|
||||
399
src/agents/harness/v2.test.ts
Normal file
399
src/agents/harness/v2.test.ts
Normal file
@@ -0,0 +1,399 @@
|
||||
import type { Api, Model } from "@mariozechner/pi-ai";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import type { EmbeddedRunAttemptResult } from "../pi-embedded-runner/run/types.js";
|
||||
import type { AgentHarness, AgentHarnessAttemptParams } from "./types.js";
|
||||
import type { AgentHarnessV2 } from "./v2.js";
|
||||
import { adaptAgentHarnessToV2, runAgentHarnessV2LifecycleAttempt } from "./v2.js";
|
||||
|
||||
/**
 * Test fixture: returns a fresh, minimal set of attempt params for the "codex"
 * provider. Fields the tests never read are stubbed and the literal is cast to
 * `AgentHarnessAttemptParams`.
 */
function createAttemptParams(): AgentHarnessAttemptParams {
  return {
    prompt: "hello",
    sessionId: "session-1",
    runId: "run-1",
    sessionFile: "/tmp/session.jsonl",
    workspaceDir: "/tmp/workspace",
    timeoutMs: 5_000,
    provider: "codex",
    modelId: "gpt-5.4",
    model: { id: "gpt-5.4", provider: "codex" } as Model<Api>,
    authStorage: {} as never,
    modelRegistry: {} as never,
    thinkLevel: "low",
  } as AgentHarnessAttemptParams;
}
|
||||
|
||||
/**
 * Test fixture: returns a fresh attempt result describing a clean run that
 * produced the single assistant text "ok", with no aborts, timeouts, prompt
 * errors, tool activity, or messaging-tool sends.
 */
function createAttemptResult(): EmbeddedRunAttemptResult {
  return {
    aborted: false,
    externalAbort: false,
    timedOut: false,
    idleTimedOut: false,
    timedOutDuringCompaction: false,
    promptError: null,
    promptErrorSource: null,
    sessionIdUsed: "session-1",
    messagesSnapshot: [],
    assistantTexts: ["ok"],
    toolMetas: [],
    lastAssistant: undefined,
    didSendViaMessagingTool: false,
    messagingToolSentTexts: [],
    messagingToolSentMediaUrls: [],
    messagingToolSentTargets: [],
    cloudCodeAssistFormatError: false,
    replayMetadata: { hadPotentialSideEffects: false, replaySafe: true },
    itemLifecycle: { startedCount: 0, completedCount: 0, activeCount: 0 },
  };
}
|
||||
|
||||
// Covers the V2 lifecycle runner (prepare -> start -> send -> resolveOutcome ->
// cleanup) and the adapter that exposes V1 harnesses through the V2 surface.
describe("AgentHarness V2 compatibility adapter", () => {
  it("executes prepare/start/send/outcome/cleanup as one bounded lifecycle", async () => {
    const params = createAttemptParams();
    const result = createAttemptResult();
    const events: string[] = [];
    const harness: AgentHarnessV2 = {
      id: "native-v2",
      label: "Native V2",
      supports: () => ({ supported: true }),
      prepare: async (attemptParams) => {
        events.push("prepare");
        expect(attemptParams).toBe(params);
        return {
          harnessId: "native-v2",
          label: "Native V2",
          params,
          lifecycleState: "prepared",
        };
      },
      start: async (prepared) => {
        events.push(`start:${prepared.lifecycleState}`);
        return { ...prepared, lifecycleState: "started" };
      },
      send: async (session) => {
        events.push(`send:${session.lifecycleState}`);
        return result;
      },
      resolveOutcome: async (session, rawResult) => {
        events.push(`outcome:${session.lifecycleState}`);
        return { ...rawResult, agentHarnessId: session.harnessId };
      },
      cleanup: async ({ prepared, session, result: cleanupResult, error }) => {
        expect(prepared?.lifecycleState).toBe("prepared");
        expect(session?.lifecycleState).toBe("started");
        if (!session) {
          throw new Error("expected started session during successful cleanup");
        }
        events.push(`cleanup:${session.lifecycleState}`);
        expect(cleanupResult).toMatchObject({ agentHarnessId: "native-v2" });
        expect(error).toBeUndefined();
      },
    };

    await expect(runAgentHarnessV2LifecycleAttempt(harness, params)).resolves.toMatchObject({
      agentHarnessId: "native-v2",
      sessionIdUsed: "session-1",
    });
    expect(events).toEqual([
      "prepare",
      "start:prepared",
      "send:started",
      "outcome:started",
      "cleanup:started",
    ]);
  });

  it("runs cleanup with the original failure and preserves that failure", async () => {
    const params = createAttemptParams();
    const sendError = new Error("codex app-server send failed");
    // Cleanup also throws; the runner must still reject with the send error.
    const cleanup = vi.fn(async () => {
      throw new Error("cleanup should not mask send failure");
    });
    const harness: AgentHarnessV2 = {
      id: "native-v2",
      label: "Native V2",
      supports: () => ({ supported: true }),
      prepare: async () => ({
        harnessId: "native-v2",
        label: "Native V2",
        params,
        lifecycleState: "prepared",
      }),
      start: async (prepared) => ({ ...prepared, lifecycleState: "started" }),
      send: async () => {
        throw sendError;
      },
      resolveOutcome: async (_session, rawResult) => rawResult,
      cleanup,
    };

    await expect(runAgentHarnessV2LifecycleAttempt(harness, params)).rejects.toThrow(
      "codex app-server send failed",
    );
    expect(cleanup).toHaveBeenCalledWith(
      expect.objectContaining({
        error: sendError,
        prepared: expect.objectContaining({ lifecycleState: "prepared" }),
        session: expect.objectContaining({ lifecycleState: "started" }),
      }),
    );
  });

  it("runs cleanup for failed prepare/start lifecycle stages", async () => {
    const params = createAttemptParams();
    const startError = new Error("codex app-server start failed");
    const cleanup = vi.fn(async () => {});
    const harness: AgentHarnessV2 = {
      id: "native-v2",
      label: "Native V2",
      supports: () => ({ supported: true }),
      prepare: async () => ({
        harnessId: "native-v2",
        label: "Native V2",
        params,
        lifecycleState: "prepared",
      }),
      start: async () => {
        throw startError;
      },
      send: async () => createAttemptResult(),
      resolveOutcome: async (_session, rawResult) => rawResult,
      cleanup,
    };

    await expect(runAgentHarnessV2LifecycleAttempt(harness, params)).rejects.toThrow(
      "codex app-server start failed",
    );
    // start() failed, so no session exists when cleanup runs.
    expect(cleanup).toHaveBeenCalledWith({
      error: startError,
      prepared: expect.objectContaining({ lifecycleState: "prepared" }),
      session: undefined,
    });
  });

  it("passes raw send results to cleanup when outcome resolution fails", async () => {
    const params = createAttemptParams();
    const rawResult = createAttemptResult();
    const outcomeError = new Error("outcome classification failed");
    const cleanup = vi.fn(async () => {});
    const harness: AgentHarnessV2 = {
      id: "native-v2",
      label: "Native V2",
      supports: () => ({ supported: true }),
      prepare: async () => ({
        harnessId: "native-v2",
        label: "Native V2",
        params,
        lifecycleState: "prepared",
      }),
      start: async (prepared) => ({ ...prepared, lifecycleState: "started" }),
      send: async () => rawResult,
      resolveOutcome: async () => {
        throw outcomeError;
      },
      cleanup,
    };

    await expect(runAgentHarnessV2LifecycleAttempt(harness, params)).rejects.toThrow(
      "outcome classification failed",
    );
    expect(cleanup).toHaveBeenCalledWith(
      expect.objectContaining({
        error: outcomeError,
        result: rawResult,
        prepared: expect.objectContaining({ lifecycleState: "prepared" }),
        session: expect.objectContaining({ lifecycleState: "started" }),
      }),
    );
  });

  it("surfaces cleanup failures after successful outcomes", async () => {
    const params = createAttemptParams();
    const harness: AgentHarnessV2 = {
      id: "native-v2",
      label: "Native V2",
      supports: () => ({ supported: true }),
      prepare: async () => ({
        harnessId: "native-v2",
        label: "Native V2",
        params,
        lifecycleState: "prepared",
      }),
      start: async (prepared) => ({ ...prepared, lifecycleState: "started" }),
      send: async () => createAttemptResult(),
      resolveOutcome: async (_session, rawResult) => rawResult,
      cleanup: async () => {
        throw new Error("cleanup failed");
      },
    };

    await expect(runAgentHarnessV2LifecycleAttempt(harness, params)).rejects.toThrow(
      "cleanup failed",
    );
  });

  it("runs a V1 harness through prepare/start/send without changing attempt params", async () => {
    const params = createAttemptParams();
    const result = createAttemptResult();
    const runAttempt = vi.fn(async () => result);
    const harness: AgentHarness = {
      id: "codex",
      label: "Codex",
      pluginId: "codex-plugin",
      supports: () => ({ supported: true, priority: 100 }),
      runAttempt,
    };

    const v2 = adaptAgentHarnessToV2(harness);
    const prepared = await v2.prepare(params);
    const session = await v2.start(prepared);

    expect(v2.resume).toBeUndefined();
    expect(await v2.send(session)).toBe(result);
    expect(runAttempt).toHaveBeenCalledWith(params);
    expect(session).toMatchObject({
      harnessId: "codex",
      label: "Codex",
      pluginId: "codex-plugin",
      params,
      lifecycleState: "started",
    });
    expect(prepared.lifecycleState).toBe("prepared");
  });

  it("keeps result classification as an explicit outcome stage", async () => {
    const params = createAttemptParams();
    const result = createAttemptResult();
    const classify = vi.fn<NonNullable<AgentHarness["classify"]>>(() => "empty");
    const harness: AgentHarness = {
      id: "codex",
      label: "Codex",
      supports: () => ({ supported: true }),
      runAttempt: vi.fn(async () => result),
      classify,
    };

    const v2 = adaptAgentHarnessToV2(harness);
    const session = await v2.start(await v2.prepare(params));

    expect(await v2.resolveOutcome(session, result)).toMatchObject({
      agentHarnessId: "codex",
      agentHarnessResultClassification: "empty",
    });
    expect(harness.classify).toHaveBeenCalledWith(result, params);
  });

  it("preserves harness-supplied classification when no classify hook is registered", async () => {
    const params = createAttemptParams();
    const result = {
      ...createAttemptResult(),
      agentHarnessResultClassification: "reasoning-only",
    } as EmbeddedRunAttemptResult;
    const harness: AgentHarness = {
      id: "codex",
      label: "Codex",
      supports: () => ({ supported: true }),
      runAttempt: vi.fn(async () => result),
    };

    const v2 = adaptAgentHarnessToV2(harness);
    const session = await v2.start(await v2.prepare(params));

    expect(await v2.resolveOutcome(session, result)).toMatchObject({
      agentHarnessId: "codex",
      agentHarnessResultClassification: "reasoning-only",
    });
  });

  it("clears stale non-ok classification when classification resolves to ok", async () => {
    const params = createAttemptParams();
    const result = {
      ...createAttemptResult(),
      agentHarnessResultClassification: "empty",
    } as EmbeddedRunAttemptResult;
    const classify = vi.fn<NonNullable<AgentHarness["classify"]>>(() => "ok");
    const harness: AgentHarness = {
      id: "codex",
      label: "Codex",
      supports: () => ({ supported: true }),
      runAttempt: vi.fn(async () => result),
      classify,
    };

    const v2 = adaptAgentHarnessToV2(harness);
    const session = await v2.start(await v2.prepare(params));

    const classified = await v2.resolveOutcome(session, result);
    expect(classified).toMatchObject({ agentHarnessId: "codex" });
    expect(classified).not.toHaveProperty("agentHarnessResultClassification");
  });

  it("preserves existing compact/reset/dispose hook this binding as compatibility methods", async () => {
    // The hooks mutate counters through `this`, so they only count when the
    // adapter invokes them as methods of the original harness object.
    const harness: AgentHarness & {
      compactCalls: number;
      resetCalls: number;
      disposeCalls: number;
    } = {
      id: "custom",
      label: "Custom",
      compactCalls: 0,
      resetCalls: 0,
      disposeCalls: 0,
      supports: () => ({ supported: true }),
      runAttempt: vi.fn(async () => createAttemptResult()),
      async compact() {
        this.compactCalls += 1;
        return {
          ok: true,
          compacted: true,
          result: {
            summary: "done",
            firstKeptEntryId: "entry-1",
            tokensBefore: 100,
          },
        };
      },
      reset(params) {
        expect(params).toEqual({ reason: "reset" });
        this.resetCalls += 1;
      },
      dispose() {
        this.disposeCalls += 1;
      },
    };

    const v2 = adaptAgentHarnessToV2(harness);

    await expect(
      v2.compact?.({
        sessionId: "session-1",
        sessionFile: "/tmp/session.jsonl",
        workspaceDir: "/tmp/workspace",
      }),
    ).resolves.toMatchObject({
      compacted: true,
    });
    await v2.reset?.({ reason: "reset" });
    await v2.dispose?.();

    expect(harness.compactCalls).toBe(1);
    expect(harness.resetCalls).toBe(1);
    expect(harness.disposeCalls).toBe(1);
  });

  it("does not dispose V1 harnesses during per-attempt cleanup", async () => {
    const dispose = vi.fn();
    const harness: AgentHarness = {
      id: "custom",
      label: "Custom",
      supports: () => ({ supported: true }),
      runAttempt: vi.fn(async () => createAttemptResult()),
      dispose,
    };
    const v2 = adaptAgentHarnessToV2(harness);
    const session = await v2.start(await v2.prepare(createAttemptParams()));

    await v2.cleanup({ session, result: createAttemptResult() });

    expect(dispose).not.toHaveBeenCalled();
  });
});
|
||||
136
src/agents/harness/v2.ts
Normal file
136
src/agents/harness/v2.ts
Normal file
@@ -0,0 +1,136 @@
|
||||
import { formatErrorMessage } from "../../infra/errors.js";
|
||||
import { createSubsystemLogger } from "../../logging/subsystem.js";
|
||||
import { applyAgentHarnessResultClassification } from "./result-classification.js";
|
||||
import type {
|
||||
AgentHarness,
|
||||
AgentHarnessAttemptParams,
|
||||
AgentHarnessAttemptResult,
|
||||
AgentHarnessCompactParams,
|
||||
AgentHarnessCompactResult,
|
||||
AgentHarnessResetParams,
|
||||
AgentHarnessSupport,
|
||||
AgentHarnessSupportContext,
|
||||
} from "./types.js";
|
||||
|
||||
const log = createSubsystemLogger("agents/harness/v2");
|
||||
|
||||
/** Fields shared by every lifecycle stage of a single V2 harness attempt. */
type AgentHarnessV2RunBase = {
  harnessId: string;
  label: string;
  pluginId?: string;
  params: AgentHarnessAttemptParams;
};

/** Attempt state returned by `prepare()`, before the harness has started. */
export type AgentHarnessV2PreparedRun = AgentHarnessV2RunBase & {
  lifecycleState: "prepared";
};

/** Attempt state returned by `start()` (and optionally `resume()`). */
export type AgentHarnessV2Session = AgentHarnessV2RunBase & {
  lifecycleState: "started";
};

/** Tool invocation handed to the optional `handleToolCall()` hook. */
export type AgentHarnessV2ToolCall = {
  id?: string;
  name: string;
  input?: unknown;
};

/**
 * Snapshot of how far an attempt got, handed to `cleanup()`. Every field is
 * optional because cleanup runs after failures at any stage as well as after
 * successful attempts.
 */
export type AgentHarnessV2CleanupParams = {
  prepared?: AgentHarnessV2PreparedRun;
  session?: AgentHarnessV2Session;
  result?: AgentHarnessAttemptResult;
  error?: unknown;
};

/**
 * Staged harness contract: an attempt is prepared, started, sent, has its
 * outcome resolved, and is then cleaned up. `compact`, `reset`, and `dispose`
 * are optional hooks outside the per-attempt lifecycle.
 */
export type AgentHarnessV2 = {
  id: string;
  label: string;
  pluginId?: string;
  supports(ctx: AgentHarnessSupportContext): AgentHarnessSupport;
  prepare(params: AgentHarnessAttemptParams): Promise<AgentHarnessV2PreparedRun>;
  start(prepared: AgentHarnessV2PreparedRun): Promise<AgentHarnessV2Session>;
  resume?(session: AgentHarnessV2Session): Promise<AgentHarnessV2Session>;
  send(session: AgentHarnessV2Session): Promise<AgentHarnessAttemptResult>;
  handleToolCall?(session: AgentHarnessV2Session, call: AgentHarnessV2ToolCall): Promise<unknown>;
  resolveOutcome(
    session: AgentHarnessV2Session,
    result: AgentHarnessAttemptResult,
  ): Promise<AgentHarnessAttemptResult>;
  cleanup(params: AgentHarnessV2CleanupParams): Promise<void>;
  compact?(params: AgentHarnessCompactParams): Promise<AgentHarnessCompactResult | undefined>;
  reset?(params: AgentHarnessResetParams): Promise<void> | void;
  dispose?(): Promise<void> | void;
};
|
||||
|
||||
/**
 * Wraps a V1 harness so it can be driven through the V2 staged lifecycle.
 *
 * - `prepare` and `start` only build state objects; they do not touch the harness.
 * - `send` performs the real work by calling `harness.runAttempt` with the
 *   unchanged attempt params.
 * - `resolveOutcome` applies `applyAgentHarnessResultClassification`.
 * - `cleanup` is a no-op.
 * - `compact`, `reset`, and `dispose` are exposed only when the V1 harness
 *   defines them, and are invoked as methods of the original harness so `this`
 *   stays bound to it.
 *
 * @param harness V1 harness to adapt.
 * @returns A V2 view over the same harness; `resume` and `handleToolCall` are not provided.
 */
export function adaptAgentHarnessToV2(harness: AgentHarness): AgentHarnessV2 {
  return {
    id: harness.id,
    label: harness.label,
    pluginId: harness.pluginId,
    supports: (ctx) => harness.supports(ctx),
    prepare: async (params) => ({
      harnessId: harness.id,
      label: harness.label,
      pluginId: harness.pluginId,
      params,
      lifecycleState: "prepared",
    }),
    start: async (prepared) => ({
      harnessId: prepared.harnessId,
      label: prepared.label,
      pluginId: prepared.pluginId,
      params: prepared.params,
      lifecycleState: "started",
    }),
    send: async (session) => harness.runAttempt(session.params),
    resolveOutcome: async (session, result) =>
      applyAgentHarnessResultClassification(harness, result, session.params),
    cleanup: async (_params) => {
      // V1 harnesses have no per-attempt cleanup hook. Global cleanup remains
      // on dispose(), which must not run after every attempt.
    },
    // Optional calls replace non-null assertions: the hook is still looked up
    // on the harness at call time and called with `this` bound to it.
    compact: harness.compact ? async (params) => harness.compact?.(params) : undefined,
    reset: harness.reset ? (params) => harness.reset?.(params) : undefined,
    dispose: harness.dispose ? () => harness.dispose?.() : undefined,
  };
}
|
||||
|
||||
/**
 * Runs one attempt through the V2 harness lifecycle:
 * `prepare` -> `start` -> `send` -> `resolveOutcome` -> `cleanup`.
 *
 * If any stage before cleanup throws, `cleanup` is still invoked with whatever
 * state was reached (plus the error and, when available, the raw result), and
 * the original stage error is rethrown. A cleanup failure in that path is only
 * logged. On the success path a cleanup failure is not caught here and
 * propagates to the caller.
 *
 * @param harness V2 harness to drive.
 * @param params Attempt params handed to `harness.prepare`.
 * @returns The result produced by `harness.resolveOutcome`.
 * @throws The error raised by the failing lifecycle stage.
 */
export async function runAgentHarnessV2LifecycleAttempt(
  harness: AgentHarnessV2,
  params: AgentHarnessAttemptParams,
): Promise<AgentHarnessAttemptResult> {
  let prepared: AgentHarnessV2PreparedRun | undefined;
  let session: AgentHarnessV2Session | undefined;
  let rawResult: AgentHarnessAttemptResult | undefined;
  let result: AgentHarnessAttemptResult;

  try {
    prepared = await harness.prepare(params);
    session = await harness.start(prepared);
    rawResult = await harness.send(session);
    result = await harness.resolveOutcome(session, rawResult);
  } catch (error) {
    // Only attach `result` when `send` actually produced one.
    const failureContext =
      rawResult === undefined
        ? { prepared, session, error }
        : { prepared, session, error, result: rawResult };
    try {
      await harness.cleanup(failureContext);
    } catch (cleanupError) {
      // Preserve the user-visible harness failure. Cleanup errors after a
      // failed lifecycle stage must not mask the actionable runtime error.
      const warnFields = {
        harnessId: harness.id,
        provider: params.provider,
        modelId: params.modelId,
        error: formatErrorMessage(cleanupError),
        originalError: formatErrorMessage(error),
      };
      log.warn("agent harness cleanup failed after attempt failure", warnFields);
    }
    throw error;
  }

  await harness.cleanup({ prepared, session, result });
  return result;
}
|
||||
@@ -1,4 +1,7 @@
|
||||
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { AgentHarness } from "../harness/types.js";
|
||||
import type { AgentInternalEvent } from "../internal-events.js";
|
||||
import type { AgentRuntimePlan } from "../runtime-plan/types.js";
|
||||
import {
|
||||
makeAttemptResult,
|
||||
makeCompactionSuccess,
|
||||
@@ -8,6 +11,7 @@ import {
|
||||
} from "./run.overflow-compaction.fixture.js";
|
||||
import {
|
||||
loadRunOverflowCompactionHarness,
|
||||
mockedBuildAgentRuntimePlan,
|
||||
mockedBuildEmbeddedRunPayloads,
|
||||
mockedCoerceToFailoverError,
|
||||
mockedCompactDirect,
|
||||
@@ -26,8 +30,111 @@ import {
|
||||
overflowBaseRunParams,
|
||||
resetRunOverflowCompactionHarnessMocks,
|
||||
} from "./run.overflow-compaction.harness.js";
|
||||
import type { RunEmbeddedPiAgentParams } from "./run/params.js";
|
||||
import type { EmbeddedRunAttemptParams } from "./run/types.js";
|
||||
|
||||
let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent;
|
||||
/**
 * Overrides accepted by `makeForwardedRuntimePlan`: any top-level plan field
 * may be replaced wholesale, while `auth` and `resolvedRef` accept partial
 * objects.
 */
type RuntimePlanOverrides = Partial<Omit<AgentRuntimePlan, "auth" | "resolvedRef">> & {
  [Key in "auth" | "resolvedRef"]?: Partial<AgentRuntimePlan[Key]>;
};
|
||||
/**
 * Builds the fixture for the attempt-param forwarding test: the optional run
 * params to pass in, and the subset expected on the attempt call.
 * `internalEvents` is passed through by reference in `params` only.
 */
function makeForwardingCase(internalEvents: AgentInternalEvent[]) {
  const params = {
    toolsAllow: ["exec", "read"],
    bootstrapContextMode: "lightweight",
    bootstrapContextRunKind: "cron",
    disableMessageTool: true,
    forceMessageTool: true,
    requireExplicitMessageTarget: true,
    internalEvents,
  } satisfies Partial<RunEmbeddedPiAgentParams>;

  const expected = {
    toolsAllow: ["exec", "read"],
    bootstrapContextMode: "lightweight",
    bootstrapContextRunKind: "cron",
    disableMessageTool: true,
    forceMessageTool: true,
    requireExplicitMessageTarget: true,
  } satisfies Record<string, unknown>;

  return {
    runId: "forward-attempt-params",
    params,
    expected,
  } satisfies { runId: string };
}
|
||||
|
||||
/**
 * Creates a stub `AgentRuntimePlan` for forwarding tests.
 *
 * Defaults describe an `anthropic` / `test-model` plan on the `pi` harness with
 * `vi.fn` stand-ins for every callable. Top-level `overrides` replace default
 * fields wholesale; `auth` and `resolvedRef` overrides are shallow-merged onto
 * the defaults.
 */
function makeForwardedRuntimePlan(overrides: RuntimePlanOverrides = {}): AgentRuntimePlan {
  const defaultPolicy = {
    sanitizeMode: "full",
    sanitizeToolCallIds: true,
    preserveNativeAnthropicToolUseIds: false,
    repairToolUseResultPairing: true,
    preserveSignatures: false,
    sanitizeThinkingSignatures: true,
    dropThinkingBlocks: false,
    applyGoogleTurnOrdering: false,
    validateGeminiTurns: false,
    validateAnthropicTurns: false,
    allowSyntheticToolResults: false,
  } satisfies AgentRuntimePlan["transcript"]["policy"];

  const defaults: AgentRuntimePlan = {
    auth: {
      authProfileProviderForAuth: "anthropic",
      providerForAuth: "anthropic",
    },
    delivery: {
      isSilentPayload: vi.fn(() => false),
      resolveFollowupRoute: vi.fn(),
    },
    observability: {
      provider: "anthropic",
      resolvedRef: "anthropic/test-model",
      modelId: "test-model",
    },
    outcome: {
      classifyRunResult: vi.fn(() => undefined),
    },
    prompt: {
      provider: "anthropic",
      modelId: "test-model",
      resolveSystemPromptContribution: vi.fn(),
    },
    transcript: {
      policy: defaultPolicy,
      // Sanitize mode depends on the requested model API; everything else
      // mirrors the default policy.
      resolvePolicy: vi.fn((modelContext): AgentRuntimePlan["transcript"]["policy"] => ({
        ...defaultPolicy,
        sanitizeMode: modelContext?.modelApi === "anthropic-messages" ? "full" : "images-only",
      })),
    },
    transport: {
      extraParams: {},
      resolveExtraParams: vi.fn(() => ({})),
    },
    resolvedRef: {
      provider: "anthropic",
      modelId: "test-model",
      harnessId: "pi",
    },
    tools: {
      normalize: vi.fn((tools) => tools),
      logDiagnostics: vi.fn(),
    },
  };

  const {
    auth: authOverrides,
    resolvedRef: resolvedRefOverrides,
    ...topLevelOverrides
  } = overrides;

  return {
    ...defaults,
    ...topLevelOverrides,
    auth: { ...defaults.auth, ...authOverrides },
    resolvedRef: { ...defaults.resolvedRef, ...resolvedRefOverrides },
  };
}
|
||||
|
||||
describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
|
||||
beforeAll(async () => {
|
||||
@@ -83,9 +190,61 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("forwards optional attempt params and the runtime plan into one attempt call", async () => {
|
||||
const internalEvents: AgentInternalEvent[] = [];
|
||||
const forwardingCase = makeForwardingCase(internalEvents);
|
||||
const runtimePlan = makeForwardedRuntimePlan();
|
||||
mockedBuildAgentRuntimePlan.mockReturnValueOnce(runtimePlan);
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
|
||||
|
||||
await runEmbeddedPiAgent({
|
||||
...overflowBaseRunParams,
|
||||
...forwardingCase.params,
|
||||
runId: forwardingCase.runId,
|
||||
});
|
||||
|
||||
expect(mockedBuildAgentRuntimePlan).toHaveBeenCalledTimes(1);
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
...forwardingCase.expected,
|
||||
runtimePlan: expect.objectContaining({
|
||||
resolvedRef: expect.objectContaining({
|
||||
provider: "anthropic",
|
||||
modelId: "test-model",
|
||||
}),
|
||||
tools: expect.objectContaining({
|
||||
normalize: expect.any(Function),
|
||||
}),
|
||||
transport: expect.objectContaining({
|
||||
resolveExtraParams: expect.any(Function),
|
||||
}),
|
||||
}),
|
||||
}),
|
||||
);
|
||||
const attemptParams = mockedRunEmbeddedAttempt.mock.calls[0]?.[0] as
|
||||
| EmbeddedRunAttemptParams
|
||||
| undefined;
|
||||
expect(attemptParams?.runtimePlan).toBe(runtimePlan);
|
||||
expect(attemptParams?.internalEvents).toBe(internalEvents);
|
||||
});
|
||||
|
||||
it("forwards explicit OpenAI Codex auth profiles to codex plugin harnesses", async () => {
|
||||
const { clearAgentHarnesses, registerAgentHarness } = await import("../harness/registry.js");
|
||||
const pluginRunAttempt = vi.fn(async () => makeAttemptResult({ assistantTexts: ["ok"] }));
|
||||
const pluginRunAttempt = vi.fn<AgentHarness["runAttempt"]>(async () =>
|
||||
makeAttemptResult({ assistantTexts: ["ok"] }),
|
||||
);
|
||||
const runtimePlan = makeForwardedRuntimePlan({
|
||||
resolvedRef: {
|
||||
provider: "codex",
|
||||
modelId: "gpt-5.4",
|
||||
harnessId: "codex",
|
||||
},
|
||||
auth: {
|
||||
harnessAuthProvider: "openai-codex",
|
||||
forwardedAuthProfileId: "openai-codex:work",
|
||||
},
|
||||
});
|
||||
clearAgentHarnesses();
|
||||
registerAgentHarness({
|
||||
id: "codex",
|
||||
@@ -94,6 +253,7 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
|
||||
ctx.provider === "codex" ? { supported: true, priority: 100 } : { supported: false },
|
||||
runAttempt: pluginRunAttempt,
|
||||
});
|
||||
mockedBuildAgentRuntimePlan.mockReturnValueOnce(runtimePlan);
|
||||
mockedGetApiKeyForModel.mockRejectedValueOnce(new Error("generic auth should be skipped"));
|
||||
|
||||
try {
|
||||
@@ -117,18 +277,47 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
|
||||
}
|
||||
|
||||
expect(mockedGetApiKeyForModel).not.toHaveBeenCalled();
|
||||
expect(mockedBuildAgentRuntimePlan).toHaveBeenCalledTimes(1);
|
||||
expect(pluginRunAttempt).toHaveBeenCalledTimes(1);
|
||||
expect(pluginRunAttempt).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
provider: "codex",
|
||||
authProfileId: "openai-codex:work",
|
||||
authProfileIdSource: "user",
|
||||
runtimePlan: expect.objectContaining({
|
||||
resolvedRef: expect.objectContaining({
|
||||
provider: "codex",
|
||||
modelId: "gpt-5.4",
|
||||
harnessId: "codex",
|
||||
}),
|
||||
auth: expect.objectContaining({
|
||||
harnessAuthProvider: "openai-codex",
|
||||
forwardedAuthProfileId: "openai-codex:work",
|
||||
}),
|
||||
}),
|
||||
}),
|
||||
);
|
||||
const harnessParams = pluginRunAttempt.mock.calls[0]?.[0];
|
||||
expect(harnessParams?.runtimePlan).toBe(runtimePlan);
|
||||
});
|
||||
|
||||
it("forwards OpenAI Codex auth profiles when openai/* is forced through codex", async () => {
|
||||
const { clearAgentHarnesses, registerAgentHarness } = await import("../harness/registry.js");
|
||||
const pluginRunAttempt = vi.fn(async () => makeAttemptResult({ assistantTexts: ["ok"] }));
|
||||
const pluginRunAttempt = vi.fn<AgentHarness["runAttempt"]>(async () =>
|
||||
makeAttemptResult({ assistantTexts: ["ok"] }),
|
||||
);
|
||||
const runtimePlan = makeForwardedRuntimePlan({
|
||||
resolvedRef: {
|
||||
provider: "openai",
|
||||
modelId: "gpt-5.4",
|
||||
harnessId: "codex",
|
||||
},
|
||||
auth: {
|
||||
providerForAuth: "openai",
|
||||
harnessAuthProvider: "openai-codex",
|
||||
forwardedAuthProfileId: "openai-codex:work",
|
||||
},
|
||||
});
|
||||
clearAgentHarnesses();
|
||||
registerAgentHarness({
|
||||
id: "codex",
|
||||
@@ -136,6 +325,7 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
|
||||
supports: () => ({ supported: false }),
|
||||
runAttempt: pluginRunAttempt,
|
||||
});
|
||||
mockedBuildAgentRuntimePlan.mockReturnValueOnce(runtimePlan);
|
||||
mockedGetApiKeyForModel.mockRejectedValueOnce(new Error("generic auth should be skipped"));
|
||||
|
||||
try {
|
||||
@@ -159,13 +349,29 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
|
||||
}
|
||||
|
||||
expect(mockedGetApiKeyForModel).not.toHaveBeenCalled();
|
||||
expect(mockedBuildAgentRuntimePlan).toHaveBeenCalledTimes(1);
|
||||
expect(pluginRunAttempt).toHaveBeenCalledTimes(1);
|
||||
expect(pluginRunAttempt).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
provider: "openai",
|
||||
authProfileId: "openai-codex:work",
|
||||
authProfileIdSource: "user",
|
||||
runtimePlan: expect.objectContaining({
|
||||
resolvedRef: expect.objectContaining({
|
||||
provider: "openai",
|
||||
modelId: "gpt-5.4",
|
||||
harnessId: "codex",
|
||||
}),
|
||||
auth: expect.objectContaining({
|
||||
providerForAuth: "openai",
|
||||
harnessAuthProvider: "openai-codex",
|
||||
forwardedAuthProfileId: "openai-codex:work",
|
||||
}),
|
||||
}),
|
||||
}),
|
||||
);
|
||||
const harnessParams = pluginRunAttempt.mock.calls[0]?.[0];
|
||||
expect(harnessParams?.runtimePlan).toBe(runtimePlan);
|
||||
});
|
||||
|
||||
it("blocks undersized models before dispatching a provider attempt", async () => {
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { ProviderRuntimeModel } from "../../../plugins/provider-runtime-model.types.js";
|
||||
import type { AgentRuntimePlan } from "../../runtime-plan/types.js";
|
||||
import { resolveAttemptTranscriptPolicy } from "./attempt.transcript-policy.js";
|
||||
|
||||
// Hoisted so the mock factory below can reference it.
const resolveProviderRuntimePluginMock = vi.hoisted(() => vi.fn());

vi.mock("../../../plugins/provider-hook-runtime.js", () => ({
  resolveProviderRuntimePlugin: resolveProviderRuntimePluginMock,
}));

describe("resolveAttemptTranscriptPolicy", () => {
  const workspaceDir = "/tmp/openclaw-transcript-policy";

  beforeEach(() => {
    // Start each test with the provider runtime plugin lookup returning nothing.
    resolveProviderRuntimePluginMock.mockReset();
    resolveProviderRuntimePluginMock.mockReturnValue(undefined);
  });

  it("uses RuntimePlan transcript policy when available", () => {
    const plannedPolicy = {
      sanitizeMode: "full",
      sanitizeToolCallIds: true,
      toolCallIdMode: "strict",
      preserveNativeAnthropicToolUseIds: false,
      repairToolUseResultPairing: true,
      preserveSignatures: true,
      sanitizeThinkingSignatures: false,
      dropThinkingBlocks: true,
      applyGoogleTurnOrdering: false,
      validateGeminiTurns: false,
      validateAnthropicTurns: true,
      allowSyntheticToolResults: true,
    } as const;
    const resolvePolicy = vi.fn(() => plannedPolicy);
    const runtimePlan = { transcript: { resolvePolicy } } as unknown as AgentRuntimePlan;

    const model = {
      id: "claude-opus-4.6",
      name: "Claude Opus 4.6",
      api: "anthropic-messages",
      provider: "anthropic",
      baseUrl: "https://api.anthropic.com",
      reasoning: true,
      input: ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 200_000,
      maxTokens: 8_192,
    } satisfies ProviderRuntimeModel;
    const runtimePlanModelContext = {
      workspaceDir,
      modelApi: "anthropic-messages",
      model,
    };

    const resolved = resolveAttemptTranscriptPolicy({
      runtimePlan,
      runtimePlanModelContext,
      provider: "anthropic",
      modelId: "claude-opus-4.6",
    });

    // The plan's policy object must be returned as-is, not copied.
    expect(resolved).toBe(plannedPolicy);
    expect(resolvePolicy).toHaveBeenCalledWith(runtimePlanModelContext);
  });

  it("keeps the legacy provider transcript fallback when no RuntimePlan is available", () => {
    const env = { OPENCLAW_TEST_TRANSCRIPT_POLICY: "1" } as NodeJS.ProcessEnv;
    const provider = "custom-openai-compatible";

    const policy = resolveAttemptTranscriptPolicy({
      runtimePlanModelContext: {
        workspaceDir,
        modelApi: "openai-responses",
      },
      provider,
      modelId: "gpt-5.4",
      env,
    });

    expect(policy).toMatchObject({
      sanitizeMode: "images-only",
      sanitizeToolCallIds: true,
      toolCallIdMode: "strict",
      repairToolUseResultPairing: true,
      allowSyntheticToolResults: false,
    });
    expect(resolveProviderRuntimePluginMock).toHaveBeenCalledWith({
      provider,
      config: undefined,
      workspaceDir,
      env,
    });
  });
});
|
||||
@@ -0,0 +1,36 @@
|
||||
import type { OpenClawConfig } from "../../../config/types.openclaw.js";
|
||||
import type { ProviderRuntimeModel } from "../../../plugins/provider-runtime-model.types.js";
|
||||
import type { AgentRuntimePlan } from "../../runtime-plan/types.js";
|
||||
import { resolveTranscriptPolicy, type TranscriptPolicy } from "../../transcript-policy.js";
|
||||
|
||||
/** Signature of the RuntimePlan transcript policy resolver. */
type RuntimePlanResolvePolicy = AgentRuntimePlan["transcript"]["resolvePolicy"];

/**
 * Non-nullable form of the first argument accepted by
 * `runtimePlan.transcript.resolvePolicy`.
 */
export type AttemptRuntimeModelContext = NonNullable<Parameters<RuntimePlanResolvePolicy>[0]>;
|
||||
|
||||
/**
 * Treats the context model as a `ProviderRuntimeModel` only when it carries a
 * string `id`; anything else (including a missing model) yields `undefined`.
 */
function asProviderRuntimeModel(
  model: AttemptRuntimeModelContext["model"],
): ProviderRuntimeModel | undefined {
  if (typeof model?.id !== "string") {
    return undefined;
  }
  return model as ProviderRuntimeModel;
}
|
||||
|
||||
/**
 * Resolves the transcript policy for an attempt.
 *
 * When a RuntimePlan is supplied and its `transcript.resolvePolicy` returns a
 * non-nullish policy, that policy is returned unchanged. Otherwise the legacy
 * `resolveTranscriptPolicy` path is used, fed from the model context plus the
 * explicit provider/model/config/env params (`env` defaults to `process.env`).
 *
 * @param params Plan (optional), model context, and legacy resolver inputs.
 * @returns The transcript policy to apply.
 */
export function resolveAttemptTranscriptPolicy(params: {
  runtimePlan?: AgentRuntimePlan;
  runtimePlanModelContext: AttemptRuntimeModelContext;
  provider: string;
  modelId: string;
  config?: OpenClawConfig;
  env?: NodeJS.ProcessEnv;
}): TranscriptPolicy {
  const modelContext = params.runtimePlanModelContext;

  const plannedPolicy = params.runtimePlan?.transcript.resolvePolicy(modelContext);
  if (plannedPolicy !== undefined && plannedPolicy !== null) {
    return plannedPolicy;
  }

  // Legacy fallback: no plan, or the plan produced no policy.
  return resolveTranscriptPolicy({
    modelApi: modelContext.modelApi,
    provider: params.provider,
    modelId: params.modelId,
    config: params.config,
    workspaceDir: modelContext.workspaceDir,
    env: params.env ?? process.env,
    model: asProviderRuntimeModel(modelContext.model),
  });
}
|
||||
@@ -117,6 +117,10 @@ import {
|
||||
import { wrapStreamFnTextTransforms } from "../../plugin-text-transforms.js";
|
||||
import { describeProviderRequestRoutingSummary } from "../../provider-attribution.js";
|
||||
import { registerProviderStreamForModel } from "../../provider-stream.js";
|
||||
import {
|
||||
logAgentRuntimeToolDiagnostics,
|
||||
normalizeAgentRuntimeTools,
|
||||
} from "../../runtime-plan/tools.js";
|
||||
import { resolveSandboxContext } from "../../sandbox.js";
|
||||
import { resolveSandboxRuntimeStatus } from "../../sandbox/runtime-status.js";
|
||||
import { repairSessionFileIfNeeded } from "../../session-file-repair.js";
|
||||
@@ -148,10 +152,7 @@ import {
|
||||
collectExplicitToolAllowlistSources,
|
||||
} from "../../tool-allowlist-guard.js";
|
||||
import { UNKNOWN_TOOL_THRESHOLD } from "../../tool-loop-detection.js";
|
||||
import {
|
||||
resolveTranscriptPolicy,
|
||||
shouldAllowProviderOwnedThinkingReplay,
|
||||
} from "../../transcript-policy.js";
|
||||
import { shouldAllowProviderOwnedThinkingReplay } from "../../transcript-policy.js";
|
||||
import { normalizeUsage, type NormalizedUsage } from "../../usage.js";
|
||||
import { DEFAULT_BOOTSTRAP_FILENAME } from "../../workspace.js";
|
||||
import { isRunnerAbortError } from "../abort.js";
|
||||
@@ -219,10 +220,6 @@ import {
|
||||
resolveLiveToolResultMaxChars,
|
||||
truncateOversizedToolResultsInSessionManager,
|
||||
} from "../tool-result-truncation.js";
|
||||
import {
|
||||
logProviderToolSchemaDiagnostics,
|
||||
normalizeProviderToolSchemas,
|
||||
} from "../tool-schema-runtime.js";
|
||||
import { splitSdkTools } from "../tool-split.js";
|
||||
import { mapThinkingLevel } from "../utils.js";
|
||||
import { flushPendingToolResultsAfterIdle } from "../wait-for-idle-before-flush.js";
|
||||
@@ -290,6 +287,7 @@ import {
|
||||
wrapStreamFnTrimToolCallNames,
|
||||
} from "./attempt.tool-call-normalization.js";
|
||||
import { buildEmbeddedAttemptToolRunContext } from "./attempt.tool-run-context.js";
|
||||
import { resolveAttemptTranscriptPolicy } from "./attempt.transcript-policy.js";
|
||||
import { waitForCompactionRetryWithAggregateTimeout } from "./compaction-retry-aggregate-timeout.js";
|
||||
import {
|
||||
resolveRunTimeoutDuringCompaction,
|
||||
@@ -844,18 +842,17 @@ export async function runEmbeddedAttempt(
|
||||
modelApi: params.model.api,
|
||||
model: params.model,
|
||||
};
|
||||
const tools =
|
||||
params.runtimePlan?.tools.normalize(toolsEnabled ? toolsRaw : [], runtimePlanModelContext) ??
|
||||
normalizeProviderToolSchemas({
|
||||
tools: toolsEnabled ? toolsRaw : [],
|
||||
provider: params.provider,
|
||||
config: params.config,
|
||||
workspaceDir: effectiveWorkspace,
|
||||
env: process.env,
|
||||
modelId: params.modelId,
|
||||
modelApi: params.model.api,
|
||||
model: params.model,
|
||||
});
|
||||
const tools = normalizeAgentRuntimeTools({
|
||||
runtimePlan: params.runtimePlan,
|
||||
tools: toolsEnabled ? toolsRaw : [],
|
||||
provider: params.provider,
|
||||
config: params.config,
|
||||
workspaceDir: effectiveWorkspace,
|
||||
env: process.env,
|
||||
modelId: params.modelId,
|
||||
modelApi: params.model.api,
|
||||
model: params.model,
|
||||
});
|
||||
const clientTools = toolsEnabled ? params.clientTools : undefined;
|
||||
const bundleMcpEnabled = shouldCreateBundleMcpRuntimeForAttempt({
|
||||
toolsEnabled,
|
||||
@@ -942,20 +939,17 @@ export async function runEmbeddedAttempt(
|
||||
toolsEnabled,
|
||||
disableTools: params.disableTools,
|
||||
});
|
||||
if (params.runtimePlan) {
|
||||
params.runtimePlan.tools.logDiagnostics(effectiveTools, runtimePlanModelContext);
|
||||
} else {
|
||||
logProviderToolSchemaDiagnostics({
|
||||
tools: effectiveTools,
|
||||
provider: params.provider,
|
||||
config: params.config,
|
||||
workspaceDir: effectiveWorkspace,
|
||||
env: process.env,
|
||||
modelId: params.modelId,
|
||||
modelApi: params.model.api,
|
||||
model: params.model,
|
||||
});
|
||||
}
|
||||
logAgentRuntimeToolDiagnostics({
|
||||
runtimePlan: params.runtimePlan,
|
||||
tools: effectiveTools,
|
||||
provider: params.provider,
|
||||
config: params.config,
|
||||
workspaceDir: effectiveWorkspace,
|
||||
env: process.env,
|
||||
modelId: params.modelId,
|
||||
modelApi: params.model.api,
|
||||
model: params.model,
|
||||
});
|
||||
|
||||
const machineName = await getMachineDisplayName();
|
||||
const runtimeChannel = normalizeMessageChannel(params.messageChannel ?? params.messageProvider);
|
||||
@@ -1201,17 +1195,14 @@ export async function runEmbeddedAttempt(
|
||||
.then(() => true)
|
||||
.catch(() => false);
|
||||
|
||||
const transcriptPolicy =
|
||||
params.runtimePlan?.transcript.resolvePolicy(runtimePlanModelContext) ??
|
||||
resolveTranscriptPolicy({
|
||||
modelApi: params.model?.api,
|
||||
provider: params.provider,
|
||||
modelId: params.modelId,
|
||||
config: params.config,
|
||||
workspaceDir: effectiveWorkspace,
|
||||
env: process.env,
|
||||
model: params.model,
|
||||
});
|
||||
const transcriptPolicy = resolveAttemptTranscriptPolicy({
|
||||
runtimePlan: params.runtimePlan,
|
||||
runtimePlanModelContext,
|
||||
provider: params.provider,
|
||||
modelId: params.modelId,
|
||||
config: params.config,
|
||||
env: process.env,
|
||||
});
|
||||
|
||||
await prewarmSessionFile(params.sessionFile);
|
||||
sessionManager = guardSessionManager(SessionManager.open(params.sessionFile), {
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import type { AgentTool } from "@mariozechner/pi-agent-core";
|
||||
import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload";
|
||||
import type { TSchema } from "typebox";
|
||||
import type { ThinkLevel } from "../../auto-reply/thinking.js";
|
||||
import { isSilentReplyPayloadText, SILENT_REPLY_TOKEN } from "../../auto-reply/tokens.js";
|
||||
import type { OpenClawConfig } from "../../config/types.openclaw.js";
|
||||
import type { ProviderRuntimeModel } from "../../plugins/provider-runtime-model.types.js";
|
||||
import {
|
||||
resolveProviderFollowupFallbackRoute,
|
||||
resolveProviderSystemPromptContribution,
|
||||
@@ -30,9 +33,26 @@ function hasMedia(payload: { mediaUrl?: string; mediaUrls?: string[] }): boolean
|
||||
return resolveSendableOutboundReplyParts(payload).hasMedia;
|
||||
}
|
||||
|
||||
/**
 * Narrows an untyped value to `OpenClawConfig` when it is a non-null,
 * non-array object; everything else becomes `undefined`. No field-level
 * validation is performed.
 */
function asOpenClawConfig(value: unknown): OpenClawConfig | undefined {
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return undefined;
  }
  return value as OpenClawConfig;
}
|
||||
|
||||
/**
 * Re-types the plan's model input as `ProviderRuntimeModel`. Only `undefined`
 * is filtered out; any other value is returned as-is without validation.
 */
function asProviderRuntimeModel(
  value: BuildAgentRuntimePlanParams["model"],
): ProviderRuntimeModel | undefined {
  if (value === undefined) {
    return undefined;
  }
  return value as ProviderRuntimeModel;
}
|
||||
|
||||
/**
 * Re-types the plan's thinking level input as `ThinkLevel`. Only `undefined`
 * is filtered out; any other value is returned as-is without validation.
 */
function asThinkLevel(value: BuildAgentRuntimePlanParams["thinkingLevel"]): ThinkLevel | undefined {
  if (value === undefined) {
    return undefined;
  }
  return value as ThinkLevel;
}
|
||||
|
||||
export function buildAgentRuntimeDeliveryPlan(
|
||||
params: BuildAgentRuntimeDeliveryPlanParams,
|
||||
): AgentRuntimeDeliveryPlan {
|
||||
const config = asOpenClawConfig(params.config);
|
||||
return {
|
||||
isSilentPayload(payload): boolean {
|
||||
return isSilentReplyPayloadText(payload.text, SILENT_REPLY_TOKEN) && !hasMedia(payload);
|
||||
@@ -40,10 +60,10 @@ export function buildAgentRuntimeDeliveryPlan(
|
||||
resolveFollowupRoute(routeParams) {
|
||||
return resolveProviderFollowupFallbackRoute({
|
||||
provider: params.provider,
|
||||
config: params.config,
|
||||
config,
|
||||
workspaceDir: params.workspaceDir,
|
||||
context: {
|
||||
config: params.config,
|
||||
config,
|
||||
agentDir: params.agentDir,
|
||||
workspaceDir: params.workspaceDir,
|
||||
provider: params.provider,
|
||||
@@ -66,13 +86,15 @@ export function buildAgentRuntimeOutcomePlan(): AgentRuntimeOutcomePlan {
|
||||
}
|
||||
|
||||
export function buildAgentRuntimePlan(params: BuildAgentRuntimePlanParams): AgentRuntimePlan {
|
||||
const config = asOpenClawConfig(params.config);
|
||||
const model = asProviderRuntimeModel(params.model);
|
||||
const modelApi = params.modelApi ?? params.model?.api ?? undefined;
|
||||
const transport = params.resolvedTransport;
|
||||
const auth = buildAgentRuntimeAuthPlan({
|
||||
provider: params.provider,
|
||||
authProfileProvider: params.authProfileProvider,
|
||||
sessionAuthProfileId: params.sessionAuthProfileId,
|
||||
config: params.config,
|
||||
config,
|
||||
workspaceDir: params.workspaceDir,
|
||||
harnessId: params.harnessId,
|
||||
harnessRuntime: params.harnessRuntime,
|
||||
@@ -87,12 +109,12 @@ export function buildAgentRuntimePlan(params: BuildAgentRuntimePlanParams): Agen
|
||||
};
|
||||
const toolContext = {
|
||||
provider: params.provider,
|
||||
config: params.config,
|
||||
config,
|
||||
workspaceDir: params.workspaceDir,
|
||||
env: process.env,
|
||||
modelId: params.modelId,
|
||||
modelApi,
|
||||
model: params.model,
|
||||
model,
|
||||
};
|
||||
const resolveToolContext = (overrides?: {
|
||||
workspaceDir?: string;
|
||||
@@ -102,7 +124,7 @@ export function buildAgentRuntimePlan(params: BuildAgentRuntimePlanParams): Agen
|
||||
...toolContext,
|
||||
...(overrides?.workspaceDir !== undefined ? { workspaceDir: overrides.workspaceDir } : {}),
|
||||
...(overrides?.modelApi !== undefined ? { modelApi: overrides.modelApi } : {}),
|
||||
...(overrides?.model !== undefined ? { model: overrides.model } : {}),
|
||||
...(overrides?.model !== undefined ? { model: asProviderRuntimeModel(overrides.model) } : {}),
|
||||
});
|
||||
const resolveTranscriptRuntimePolicy = (overrides?: {
|
||||
workspaceDir?: string;
|
||||
@@ -112,25 +134,25 @@ export function buildAgentRuntimePlan(params: BuildAgentRuntimePlanParams): Agen
|
||||
resolveTranscriptPolicy({
|
||||
provider: params.provider,
|
||||
modelId: params.modelId,
|
||||
config: params.config,
|
||||
config,
|
||||
workspaceDir: overrides?.workspaceDir ?? params.workspaceDir,
|
||||
env: process.env,
|
||||
modelApi: overrides?.modelApi ?? modelApi,
|
||||
model: overrides?.model ?? params.model,
|
||||
model: asProviderRuntimeModel(overrides?.model) ?? model,
|
||||
});
|
||||
const resolveTransportExtraParams = (
|
||||
overrides: Parameters<AgentRuntimePlan["transport"]["resolveExtraParams"]>[0] = {},
|
||||
) =>
|
||||
resolvePreparedExtraParams({
|
||||
cfg: params.config,
|
||||
cfg: config,
|
||||
provider: params.provider,
|
||||
modelId: params.modelId,
|
||||
agentDir: params.agentDir,
|
||||
workspaceDir: overrides.workspaceDir ?? params.workspaceDir,
|
||||
extraParamsOverride: overrides.extraParamsOverride ?? params.extraParamsOverride,
|
||||
thinkingLevel: overrides.thinkingLevel ?? params.thinkingLevel,
|
||||
thinkingLevel: asThinkLevel(overrides.thinkingLevel ?? params.thinkingLevel),
|
||||
agentId: overrides.agentId ?? params.agentId,
|
||||
model: overrides.model ?? params.model,
|
||||
model: asProviderRuntimeModel(overrides.model) ?? model,
|
||||
resolvedTransport: overrides.resolvedTransport ?? transport,
|
||||
});
|
||||
|
||||
@@ -143,9 +165,12 @@ export function buildAgentRuntimePlan(params: BuildAgentRuntimePlanParams): Agen
|
||||
resolveSystemPromptContribution(context) {
|
||||
return resolveProviderSystemPromptContribution({
|
||||
provider: params.provider,
|
||||
config: params.config,
|
||||
config,
|
||||
workspaceDir: context.workspaceDir ?? params.workspaceDir,
|
||||
context,
|
||||
context: {
|
||||
...context,
|
||||
config: asOpenClawConfig(context.config),
|
||||
},
|
||||
});
|
||||
},
|
||||
},
|
||||
|
||||
37
src/agents/runtime-plan/tools.diagnostics.test.ts
Normal file
37
src/agents/runtime-plan/tools.diagnostics.test.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
|
||||
// Hoisted so the mock factory below can reference these spies.
const mocks = vi.hoisted(() => ({
  logProviderToolSchemaDiagnostics: vi.fn(),
  normalizeProviderToolSchemas: vi.fn((params: { tools: unknown[] }) => params.tools),
}));

vi.mock("../pi-embedded-runner/tool-schema-runtime.js", () => ({
  logProviderToolSchemaDiagnostics: mocks.logProviderToolSchemaDiagnostics,
  normalizeProviderToolSchemas: mocks.normalizeProviderToolSchemas,
}));

// Imported after the mock is registered so the module under test sees it.
const { logAgentRuntimeToolDiagnostics } = await import("./tools.js");

describe("AgentRuntimePlan tool diagnostics legacy fallback", () => {
  it("falls back to provider diagnostics when no RuntimePlan is available", () => {
    const tools = [{ name: "alpha" }] as never;
    const legacyParams = {
      tools,
      provider: "openai",
      modelId: "gpt-5.4",
      modelApi: "openai-responses",
      workspaceDir: "/tmp/openclaw-runtime-plan-tools",
    };

    logAgentRuntimeToolDiagnostics(legacyParams);

    expect(mocks.logProviderToolSchemaDiagnostics).toHaveBeenCalledWith(
      expect.objectContaining(legacyParams),
    );
  });
});
|
||||
107
src/agents/runtime-plan/tools.test.ts
Normal file
107
src/agents/runtime-plan/tools.test.ts
Normal file
@@ -0,0 +1,107 @@
|
||||
import type { AgentTool } from "@mariozechner/pi-agent-core";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
createNativeOpenAIResponsesModel,
|
||||
createParameterFreeTool,
|
||||
normalizedParameterFreeSchema,
|
||||
} from "../../../test/helpers/agents/schema-normalization-runtime-contract.js";
|
||||
import { logAgentRuntimeToolDiagnostics, normalizeAgentRuntimeTools } from "./tools.js";
|
||||
import type { AgentRuntimePlan } from "./types.js";
|
||||
|
||||
describe("AgentRuntimePlan tool policy helpers", () => {
  const workspaceDir = "/tmp/openclaw-runtime-plan-tools";
  const provider = "openai";
  const modelId = "gpt-5.4";
  const modelApi = "openai-responses";

  // Minimal plan stub: only the `tools` section is populated.
  const planWithToolPolicy = (toolPolicy: {
    normalize: unknown;
    logDiagnostics: unknown;
  }): AgentRuntimePlan => ({ tools: toolPolicy }) as unknown as AgentRuntimePlan;

  it("uses RuntimePlan-owned tool normalization when a plan is available", () => {
    const tools = [createParameterFreeTool()] as AgentTool[];
    const normalized = [{ ...tools[0], name: "normalized" }] as AgentTool[];
    const model = createNativeOpenAIResponsesModel() as never;
    const normalize = vi.fn(() => normalized);
    const runtimePlan = planWithToolPolicy({ normalize, logDiagnostics: vi.fn() });

    const result = normalizeAgentRuntimeTools({
      runtimePlan,
      tools,
      provider,
      modelId,
      modelApi,
      workspaceDir,
      model,
    });

    expect(result).toBe(normalized);
    expect(normalize).toHaveBeenCalledWith(tools, { workspaceDir, modelApi, model });
  });

  it("accepts legacy optional model fields while normalizing RuntimePlan context", () => {
    const tools = [createParameterFreeTool()] as AgentTool[];
    const normalize = vi.fn(() => tools);
    const runtimePlan = planWithToolPolicy({ normalize, logDiagnostics: vi.fn() });

    const result = normalizeAgentRuntimeTools({
      runtimePlan,
      tools,
      provider,
      modelApi: null,
    });

    expect(result).toBe(tools);
    // A null modelApi and absent fields reach the plan as undefined.
    expect(normalize).toHaveBeenCalledWith(tools, {
      workspaceDir: undefined,
      modelApi: undefined,
      model: undefined,
    });
  });

  it("falls back to legacy provider schema normalization when no plan is available", () => {
    const normalized = normalizeAgentRuntimeTools({
      tools: [createParameterFreeTool()] as AgentTool[],
      provider,
      modelId,
      modelApi,
      workspaceDir,
      model: createNativeOpenAIResponsesModel() as never,
    });

    expect(normalized[0]?.parameters).toEqual(normalizedParameterFreeSchema());
  });

  it("routes diagnostics through RuntimePlan when a plan is available", () => {
    const tools = [createParameterFreeTool()] as AgentTool[];
    const model = createNativeOpenAIResponsesModel() as never;
    const logDiagnostics = vi.fn();
    const runtimePlan = planWithToolPolicy({ normalize: vi.fn(), logDiagnostics });

    logAgentRuntimeToolDiagnostics({
      runtimePlan,
      tools,
      provider,
      modelId,
      modelApi,
      workspaceDir,
      model,
    });

    expect(logDiagnostics).toHaveBeenCalledWith(tools, { workspaceDir, modelApi, model });
  });
});
|
||||
71
src/agents/runtime-plan/tools.ts
Normal file
71
src/agents/runtime-plan/tools.ts
Normal file
@@ -0,0 +1,71 @@
|
||||
import type { AgentTool } from "@mariozechner/pi-agent-core";
|
||||
import type { TSchema } from "typebox";
|
||||
import type { OpenClawConfig } from "../../config/types.openclaw.js";
|
||||
import type { ProviderRuntimeModel } from "../../plugins/provider-runtime-model.types.js";
|
||||
import {
|
||||
logProviderToolSchemaDiagnostics,
|
||||
normalizeProviderToolSchemas,
|
||||
} from "../pi-embedded-runner/tool-schema-runtime.js";
|
||||
import type { AgentRuntimePlan } from "./types.js";
|
||||
|
||||
/**
 * Inputs shared by the tool-policy helpers in this module.
 *
 * When `runtimePlan` is supplied the helpers delegate to `runtimePlan.tools`;
 * otherwise they call the provider tool-schema functions with the remaining
 * fields.
 */
type AgentRuntimeToolPolicyParams<TSchemaType extends TSchema = TSchema, TResult = unknown> = {
  /** Optional host-built plan; its `tools` policy takes precedence when present. */
  runtimePlan?: AgentRuntimePlan;
  /** Tools to normalize or to report diagnostics for. */
  tools: AgentTool<TSchemaType, TResult>[];

  // Fields consumed by the provider tool-schema fallback path.
  provider: string;
  config?: OpenClawConfig;
  workspaceDir?: string;
  /** Defaults to `process.env` in the fallback path when omitted. */
  env?: NodeJS.ProcessEnv;

  // Model identity; `workspaceDir`, `modelApi` and `model` are also forwarded to the plan.
  modelId?: string;
  /** `null` is accepted and is passed to a runtime plan as `undefined`. */
  modelApi?: string | null;
  model?: ProviderRuntimeModel;
};
|
||||
|
||||
/**
 * Projects helper params onto the context object handed to
 * `runtimePlan.tools.*`.
 *
 * A `null` `modelApi` is converted to `undefined`; `workspaceDir` and `model`
 * are passed through unchanged.
 */
function runtimePlanToolContext(params: {
  workspaceDir?: string;
  modelApi?: string | null;
  model?: ProviderRuntimeModel;
}) {
  const { workspaceDir, modelApi, model } = params;
  return { workspaceDir, modelApi: modelApi ?? undefined, model };
}
|
||||
|
||||
/**
 * Normalizes tool schemas for a run attempt.
 *
 * When a runtime plan is supplied and its `tools.normalize` yields a
 * non-nullish result, that result is returned as-is. Otherwise the tools go
 * through `normalizeProviderToolSchemas`, with `env` defaulting to
 * `process.env`.
 *
 * @param params - Tools plus the optional plan and provider/model fields.
 * @returns The normalized tool list.
 */
export function normalizeAgentRuntimeTools<
  TSchemaType extends TSchema = TSchema,
  TResult = unknown,
>(params: AgentRuntimeToolPolicyParams<TSchemaType, TResult>): AgentTool<TSchemaType, TResult>[] {
  const plannedTools = params.runtimePlan?.tools.normalize(
    params.tools,
    runtimePlanToolContext(params),
  );
  if (plannedTools != null) {
    return plannedTools;
  }

  // No plan (or a nullish plan result): use the provider schema normalization.
  const { tools, provider, config, workspaceDir, env, modelId, modelApi, model } = params;
  return normalizeProviderToolSchemas({
    tools,
    provider,
    config,
    workspaceDir,
    env: env ?? process.env,
    modelId,
    modelApi,
    model,
  });
}
|
||||
|
||||
/**
 * Emits tool-schema diagnostics for a run attempt.
 *
 * With a runtime plan, the call is routed to `runtimePlan.tools.logDiagnostics`
 * and nothing else runs. Without one, `logProviderToolSchemaDiagnostics` is
 * called with `env` defaulting to `process.env`.
 *
 * @param params - Tools plus the optional plan and provider/model fields.
 */
export function logAgentRuntimeToolDiagnostics(params: AgentRuntimeToolPolicyParams): void {
  const { runtimePlan, tools } = params;

  if (!runtimePlan) {
    const { provider, config, workspaceDir, env, modelId, modelApi, model } = params;
    logProviderToolSchemaDiagnostics({
      tools,
      provider,
      config,
      workspaceDir,
      env: env ?? process.env,
      modelId,
      modelApi,
      model,
    });
    return;
  }

  runtimePlan.tools.logDiagnostics(tools, runtimePlanToolContext(params));
}
|
||||
43
src/agents/runtime-plan/types.compat.test.ts
Normal file
43
src/agents/runtime-plan/types.compat.test.ts
Normal file
@@ -0,0 +1,43 @@
|
||||
import { describe, expectTypeOf, it } from "vitest";
|
||||
import type { ReplyPayload } from "../../auto-reply/reply-payload.js";
|
||||
import type { ThinkLevel } from "../../auto-reply/thinking.js";
|
||||
import type { FailoverReason } from "../pi-embedded-helpers/types.js";
|
||||
import type { PromptMode } from "../system-prompt.types.js";
|
||||
import type { buildAgentRuntimeDeliveryPlan, buildAgentRuntimePlan } from "./build.js";
|
||||
import type {
|
||||
AgentRuntimeFailoverReason,
|
||||
AgentRuntimePromptMode,
|
||||
AgentRuntimeReplyPayload,
|
||||
AgentRuntimeThinkLevel,
|
||||
BuildAgentRuntimeDeliveryPlanParams,
|
||||
BuildAgentRuntimePlanParams,
|
||||
} from "./types.js";
|
||||
|
||||
/**
 * Resolves to `true` when `X` and `Y` are assignable to each other, otherwise
 * `false`. Both sides are wrapped in one-element tuples so that union
 * arguments are compared as a whole rather than distributed.
 */
type Equal<X, Y> = [X] extends [Y]
  ? [Y] extends [X]
    ? true
    : false
  : false;
|
||||
|
||||
/** Compile-time assertion: instantiating it with anything but `true` is a type error. */
type Assert<T extends true> = T;
|
||||
|
||||
// Type-level guards: these cases fail at type-check time if the structural
// copies in ./types.js drift away from the contracts they mirror.
describe("AgentRuntimePlan structural type compatibility", () => {
  it("keeps copied scalar unions aligned with their source contracts", () => {
    expectTypeOf<AgentRuntimeThinkLevel>().toEqualTypeOf<ThinkLevel>();
    expectTypeOf<AgentRuntimeFailoverReason>().toEqualTypeOf<FailoverReason>();
    expectTypeOf<AgentRuntimePromptMode>().toEqualTypeOf<PromptMode>();
  });

  it("keeps reply payload shapes structurally compatible with the runtime leaf payload shape", () => {
    // Key sets must match exactly.
    type _ReplyPayloadKeysStayInSync = Assert<
      Equal<keyof ReplyPayload, keyof AgentRuntimeReplyPayload>
    >;

    // Each shape must also be usable where the other is expected.
    expectTypeOf<ReplyPayload>().toMatchTypeOf<AgentRuntimeReplyPayload>();
    expectTypeOf<AgentRuntimeReplyPayload>().toMatchTypeOf<ReplyPayload>();
  });

  it("keeps builder call signatures aligned with exported structural params", () => {
    type DeliveryPlanBuilderInput = Parameters<typeof buildAgentRuntimeDeliveryPlan>[0];
    type RuntimePlanBuilderInput = Parameters<typeof buildAgentRuntimePlan>[0];

    expectTypeOf<DeliveryPlanBuilderInput>().toEqualTypeOf<BuildAgentRuntimeDeliveryPlanParams>();
    expectTypeOf<RuntimePlanBuilderInput>().toEqualTypeOf<BuildAgentRuntimePlanParams>();
  });
});
|
||||
37
src/agents/runtime-plan/types.test.ts
Normal file
37
src/agents/runtime-plan/types.test.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
import fs from "node:fs/promises";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
// Filesystem path of the sibling types.ts module, resolved relative to this test file.
const typesModuleUrl = new URL("./types.ts", import.meta.url);
const TYPES_PATH = fileURLToPath(typesModuleUrl);
|
||||
|
||||
// Import specifiers that must not appear in types.ts. Each pattern matches the
// `from "<specifier>"` tail of an import statement.
const concreteRuntimePolicyImportPatterns = [
  // Any specifier containing "auto-reply" as a directory, a .js module, or the whole tail.
  /from\s+["'][^"']*auto-reply(?:\/|\.js|["'])/,
  // A "config" path segment (directory, config.js, or bare).
  /from\s+["'](?:[^"']*\/)?config(?:\/|\.js|["'])/,
  // A "plugins" path segment (directory, plugins.js, or bare).
  /from\s+["'](?:[^"']*\/)?plugins(?:\/|\.js|["'])/,
  // Anything under a "pi-embedded-*" module or directory.
  /from\s+["'][^"']*pi-embedded-/,
  // transcript-policy, including dotted variants such as transcript-policy.types.js.
  /from\s+["'][^"']*transcript-policy(?:\.[^/"']+)?(?:\/|\.js|["'])/,
  // system-prompt, including dotted variants such as system-prompt.types.js.
  /from\s+["'][^"']*system-prompt(?:\.[^/"']+)?(?:\/|\.js|["'])/,
];
|
||||
|
||||
describe("AgentRuntimePlan leaf contracts", () => {
  it("keeps runtime plan type contracts independent from concrete runtime policy modules", async () => {
    // Scan the raw text of types.ts so that type-only imports are caught too.
    const typesSource = await fs.readFile(TYPES_PATH, "utf8");

    concreteRuntimePolicyImportPatterns.forEach((forbiddenPattern) => {
      expect(typesSource).not.toMatch(forbiddenPattern);
    });
  });

  it("guards against policy type imports re-entering the leaf contract", () => {
    // Sanity check of the patterns themselves: each known-bad import line
    // must be matched by at least one of them.
    const forbiddenImports = [
      'import type { PromptContribution } from "../system-prompt.types.js";',
      'import type { TranscriptPolicy } from "../transcript-policy.types.js";',
    ];

    forbiddenImports.forEach((importStatement) => {
      const isCaught = concreteRuntimePolicyImportPatterns.some((pattern) =>
        pattern.test(importStatement),
      );
      expect(isCaught).toBe(true);
    });
  });
});
|
||||
@@ -1,14 +1,155 @@
|
||||
import type { AgentTool } from "@mariozechner/pi-agent-core";
|
||||
import type { TSchema } from "typebox";
|
||||
import type { ThinkLevel } from "../../auto-reply/thinking.js";
|
||||
import type { ReplyPayload } from "../../auto-reply/types.js";
|
||||
import type { OpenClawConfig } from "../../config/types.openclaw.js";
|
||||
import type { ProviderRuntimeModel } from "../../plugins/provider-runtime-model.types.js";
|
||||
import type { FailoverReason } from "../pi-embedded-helpers/types.js";
|
||||
import type { PromptMode } from "../system-prompt.types.js";
|
||||
|
||||
/** Transport selector values accepted by the runtime plan. */
export type AgentRuntimeTransport =
  | "sse"
  | "websocket"
  | "auto";
|
||||
|
||||
/**
 * Thinking-level values, declared locally instead of imported.
 * types.compat.test.ts asserts this union stays equal to `ThinkLevel`.
 */
export type AgentRuntimeThinkLevel =
  | "off" | "minimal" | "low" | "medium"
  | "high" | "xhigh" | "adaptive" | "max";
|
||||
|
||||
/**
 * Prompt-mode values, declared locally instead of imported.
 * types.compat.test.ts asserts this union stays equal to `PromptMode`.
 */
export type AgentRuntimePromptMode =
  | "full"
  | "minimal"
  | "none";
|
||||
|
||||
/**
 * Failover reason codes, declared locally instead of imported.
 * types.compat.test.ts asserts this union stays equal to `FailoverReason`.
 */
export type AgentRuntimeFailoverReason =
  | "auth" | "auth_permanent"
  | "format"
  | "rate_limit" | "overloaded"
  | "billing"
  | "timeout"
  | "model_not_found"
  | "session_expired"
  | "unknown";
|
||||
|
||||
/**
 * Opaque configuration value carried through runtime plan params.
 * It is typed as `unknown`, so consumers must narrow it before reading it.
 */
export type AgentRuntimeConfig = unknown;
|
||||
|
||||
/**
 * Structural model descriptor used by runtime plan signatures.
 * Every field is optional; `compat` is left opaque (`unknown`).
 */
export type AgentRuntimeModel = {
  // Identity
  id?: string;
  name?: string;
  api?: string;
  provider?: string;
  baseUrl?: string;

  // Capabilities
  reasoning?: boolean;
  input?: string[];

  // Pricing: when present, all four figures are required.
  cost?: {
    input: number;
    output: number;
    cacheRead: number;
    cacheWrite: number;
  };

  // Limits
  contextWindow?: number;
  maxTokens?: number;
  contextTokens?: number;

  compat?: unknown;
};
|
||||
|
||||
/** Style variants available to an interactive reply button. */
export type AgentRuntimeInteractiveButtonStyle =
  | "primary"
  | "secondary"
  | "success"
  | "danger";
|
||||
|
||||
/** A single button in a reply; only `label` is required. */
export type AgentRuntimeInteractiveReplyButton = {
  label: string;
  value?: string;
  url?: string;
  style?: AgentRuntimeInteractiveButtonStyle;
};
|
||||
|
||||
/** One entry of a select block; both `label` and `value` are required. */
export type AgentRuntimeInteractiveReplyOption = { label: string; value: string };
|
||||
|
||||
/** Interactive reply block, discriminated by `type`. */
export type AgentRuntimeInteractiveReplyBlock =
  | { type: "text"; text: string }
  | { type: "buttons"; buttons: AgentRuntimeInteractiveReplyButton[] }
  | {
      type: "select";
      placeholder?: string;
      options: AgentRuntimeInteractiveReplyOption[];
    };
|
||||
|
||||
/** Interactive reply body: a list of blocks. */
export type AgentRuntimeInteractiveReply = { blocks: AgentRuntimeInteractiveReplyBlock[] };
|
||||
|
||||
/** Tone values a message presentation may carry. */
export type AgentRuntimeMessagePresentationTone =
  | "info" | "success" | "warning" | "danger" | "neutral";
|
||||
|
||||
/**
 * Presentation block, discriminated by `type`. The variants are `text`,
 * `context`, `divider` (no payload), `buttons` and `select`.
 */
export type AgentRuntimeMessagePresentationBlock =
  | { type: "text"; text: string }
  | { type: "context"; text: string }
  | { type: "divider" }
  | { type: "buttons"; buttons: AgentRuntimeInteractiveReplyButton[] }
  | {
      type: "select";
      placeholder?: string;
      options: AgentRuntimeInteractiveReplyOption[];
    };
|
||||
|
||||
/** Presentation for a message: required `blocks`, optional `title` and `tone`. */
export type AgentRuntimeMessagePresentation = {
  title?: string;
  tone?: AgentRuntimeMessagePresentationTone;
  blocks: AgentRuntimeMessagePresentationBlock[];
};
|
||||
|
||||
/** Object form of the delivery `pin` option; `enabled` is the only required field. */
export type AgentRuntimeReplyPayloadDeliveryPin = {
  enabled: boolean;
  notify?: boolean;
  required?: boolean;
};
|
||||
|
||||
/** Delivery options for a reply payload; `pin` is either a boolean or the object form. */
export type AgentRuntimeReplyPayloadDelivery = {
  pin?: boolean | AgentRuntimeReplyPayloadDeliveryPin;
};
|
||||
|
||||
/**
 * Reply payload shape, declared locally instead of imported.
 * types.compat.test.ts asserts that its keys match `ReplyPayload` and that
 * the two shapes are assignable to each other. Every field is optional.
 */
export type AgentRuntimeReplyPayload = {
  // Content
  text?: string;
  mediaUrl?: string;
  mediaUrls?: string[];
  trustedLocalMedia?: boolean;
  sensitiveMedia?: boolean;

  // Rich rendering and delivery
  presentation?: AgentRuntimeMessagePresentation;
  delivery?: AgentRuntimeReplyPayloadDelivery;
  interactive?: AgentRuntimeInteractiveReply;
  btw?: { question: string };

  // Reply threading
  replyToId?: string;
  replyToTag?: boolean;
  replyToCurrent?: boolean;

  // Audio
  audioAsVoice?: boolean;
  spokenText?: string;

  // Payload classification flags
  isError?: boolean;
  isReasoning?: boolean;
  isCompactionNotice?: boolean;

  channelData?: Record<string, unknown>;
};
|
||||
|
||||
export type AgentRuntimeSystemPromptSectionId =
|
||||
| "interaction_style"
|
||||
| "tool_call_style"
|
||||
@@ -21,12 +162,12 @@ export type AgentRuntimeSystemPromptContribution = {
|
||||
};
|
||||
|
||||
export type AgentRuntimeSystemPromptContributionContext = {
|
||||
config?: OpenClawConfig;
|
||||
config?: AgentRuntimeConfig;
|
||||
agentDir?: string;
|
||||
workspaceDir?: string;
|
||||
provider: string;
|
||||
modelId: string;
|
||||
promptMode: PromptMode;
|
||||
promptMode: AgentRuntimePromptMode;
|
||||
runtimeChannel?: string;
|
||||
runtimeCapabilities?: string[];
|
||||
agentId?: string;
|
||||
@@ -61,7 +202,7 @@ export type AgentRuntimeTranscriptPolicy = {
|
||||
export type AgentRuntimeOutcomeClassification =
|
||||
| {
|
||||
message: string;
|
||||
reason?: FailoverReason;
|
||||
reason?: AgentRuntimeFailoverReason;
|
||||
status?: number;
|
||||
code?: string;
|
||||
rawError?: string;
|
||||
@@ -109,7 +250,7 @@ export type AgentRuntimeToolPlan = {
|
||||
params?: {
|
||||
workspaceDir?: string;
|
||||
modelApi?: string;
|
||||
model?: ProviderRuntimeModel;
|
||||
model?: AgentRuntimeModel;
|
||||
},
|
||||
): AgentTool<TSchemaType, TResult>[];
|
||||
logDiagnostics(
|
||||
@@ -117,15 +258,17 @@ export type AgentRuntimeToolPlan = {
|
||||
params?: {
|
||||
workspaceDir?: string;
|
||||
modelApi?: string;
|
||||
model?: ProviderRuntimeModel;
|
||||
model?: AgentRuntimeModel;
|
||||
},
|
||||
): void;
|
||||
};
|
||||
|
||||
export type AgentRuntimeDeliveryPlan = {
|
||||
isSilentPayload(payload: Pick<ReplyPayload, "text" | "mediaUrl" | "mediaUrls">): boolean;
|
||||
isSilentPayload(
|
||||
payload: Pick<AgentRuntimeReplyPayload, "text" | "mediaUrl" | "mediaUrls">,
|
||||
): boolean;
|
||||
resolveFollowupRoute(params: {
|
||||
payload: ReplyPayload;
|
||||
payload: AgentRuntimeReplyPayload;
|
||||
originatingChannel?: string;
|
||||
originatingTo?: string;
|
||||
originRoutable: boolean;
|
||||
@@ -141,10 +284,10 @@ export type AgentRuntimeTransportPlan = {
|
||||
extraParams: Record<string, unknown>;
|
||||
resolveExtraParams(params?: {
|
||||
extraParamsOverride?: Record<string, unknown>;
|
||||
thinkingLevel?: ThinkLevel;
|
||||
thinkingLevel?: AgentRuntimeThinkLevel;
|
||||
agentId?: string;
|
||||
workspaceDir?: string;
|
||||
model?: ProviderRuntimeModel;
|
||||
model?: AgentRuntimeModel;
|
||||
resolvedTransport?: AgentRuntimeTransport;
|
||||
}): Record<string, unknown>;
|
||||
};
|
||||
@@ -159,7 +302,7 @@ export type AgentRuntimePlan = {
|
||||
resolvePolicy(params?: {
|
||||
workspaceDir?: string;
|
||||
modelApi?: string;
|
||||
model?: ProviderRuntimeModel;
|
||||
model?: AgentRuntimeModel;
|
||||
}): AgentRuntimeTranscriptPolicy;
|
||||
};
|
||||
delivery: AgentRuntimeDeliveryPlan;
|
||||
@@ -177,7 +320,7 @@ export type AgentRuntimePlan = {
|
||||
};
|
||||
|
||||
export type BuildAgentRuntimeDeliveryPlanParams = {
|
||||
config?: OpenClawConfig;
|
||||
config?: AgentRuntimeConfig;
|
||||
workspaceDir?: string;
|
||||
agentDir?: string;
|
||||
provider: string;
|
||||
@@ -185,12 +328,12 @@ export type BuildAgentRuntimeDeliveryPlanParams = {
|
||||
};
|
||||
|
||||
export type BuildAgentRuntimePlanParams = {
|
||||
config?: OpenClawConfig;
|
||||
config?: AgentRuntimeConfig;
|
||||
workspaceDir?: string;
|
||||
agentDir?: string;
|
||||
provider: string;
|
||||
modelId: string;
|
||||
model?: ProviderRuntimeModel;
|
||||
model?: AgentRuntimeModel;
|
||||
modelApi?: string | null;
|
||||
harnessId?: string;
|
||||
harnessRuntime?: string;
|
||||
@@ -198,7 +341,7 @@ export type BuildAgentRuntimePlanParams = {
|
||||
authProfileProvider?: string;
|
||||
sessionAuthProfileId?: string;
|
||||
agentId?: string;
|
||||
thinkingLevel?: ThinkLevel;
|
||||
thinkingLevel?: AgentRuntimeThinkLevel;
|
||||
extraParamsOverride?: Record<string, unknown>;
|
||||
resolvedTransport?: AgentRuntimeTransport;
|
||||
};
|
||||
|
||||
128
src/plugin-sdk/agent-harness-runtime.test.ts
Normal file
128
src/plugin-sdk/agent-harness-runtime.test.ts
Normal file
@@ -0,0 +1,128 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
classifyAgentHarnessTerminalOutcome,
|
||||
type AgentHarnessTerminalOutcomeClassification,
|
||||
} from "./agent-harness-runtime.js";
|
||||
|
||||
describe("classifyAgentHarnessTerminalOutcome", () => {
  type OutcomeInput = Parameters<typeof classifyAgentHarnessTerminalOutcome>[0];

  /**
   * Classifies a completed, error-free turn with no output of any kind,
   * with the given per-case overrides applied on top.
   */
  function classifyTurn(overrides: Partial<OutcomeInput> = {}) {
    return classifyAgentHarnessTerminalOutcome({
      assistantTexts: [],
      reasoningText: "",
      planText: "",
      promptError: null,
      turnCompleted: true,
      ...overrides,
    });
  }

  it("does not classify an in-flight turn", () => {
    expect(classifyTurn({ turnCompleted: false })).toBeUndefined();
  });

  it("does not classify prompt errors as terminal empty-output outcomes", () => {
    expect(classifyTurn({ promptError: new Error("turn failed") })).toBeUndefined();
  });

  it("does not classify deliberate silent replies such as NO_REPLY", () => {
    expect(classifyTurn({ assistantTexts: ["NO_REPLY"] })).toBeUndefined();
  });

  it("treats empty-string prompt errors as terminal errors", () => {
    // An empty string is neither null nor undefined, so it still counts as an error.
    expect(classifyTurn({ promptError: "" })).toBeUndefined();
  });

  it("treats whitespace-only assistant text as not visible", () => {
    expect(classifyTurn({ assistantTexts: [" ", "\n\t"] })).toBe("empty");
  });

  it("classifies a completed turn with plan text only as planning-only", () => {
    expect(classifyTurn({ planText: "1. inspect\n2. patch\n3. test" })).toBe("planning-only");
  });

  it("prefers planning-only when both plan and reasoning text are present", () => {
    const outcome = classifyTurn({
      reasoningText: "I need to inspect the files.",
      planText: "I will inspect, patch, and test.",
    });

    expect(outcome).toBe("planning-only");
  });

  it("classifies a completed turn with reasoning text only as reasoning-only", () => {
    const outcome = classifyTurn({
      reasoningText: "The answer depends on the current repository state.",
    });

    expect(outcome).toBe("reasoning-only");
  });

  it("classifies a completed turn with no visible output as empty", () => {
    expect(classifyTurn({ reasoningText: " ", planText: "\n" })).toBe("empty");
  });

  it("returns only terminal fallback classifications, not ok", () => {
    // The annotation is the point of this case: the result must fit the
    // exported classification type.
    const classification: AgentHarnessTerminalOutcomeClassification = classifyTurn() ?? "empty";

    expect(classification).toBe("empty");
  });
});
|
||||
@@ -2,6 +2,7 @@
|
||||
// Keep heavyweight tool construction out of this module so harness imports can
|
||||
// register quickly inside gateway startup and Docker e2e runs.
|
||||
|
||||
import type { EmbeddedRunAttemptResult } from "../agents/pi-embedded-runner/run/types.js";
|
||||
import { formatToolDetail, resolveToolDisplay } from "../agents/tool-display.js";
|
||||
import { redactToolDetail } from "../logging/redact.js";
|
||||
import { truncateUtf16Safe } from "../utils.js";
|
||||
@@ -81,6 +82,10 @@ export {
|
||||
setActiveEmbeddedRun,
|
||||
} from "../agents/pi-embedded-runner/runs.js";
|
||||
export { disposeRegisteredAgentHarnesses } from "../agents/harness/registry.js";
|
||||
export {
|
||||
logAgentRuntimeToolDiagnostics,
|
||||
normalizeAgentRuntimeTools,
|
||||
} from "../agents/runtime-plan/tools.js";
|
||||
export { normalizeProviderToolSchemas } from "../agents/pi-embedded-runner/tool-schema-runtime.js";
|
||||
export { resolveSandboxContext } from "../agents/sandbox.js";
|
||||
export { isSubagentSessionKey } from "../routing/session-key.js";
|
||||
@@ -146,3 +151,46 @@ export function formatToolProgressOutput(
|
||||
}
|
||||
return `${truncateUtf16Safe(redacted, maxChars)}\n...(truncated)...`;
|
||||
}
|
||||
|
||||
/** Signals about a harness turn that `classifyAgentHarnessTerminalOutcome` inspects. */
export type AgentHarnessTerminalOutcomeInput = {
  /** Assistant texts produced by the turn; whitespace-only entries do not count as visible. */
  assistantTexts: readonly string[];
  /** Reasoning text, if any; blank or nullish means none. */
  reasoningText?: string | null;
  /** Plan text, if any; blank or nullish means none. */
  planText?: string | null;
  /** Any value other than `null`/`undefined` marks the turn as failed. */
  promptError?: unknown;
  /** Whether the turn has finished; unfinished turns are never classified. */
  turnCompleted: boolean;
};
|
||||
|
||||
/**
 * Classification values, derived from
 * `EmbeddedRunAttemptResult["agentHarnessResultClassification"]` with
 * `null`/`undefined` removed.
 */
export type AgentHarnessTerminalOutcomeClassification =
  NonNullable<EmbeddedRunAttemptResult["agentHarnessResultClassification"]>;
|
||||
|
||||
/**
 * Classify a finished harness turn that produced no visible assistant output,
 * so the caller can advance fallback.
 *
 * Returns `undefined`, meaning no fallback classification, when the turn has
 * not completed, when `promptError` is anything other than `null`/`undefined`,
 * or when at least one assistant text is non-blank. A deliberate silent reply
 * such as NO_REPLY is non-blank, so it counts as intentional output, while
 * whitespace-only text stays fallback-eligible.
 *
 * Otherwise non-blank plan text wins ("planning-only"), then non-blank
 * reasoning text ("reasoning-only"), and anything else is "empty".
 *
 * This lives at SDK level so plugin harness adapters such as Codex keep the
 * same OpenClaw-owned fallback signals as the built-in PI path without
 * re-implementing terminal-result policy.
 */
export function classifyAgentHarnessTerminalOutcome(
  params: AgentHarnessTerminalOutcomeInput,
): AgentHarnessTerminalOutcomeClassification | undefined {
  const { turnCompleted, promptError, assistantTexts, planText, reasoningText } = params;

  // Guard clauses: none of these cases is a terminal empty-output outcome.
  if (!turnCompleted) {
    return undefined;
  }
  if (promptError != null) {
    return undefined;
  }
  if (hasVisibleAssistantText(assistantTexts)) {
    return undefined;
  }

  const isNonBlank = (value?: string | null): boolean => (value ?? "").trim() !== "";

  // Plan text takes precedence over reasoning text.
  if (isNonBlank(planText)) {
    return "planning-only";
  }
  return isNonBlank(reasoningText) ? "reasoning-only" : "empty";
}

/** True when at least one assistant text still has characters after trimming. */
function hasVisibleAssistantText(assistantTexts: readonly string[]): boolean {
  for (const candidate of assistantTexts) {
    if (candidate.trim().length > 0) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
Reference in New Issue
Block a user