diff --git a/docs/plugins/sdk-agent-harness.md b/docs/plugins/sdk-agent-harness.md index c19f9027111..313f0d38510 100644 --- a/docs/plugins/sdk-agent-harness.md +++ b/docs/plugins/sdk-agent-harness.md @@ -46,6 +46,23 @@ Before a harness is selected, OpenClaw has already resolved: That split is intentional. A harness runs a prepared attempt; it does not pick providers, replace channel delivery, or silently switch models. +The prepared attempt also includes `params.runtimePlan`, an OpenClaw-owned +policy bundle for runtime decisions that must stay shared across PI and native +harnesses: + +- `runtimePlan.tools.normalize(...)` and + `runtimePlan.tools.logDiagnostics(...)` for provider-aware tool schema policy +- `runtimePlan.transcript.resolvePolicy(...)` for transcript sanitization and + tool-call repair policy +- `runtimePlan.delivery.isSilentPayload(...)` for shared `NO_REPLY` and media + delivery suppression +- `runtimePlan.outcome.classifyRunResult(...)` for model fallback classification +- `runtimePlan.observability` for resolved provider/model/harness metadata + +Harnesses may use the plan for decisions that need to match PI behavior, but +should still treat it as host-owned attempt state. Do not mutate it or use it to +switch providers/models inside a turn. + ## Register a harness **Import:** `openclaw/plugin-sdk/agent-harness` @@ -162,6 +179,16 @@ middleware, but new result transforms should use the runtime-neutral API. The Pi-only `api.registerEmbeddedExtensionFactory(...)` hook has been removed; Pi tool-result transforms must use runtime-neutral middleware. +### Terminal outcome classification + +Native harnesses that own their own protocol projection can use +`classifyAgentHarnessTerminalOutcome(...)` from +`openclaw/plugin-sdk/agent-harness-runtime` when a completed turn produced no +visible assistant text. The helper returns `empty`, `reasoning-only`, or +`planning-only` so OpenClaw's fallback policy can decide whether to retry on a +different model. 
It intentionally leaves prompt errors, in-flight turns, and +intentional silent replies such as `NO_REPLY` unclassified. + ### Native Codex harness mode The bundled `codex` harness is the native Codex mode for embedded OpenClaw diff --git a/docs/plugins/sdk-subpaths.md b/docs/plugins/sdk-subpaths.md index 9b0a17a2dd3..f07103fdef7 100644 --- a/docs/plugins/sdk-subpaths.md +++ b/docs/plugins/sdk-subpaths.md @@ -191,7 +191,7 @@ For the plugin authoring guide, see [Plugin SDK overview](/plugins/sdk-overview) | `plugin-sdk/models-provider-runtime` | `/models` command/provider reply helpers | | `plugin-sdk/skill-commands-runtime` | Skill command listing helpers | | `plugin-sdk/native-command-registry` | Native command registry/build/serialize helpers | - | `plugin-sdk/agent-harness` | Experimental trusted-plugin surface for low-level agent harnesses: harness types, active-run steer/abort helpers, OpenClaw tool bridge helpers, tool progress formatting/detail helpers, and attempt result utilities | + | `plugin-sdk/agent-harness` | Experimental trusted-plugin surface for low-level agent harnesses: harness types, active-run steer/abort helpers, OpenClaw tool bridge helpers, runtime-plan tool policy helpers, terminal outcome classification, tool progress formatting/detail helpers, and attempt result utilities | | `plugin-sdk/provider-zai-endpoint` | Z.AI endpoint detection helpers | | `plugin-sdk/infra-runtime` | System event/heartbeat helpers | | `plugin-sdk/collection-runtime` | Small bounded cache helpers | diff --git a/extensions/codex/src/app-server/event-projector.ts b/extensions/codex/src/app-server/event-projector.ts index 2cd8ce36afa..129d9f14b35 100644 --- a/extensions/codex/src/app-server/event-projector.ts +++ b/extensions/codex/src/app-server/event-projector.ts @@ -2,18 +2,19 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import type { AssistantMessage, Usage } from "@mariozechner/pi-ai"; import { SessionManager } from 
"@mariozechner/pi-coding-agent"; import { + classifyAgentHarnessTerminalOutcome, embeddedAgentLog, emitAgentEvent as emitGlobalAgentEvent, formatErrorMessage, + formatToolAggregate, formatToolProgressOutput, inferToolMetaFromArgs, normalizeUsage, runAgentHarnessAfterCompactionHook, runAgentHarnessBeforeCompactionHook, + TOOL_PROGRESS_OUTPUT_MAX_CHARS, type EmbeddedRunAttemptParams, type EmbeddedRunAttemptResult, - TOOL_PROGRESS_OUTPUT_MAX_CHARS, - formatToolAggregate, type MessagingToolSend, } from "openclaw/plugin-sdk/agent-harness-runtime"; import { readCodexTurn } from "./protocol-validators.js"; @@ -36,10 +37,6 @@ export type CodexAppServerToolTelemetry = { successfulCronAdds?: number; }; -type AgentHarnessResultClassification = NonNullable< - EmbeddedRunAttemptResult["agentHarnessResultClassification"] ->; - const ZERO_USAGE: Usage = { input: 0, output: 0, @@ -66,25 +63,6 @@ const CURRENT_TOKEN_USAGE_KEYS = [ const MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM = 20; -function classifyTerminalResult(params: { - assistantTexts: string[]; - reasoningText: string; - planText: string; - promptError: unknown; - turnCompleted: boolean; -}): AgentHarnessResultClassification | undefined { - if (!params.turnCompleted || params.promptError || params.assistantTexts.length > 0) { - return undefined; - } - if (params.planText.trim()) { - return "planning-only"; - } - if (params.reasoningText.trim()) { - return "reasoning-only"; - } - return "empty"; -} - export class CodexAppServerEventProjector { private readonly assistantTextByItem = new Map(); private readonly assistantItemOrder: string[] = []; @@ -217,7 +195,7 @@ export class CodexAppServerEventProjector { const promptError = this.promptError ?? (turnFailed ? (this.completedTurn?.error?.message ?? 
"codex app-server turn failed") : null); - const agentHarnessResultClassification = classifyTerminalResult({ + const agentHarnessResultClassification = classifyAgentHarnessTerminalOutcome({ assistantTexts, reasoningText, planText, diff --git a/extensions/codex/src/app-server/run-attempt.test.ts b/extensions/codex/src/app-server/run-attempt.test.ts index 2af5d81b913..29492f4031a 100644 --- a/extensions/codex/src/app-server/run-attempt.test.ts +++ b/extensions/codex/src/app-server/run-attempt.test.ts @@ -9,6 +9,7 @@ import { } from "openclaw/plugin-sdk/agent-harness"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { __testing as nativeHookRelayTesting } from "../../../../src/agents/harness/native-hook-relay.js"; +import { buildAgentRuntimePlan } from "../../../../src/agents/runtime-plan/build.js"; import { onAgentEvent, resetAgentEventsForTest, @@ -52,6 +53,28 @@ function createParams(sessionFile: string, workspaceDir: string): EmbeddedRunAtt } as EmbeddedRunAttemptParams; } +function createParamsWithRuntimePlan( + sessionFile: string, + workspaceDir: string, +): EmbeddedRunAttemptParams { + const params = createParams(sessionFile, workspaceDir); + return { + ...params, + runtimePlan: buildAgentRuntimePlan({ + provider: params.provider, + modelId: params.modelId, + model: params.model, + modelApi: params.model.api, + harnessId: "codex", + harnessRuntime: "codex", + config: params.config, + workspaceDir, + agentDir: tempDir, + thinkingLevel: params.thinkLevel, + }), + } as EmbeddedRunAttemptParams; +} + function threadStartResult(threadId = "thread-1") { return { thread: { @@ -364,7 +387,7 @@ describe("runCodexAppServerAttempt", () => { sessionManager.appendMessage(assistantMessage("existing context", Date.now())); const harness = createStartedThreadHarness(); - const params = createParams(sessionFile, workspaceDir); + const params = createParamsWithRuntimePlan(sessionFile, workspaceDir); params.onAgentEvent = onRunAgentEvent; const 
run = runCodexAppServerAttempt(params); await harness.waitForMethod("turn/start"); @@ -460,6 +483,8 @@ describe("runCodexAppServerAttempt", () => { sessionId: "session-1", provider: "codex", model: "gpt-5.4-codex", + resolvedRef: "codex/gpt-5.4-codex", + harnessId: "codex", assistantTexts: ["hello back"], lastAssistant: expect.objectContaining({ role: "assistant", @@ -675,9 +700,9 @@ describe("runCodexAppServerAttempt", () => { return undefined; }); - await expect(runCodexAppServerAttempt(createParams(sessionFile, workspaceDir))).rejects.toThrow( - "turn start exploded", - ); + await expect( + runCodexAppServerAttempt(createParamsWithRuntimePlan(sessionFile, workspaceDir)), + ).rejects.toThrow("turn start exploded"); await vi.waitFor(() => expect(llmInput).toHaveBeenCalledTimes(1), { interval: 1 }); await vi.waitFor(() => expect(llmOutput).toHaveBeenCalledTimes(1), { interval: 1 }); @@ -687,6 +712,8 @@ describe("runCodexAppServerAttempt", () => { assistantTexts: [], model: "gpt-5.4-codex", provider: "codex", + resolvedRef: "codex/gpt-5.4-codex", + harnessId: "codex", runId: "run-1", sessionId: "session-1", }), diff --git a/extensions/codex/src/app-server/run-attempt.ts b/extensions/codex/src/app-server/run-attempt.ts index b3ce8139fc4..c24aa13d769 100644 --- a/extensions/codex/src/app-server/run-attempt.ts +++ b/extensions/codex/src/app-server/run-attempt.ts @@ -14,7 +14,7 @@ import { formatErrorMessage, isActiveHarnessContextEngine, isSubagentSessionKey, - normalizeProviderToolSchemas, + normalizeAgentRuntimeTools, resolveAttemptSpawnWorkspaceDir, resolveAgentHarnessBeforePromptBuildResult, resolveModelAuthMode, @@ -906,23 +906,17 @@ async function buildDynamicTools(input: DynamicToolBuildParams) { params.toolsAllow && params.toolsAllow.length > 0 ? 
visionFilteredTools.filter((tool) => params.toolsAllow?.includes(tool.name)) : visionFilteredTools; - return ( - params.runtimePlan?.tools.normalize(filteredTools, { - workspaceDir: input.effectiveWorkspace, - modelApi: params.model.api, - model: params.model, - }) ?? - normalizeProviderToolSchemas({ - tools: filteredTools, - provider: params.provider, - config: params.config, - workspaceDir: input.effectiveWorkspace, - env: process.env, - modelId: params.modelId, - modelApi: params.model.api, - model: params.model, - }) - ); + return normalizeAgentRuntimeTools({ + runtimePlan: params.runtimePlan, + tools: filteredTools, + provider: params.provider, + config: params.config, + workspaceDir: input.effectiveWorkspace, + env: process.env, + modelId: params.modelId, + modelApi: params.model.api, + model: params.model, + }); } async function withCodexStartupTimeout(params: { diff --git a/src/agents/harness/result-classification.ts b/src/agents/harness/result-classification.ts new file mode 100644 index 00000000000..1e68fd6b1e2 --- /dev/null +++ b/src/agents/harness/result-classification.ts @@ -0,0 +1,26 @@ +import type { + AgentHarness, + AgentHarnessAttemptParams, + AgentHarnessAttemptResult, +} from "./types.js"; + +export function applyAgentHarnessResultClassification( + harness: Pick, + result: AgentHarnessAttemptResult, + params: AgentHarnessAttemptParams, +): AgentHarnessAttemptResult { + if (!harness.classify) { + return { ...result, agentHarnessId: harness.id }; + } + const { agentHarnessResultClassification: _previousClassification, ...resultWithoutPrevious } = + result; + const classification = harness.classify(resultWithoutPrevious, params); + if (!classification || classification === "ok") { + return { ...resultWithoutPrevious, agentHarnessId: harness.id }; + } + return { + ...resultWithoutPrevious, + agentHarnessId: harness.id, + agentHarnessResultClassification: classification, + }; +} diff --git a/src/agents/harness/selection.ts 
b/src/agents/harness/selection.ts index 093bd6d7462..6304e7cfffe 100644 --- a/src/agents/harness/selection.ts +++ b/src/agents/harness/selection.ts @@ -20,6 +20,7 @@ import type { EmbeddedPiCompactResult } from "../pi-embedded-runner/types.js"; import { createPiAgentHarness } from "./builtin-pi.js"; import { listRegisteredAgentHarnesses } from "./registry.js"; import type { AgentHarness, AgentHarnessSupport } from "./types.js"; +import { adaptAgentHarnessToV2, runAgentHarnessV2LifecycleAttempt } from "./v2.js"; const log = createSubsystemLogger("agents/harness"); @@ -187,14 +188,13 @@ export async function runAgentHarnessAttemptWithFallback( sessionKey: params.sessionKey, agentId: params.agentId, }); + const v2Harness = adaptAgentHarnessToV2(harness); if (harness.id === "pi") { - const result = await harness.runAttempt(params); - return applyHarnessResultClassification(harness, result, params); + return await runAgentHarnessV2LifecycleAttempt(v2Harness, params); } try { - const result = await harness.runAttempt(params); - return applyHarnessResultClassification(harness, result, params); + return await runAgentHarnessV2LifecycleAttempt(v2Harness, params); } catch (error) { log.warn(`${harness.label} failed; not falling back to embedded PI backend`, { harnessId: harness.id, @@ -263,22 +263,6 @@ function logAgentHarnessSelection( }); } -function applyHarnessResultClassification( - harness: AgentHarness, - result: EmbeddedRunAttemptResult, - params: EmbeddedRunAttemptParams, -): EmbeddedRunAttemptResult { - const classification = harness.classify?.(result, params); - if (!classification || classification === "ok") { - return { ...result, agentHarnessId: harness.id }; - } - return { - ...result, - agentHarnessId: harness.id, - agentHarnessResultClassification: classification, - }; -} - function resolvePinnedAgentHarnessPolicy( agentHarnessId: string | undefined, ): AgentHarnessPolicy | undefined { diff --git a/src/agents/harness/v2.test.ts 
b/src/agents/harness/v2.test.ts new file mode 100644 index 00000000000..dc6f196715c --- /dev/null +++ b/src/agents/harness/v2.test.ts @@ -0,0 +1,399 @@ +import type { Api, Model } from "@mariozechner/pi-ai"; +import { describe, expect, it, vi } from "vitest"; +import type { EmbeddedRunAttemptResult } from "../pi-embedded-runner/run/types.js"; +import type { AgentHarness, AgentHarnessAttemptParams } from "./types.js"; +import type { AgentHarnessV2 } from "./v2.js"; +import { adaptAgentHarnessToV2, runAgentHarnessV2LifecycleAttempt } from "./v2.js"; + +function createAttemptParams(): AgentHarnessAttemptParams { + return { + prompt: "hello", + sessionId: "session-1", + runId: "run-1", + sessionFile: "/tmp/session.jsonl", + workspaceDir: "/tmp/workspace", + timeoutMs: 5_000, + provider: "codex", + modelId: "gpt-5.4", + model: { id: "gpt-5.4", provider: "codex" } as Model, + authStorage: {} as never, + modelRegistry: {} as never, + thinkLevel: "low", + } as AgentHarnessAttemptParams; +} + +function createAttemptResult(): EmbeddedRunAttemptResult { + return { + aborted: false, + externalAbort: false, + timedOut: false, + idleTimedOut: false, + timedOutDuringCompaction: false, + promptError: null, + promptErrorSource: null, + sessionIdUsed: "session-1", + messagesSnapshot: [], + assistantTexts: ["ok"], + toolMetas: [], + lastAssistant: undefined, + didSendViaMessagingTool: false, + messagingToolSentTexts: [], + messagingToolSentMediaUrls: [], + messagingToolSentTargets: [], + cloudCodeAssistFormatError: false, + replayMetadata: { hadPotentialSideEffects: false, replaySafe: true }, + itemLifecycle: { startedCount: 0, completedCount: 0, activeCount: 0 }, + }; +} + +describe("AgentHarness V2 compatibility adapter", () => { + it("executes prepare/start/send/outcome/cleanup as one bounded lifecycle", async () => { + const params = createAttemptParams(); + const result = createAttemptResult(); + const events: string[] = []; + const harness: AgentHarnessV2 = { + id: "native-v2", 
+ label: "Native V2", + supports: () => ({ supported: true }), + prepare: async (attemptParams) => { + events.push("prepare"); + expect(attemptParams).toBe(params); + return { + harnessId: "native-v2", + label: "Native V2", + params, + lifecycleState: "prepared", + }; + }, + start: async (prepared) => { + events.push(`start:${prepared.lifecycleState}`); + return { ...prepared, lifecycleState: "started" }; + }, + send: async (session) => { + events.push(`send:${session.lifecycleState}`); + return result; + }, + resolveOutcome: async (session, rawResult) => { + events.push(`outcome:${session.lifecycleState}`); + return { ...rawResult, agentHarnessId: session.harnessId }; + }, + cleanup: async ({ prepared, session, result: cleanupResult, error }) => { + expect(prepared?.lifecycleState).toBe("prepared"); + expect(session?.lifecycleState).toBe("started"); + if (!session) { + throw new Error("expected started session during successful cleanup"); + } + events.push(`cleanup:${session.lifecycleState}`); + expect(cleanupResult).toMatchObject({ agentHarnessId: "native-v2" }); + expect(error).toBeUndefined(); + }, + }; + + await expect(runAgentHarnessV2LifecycleAttempt(harness, params)).resolves.toMatchObject({ + agentHarnessId: "native-v2", + sessionIdUsed: "session-1", + }); + expect(events).toEqual([ + "prepare", + "start:prepared", + "send:started", + "outcome:started", + "cleanup:started", + ]); + }); + + it("runs cleanup with the original failure and preserves that failure", async () => { + const params = createAttemptParams(); + const sendError = new Error("codex app-server send failed"); + const cleanup = vi.fn(async () => { + throw new Error("cleanup should not mask send failure"); + }); + const harness: AgentHarnessV2 = { + id: "native-v2", + label: "Native V2", + supports: () => ({ supported: true }), + prepare: async () => ({ + harnessId: "native-v2", + label: "Native V2", + params, + lifecycleState: "prepared", + }), + start: async (prepared) => ({ ...prepared, 
lifecycleState: "started" }), + send: async () => { + throw sendError; + }, + resolveOutcome: async (_session, rawResult) => rawResult, + cleanup, + }; + + await expect(runAgentHarnessV2LifecycleAttempt(harness, params)).rejects.toThrow( + "codex app-server send failed", + ); + expect(cleanup).toHaveBeenCalledWith( + expect.objectContaining({ + error: sendError, + prepared: expect.objectContaining({ lifecycleState: "prepared" }), + session: expect.objectContaining({ lifecycleState: "started" }), + }), + ); + }); + + it("runs cleanup for failed prepare/start lifecycle stages", async () => { + const params = createAttemptParams(); + const startError = new Error("codex app-server start failed"); + const cleanup = vi.fn(async () => {}); + const harness: AgentHarnessV2 = { + id: "native-v2", + label: "Native V2", + supports: () => ({ supported: true }), + prepare: async () => ({ + harnessId: "native-v2", + label: "Native V2", + params, + lifecycleState: "prepared", + }), + start: async () => { + throw startError; + }, + send: async () => createAttemptResult(), + resolveOutcome: async (_session, rawResult) => rawResult, + cleanup, + }; + + await expect(runAgentHarnessV2LifecycleAttempt(harness, params)).rejects.toThrow( + "codex app-server start failed", + ); + expect(cleanup).toHaveBeenCalledWith({ + error: startError, + prepared: expect.objectContaining({ lifecycleState: "prepared" }), + session: undefined, + }); + }); + + it("passes raw send results to cleanup when outcome resolution fails", async () => { + const params = createAttemptParams(); + const rawResult = createAttemptResult(); + const outcomeError = new Error("outcome classification failed"); + const cleanup = vi.fn(async () => {}); + const harness: AgentHarnessV2 = { + id: "native-v2", + label: "Native V2", + supports: () => ({ supported: true }), + prepare: async () => ({ + harnessId: "native-v2", + label: "Native V2", + params, + lifecycleState: "prepared", + }), + start: async (prepared) => ({ 
...prepared, lifecycleState: "started" }), + send: async () => rawResult, + resolveOutcome: async () => { + throw outcomeError; + }, + cleanup, + }; + + await expect(runAgentHarnessV2LifecycleAttempt(harness, params)).rejects.toThrow( + "outcome classification failed", + ); + expect(cleanup).toHaveBeenCalledWith( + expect.objectContaining({ + error: outcomeError, + result: rawResult, + prepared: expect.objectContaining({ lifecycleState: "prepared" }), + session: expect.objectContaining({ lifecycleState: "started" }), + }), + ); + }); + + it("surfaces cleanup failures after successful outcomes", async () => { + const params = createAttemptParams(); + const harness: AgentHarnessV2 = { + id: "native-v2", + label: "Native V2", + supports: () => ({ supported: true }), + prepare: async () => ({ + harnessId: "native-v2", + label: "Native V2", + params, + lifecycleState: "prepared", + }), + start: async (prepared) => ({ ...prepared, lifecycleState: "started" }), + send: async () => createAttemptResult(), + resolveOutcome: async (_session, rawResult) => rawResult, + cleanup: async () => { + throw new Error("cleanup failed"); + }, + }; + + await expect(runAgentHarnessV2LifecycleAttempt(harness, params)).rejects.toThrow( + "cleanup failed", + ); + }); + + it("runs a V1 harness through prepare/start/send without changing attempt params", async () => { + const params = createAttemptParams(); + const result = createAttemptResult(); + const runAttempt = vi.fn(async () => result); + const harness: AgentHarness = { + id: "codex", + label: "Codex", + pluginId: "codex-plugin", + supports: () => ({ supported: true, priority: 100 }), + runAttempt, + }; + + const v2 = adaptAgentHarnessToV2(harness); + const prepared = await v2.prepare(params); + const session = await v2.start(prepared); + + expect(v2.resume).toBeUndefined(); + expect(await v2.send(session)).toBe(result); + expect(runAttempt).toHaveBeenCalledWith(params); + expect(session).toMatchObject({ + harnessId: "codex", + label: 
"Codex", + pluginId: "codex-plugin", + params, + lifecycleState: "started", + }); + expect(prepared.lifecycleState).toBe("prepared"); + }); + + it("keeps result classification as an explicit outcome stage", async () => { + const params = createAttemptParams(); + const result = createAttemptResult(); + const classify = vi.fn>(() => "empty"); + const harness: AgentHarness = { + id: "codex", + label: "Codex", + supports: () => ({ supported: true }), + runAttempt: vi.fn(async () => result), + classify, + }; + + const v2 = adaptAgentHarnessToV2(harness); + const session = await v2.start(await v2.prepare(params)); + + expect(await v2.resolveOutcome(session, result)).toMatchObject({ + agentHarnessId: "codex", + agentHarnessResultClassification: "empty", + }); + expect(harness.classify).toHaveBeenCalledWith(result, params); + }); + + it("preserves harness-supplied classification when no classify hook is registered", async () => { + const params = createAttemptParams(); + const result = { + ...createAttemptResult(), + agentHarnessResultClassification: "reasoning-only", + } as EmbeddedRunAttemptResult; + const harness: AgentHarness = { + id: "codex", + label: "Codex", + supports: () => ({ supported: true }), + runAttempt: vi.fn(async () => result), + }; + + const v2 = adaptAgentHarnessToV2(harness); + const session = await v2.start(await v2.prepare(params)); + + expect(await v2.resolveOutcome(session, result)).toMatchObject({ + agentHarnessId: "codex", + agentHarnessResultClassification: "reasoning-only", + }); + }); + + it("clears stale non-ok classification when classification resolves to ok", async () => { + const params = createAttemptParams(); + const result = { + ...createAttemptResult(), + agentHarnessResultClassification: "empty", + } as EmbeddedRunAttemptResult; + const classify = vi.fn>(() => "ok"); + const harness: AgentHarness = { + id: "codex", + label: "Codex", + supports: () => ({ supported: true }), + runAttempt: vi.fn(async () => result), + classify, + }; + 
+ const v2 = adaptAgentHarnessToV2(harness); + const session = await v2.start(await v2.prepare(params)); + + const classified = await v2.resolveOutcome(session, result); + expect(classified).toMatchObject({ agentHarnessId: "codex" }); + expect(classified).not.toHaveProperty("agentHarnessResultClassification"); + }); + + it("preserves existing compact/reset/dispose hook this binding as compatibility methods", async () => { + const harness: AgentHarness & { + compactCalls: number; + resetCalls: number; + disposeCalls: number; + } = { + id: "custom", + label: "Custom", + compactCalls: 0, + resetCalls: 0, + disposeCalls: 0, + supports: () => ({ supported: true }), + runAttempt: vi.fn(async () => createAttemptResult()), + async compact() { + this.compactCalls += 1; + return { + ok: true, + compacted: true, + result: { + summary: "done", + firstKeptEntryId: "entry-1", + tokensBefore: 100, + }, + }; + }, + reset(params) { + expect(params).toEqual({ reason: "reset" }); + this.resetCalls += 1; + }, + dispose() { + this.disposeCalls += 1; + }, + }; + + const v2 = adaptAgentHarnessToV2(harness); + + await expect( + v2.compact?.({ + sessionId: "session-1", + sessionFile: "/tmp/session.jsonl", + workspaceDir: "/tmp/workspace", + }), + ).resolves.toMatchObject({ + compacted: true, + }); + await v2.reset?.({ reason: "reset" }); + await v2.dispose?.(); + + expect(harness.compactCalls).toBe(1); + expect(harness.resetCalls).toBe(1); + expect(harness.disposeCalls).toBe(1); + }); + + it("does not dispose V1 harnesses during per-attempt cleanup", async () => { + const dispose = vi.fn(); + const harness: AgentHarness = { + id: "custom", + label: "Custom", + supports: () => ({ supported: true }), + runAttempt: vi.fn(async () => createAttemptResult()), + dispose, + }; + const v2 = adaptAgentHarnessToV2(harness); + const session = await v2.start(await v2.prepare(createAttemptParams())); + + await v2.cleanup({ session, result: createAttemptResult() }); + + 
expect(dispose).not.toHaveBeenCalled(); + }); +}); diff --git a/src/agents/harness/v2.ts b/src/agents/harness/v2.ts new file mode 100644 index 00000000000..0993bddda1f --- /dev/null +++ b/src/agents/harness/v2.ts @@ -0,0 +1,136 @@ +import { formatErrorMessage } from "../../infra/errors.js"; +import { createSubsystemLogger } from "../../logging/subsystem.js"; +import { applyAgentHarnessResultClassification } from "./result-classification.js"; +import type { + AgentHarness, + AgentHarnessAttemptParams, + AgentHarnessAttemptResult, + AgentHarnessCompactParams, + AgentHarnessCompactResult, + AgentHarnessResetParams, + AgentHarnessSupport, + AgentHarnessSupportContext, +} from "./types.js"; + +const log = createSubsystemLogger("agents/harness/v2"); + +type AgentHarnessV2RunBase = { + harnessId: string; + label: string; + pluginId?: string; + params: AgentHarnessAttemptParams; +}; + +export type AgentHarnessV2PreparedRun = AgentHarnessV2RunBase & { + lifecycleState: "prepared"; +}; + +export type AgentHarnessV2Session = AgentHarnessV2RunBase & { + lifecycleState: "started"; +}; + +export type AgentHarnessV2ToolCall = { + id?: string; + name: string; + input?: unknown; +}; + +export type AgentHarnessV2CleanupParams = { + prepared?: AgentHarnessV2PreparedRun; + session?: AgentHarnessV2Session; + result?: AgentHarnessAttemptResult; + error?: unknown; +}; + +export type AgentHarnessV2 = { + id: string; + label: string; + pluginId?: string; + supports(ctx: AgentHarnessSupportContext): AgentHarnessSupport; + prepare(params: AgentHarnessAttemptParams): Promise; + start(prepared: AgentHarnessV2PreparedRun): Promise; + resume?(session: AgentHarnessV2Session): Promise; + send(session: AgentHarnessV2Session): Promise; + handleToolCall?(session: AgentHarnessV2Session, call: AgentHarnessV2ToolCall): Promise; + resolveOutcome( + session: AgentHarnessV2Session, + result: AgentHarnessAttemptResult, + ): Promise; + cleanup(params: AgentHarnessV2CleanupParams): Promise; + compact?(params: 
AgentHarnessCompactParams): Promise; + reset?(params: AgentHarnessResetParams): Promise | void; + dispose?(): Promise | void; +}; + +export function adaptAgentHarnessToV2(harness: AgentHarness): AgentHarnessV2 { + return { + id: harness.id, + label: harness.label, + pluginId: harness.pluginId, + supports: (ctx) => harness.supports(ctx), + prepare: async (params) => ({ + harnessId: harness.id, + label: harness.label, + pluginId: harness.pluginId, + params, + lifecycleState: "prepared", + }), + start: async (prepared) => ({ + harnessId: prepared.harnessId, + label: prepared.label, + pluginId: prepared.pluginId, + params: prepared.params, + lifecycleState: "started", + }), + send: async (session) => harness.runAttempt(session.params), + resolveOutcome: async (session, result) => + applyAgentHarnessResultClassification(harness, result, session.params), + cleanup: async (_params) => { + // V1 harnesses have no per-attempt cleanup hook. Global cleanup remains + // on dispose(), which must not run after every attempt. + }, + compact: harness.compact ? (params) => harness.compact!(params) : undefined, + reset: harness.reset ? (params) => harness.reset!(params) : undefined, + dispose: harness.dispose ? () => harness.dispose!() : undefined, + }; +} + +export async function runAgentHarnessV2LifecycleAttempt( + harness: AgentHarnessV2, + params: AgentHarnessAttemptParams, +): Promise { + let prepared: AgentHarnessV2PreparedRun | undefined; + let session: AgentHarnessV2Session | undefined; + let rawResult: AgentHarnessAttemptResult | undefined; + let result: AgentHarnessAttemptResult; + + try { + prepared = await harness.prepare(params); + session = await harness.start(prepared); + rawResult = await harness.send(session); + result = await harness.resolveOutcome(session, rawResult); + } catch (error) { + try { + await harness.cleanup({ + prepared, + session, + error, + ...(rawResult === undefined ? 
{} : { result: rawResult }), + }); + } catch (cleanupError) { + // Preserve the user-visible harness failure. Cleanup errors after a + // failed lifecycle stage must not mask the actionable runtime error. + log.warn("agent harness cleanup failed after attempt failure", { + harnessId: harness.id, + provider: params.provider, + modelId: params.modelId, + error: formatErrorMessage(cleanupError), + originalError: formatErrorMessage(error), + }); + } + throw error; + } + + await harness.cleanup({ prepared, session, result }); + return result; +} diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts index 1f93ded4b4a..6476564dbee 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts @@ -1,4 +1,7 @@ import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest"; +import type { AgentHarness } from "../harness/types.js"; +import type { AgentInternalEvent } from "../internal-events.js"; +import type { AgentRuntimePlan } from "../runtime-plan/types.js"; import { makeAttemptResult, makeCompactionSuccess, @@ -8,6 +11,7 @@ import { } from "./run.overflow-compaction.fixture.js"; import { loadRunOverflowCompactionHarness, + mockedBuildAgentRuntimePlan, mockedBuildEmbeddedRunPayloads, mockedCoerceToFailoverError, mockedCompactDirect, @@ -26,8 +30,111 @@ import { overflowBaseRunParams, resetRunOverflowCompactionHarnessMocks, } from "./run.overflow-compaction.harness.js"; +import type { RunEmbeddedPiAgentParams } from "./run/params.js"; +import type { EmbeddedRunAttemptParams } from "./run/types.js"; let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent; +type RuntimePlanOverrides = Partial> & { + auth?: Partial; + resolvedRef?: Partial; +}; +function makeForwardingCase(internalEvents: AgentInternalEvent[]) { + return { + runId: "forward-attempt-params", + params: { + toolsAllow: ["exec", 
"read"], + bootstrapContextMode: "lightweight", + bootstrapContextRunKind: "cron", + disableMessageTool: true, + forceMessageTool: true, + requireExplicitMessageTarget: true, + internalEvents, + }, + expected: { + toolsAllow: ["exec", "read"], + bootstrapContextMode: "lightweight", + bootstrapContextRunKind: "cron", + disableMessageTool: true, + forceMessageTool: true, + requireExplicitMessageTarget: true, + }, + } satisfies { + runId: string; + params: Partial; + expected: Record; + }; +} + +function makeForwardedRuntimePlan(overrides: RuntimePlanOverrides = {}): AgentRuntimePlan { + const transcriptPolicy = { + sanitizeMode: "full", + sanitizeToolCallIds: true, + preserveNativeAnthropicToolUseIds: false, + repairToolUseResultPairing: true, + preserveSignatures: false, + sanitizeThinkingSignatures: true, + dropThinkingBlocks: false, + applyGoogleTurnOrdering: false, + validateGeminiTurns: false, + validateAnthropicTurns: false, + allowSyntheticToolResults: false, + } satisfies AgentRuntimePlan["transcript"]["policy"]; + const basePlan: AgentRuntimePlan = { + auth: { + authProfileProviderForAuth: "anthropic", + providerForAuth: "anthropic", + }, + delivery: { + isSilentPayload: vi.fn(() => false), + resolveFollowupRoute: vi.fn(), + }, + observability: { + provider: "anthropic", + resolvedRef: "anthropic/test-model", + modelId: "test-model", + }, + outcome: { + classifyRunResult: vi.fn(() => undefined), + }, + prompt: { + provider: "anthropic", + modelId: "test-model", + resolveSystemPromptContribution: vi.fn(), + }, + transcript: { + policy: transcriptPolicy, + resolvePolicy: vi.fn((params): AgentRuntimePlan["transcript"]["policy"] => ({ + ...transcriptPolicy, + sanitizeMode: params?.modelApi === "anthropic-messages" ? 
"full" : "images-only", + })), + }, + transport: { + extraParams: {}, + resolveExtraParams: vi.fn(() => ({})), + }, + resolvedRef: { + provider: "anthropic", + modelId: "test-model", + harnessId: "pi", + }, + tools: { + normalize: vi.fn((tools) => tools), + logDiagnostics: vi.fn(), + }, + }; + return { + ...basePlan, + ...overrides, + auth: { + ...basePlan.auth, + ...overrides.auth, + }, + resolvedRef: { + ...basePlan.resolvedRef, + ...overrides.resolvedRef, + }, + }; +} describe("runEmbeddedPiAgent overflow compaction trigger routing", () => { beforeAll(async () => { @@ -83,9 +190,61 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => { ); }); + it("forwards optional attempt params and the runtime plan into one attempt call", async () => { + const internalEvents: AgentInternalEvent[] = []; + const forwardingCase = makeForwardingCase(internalEvents); + const runtimePlan = makeForwardedRuntimePlan(); + mockedBuildAgentRuntimePlan.mockReturnValueOnce(runtimePlan); + mockedRunEmbeddedAttempt.mockResolvedValueOnce(makeAttemptResult({ promptError: null })); + + await runEmbeddedPiAgent({ + ...overflowBaseRunParams, + ...forwardingCase.params, + runId: forwardingCase.runId, + }); + + expect(mockedBuildAgentRuntimePlan).toHaveBeenCalledTimes(1); + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1); + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledWith( + expect.objectContaining({ + ...forwardingCase.expected, + runtimePlan: expect.objectContaining({ + resolvedRef: expect.objectContaining({ + provider: "anthropic", + modelId: "test-model", + }), + tools: expect.objectContaining({ + normalize: expect.any(Function), + }), + transport: expect.objectContaining({ + resolveExtraParams: expect.any(Function), + }), + }), + }), + ); + const attemptParams = mockedRunEmbeddedAttempt.mock.calls[0]?.[0] as + | EmbeddedRunAttemptParams + | undefined; + expect(attemptParams?.runtimePlan).toBe(runtimePlan); + 
expect(attemptParams?.internalEvents).toBe(internalEvents); + }); + it("forwards explicit OpenAI Codex auth profiles to codex plugin harnesses", async () => { const { clearAgentHarnesses, registerAgentHarness } = await import("../harness/registry.js"); - const pluginRunAttempt = vi.fn(async () => makeAttemptResult({ assistantTexts: ["ok"] })); + const pluginRunAttempt = vi.fn(async () => + makeAttemptResult({ assistantTexts: ["ok"] }), + ); + const runtimePlan = makeForwardedRuntimePlan({ + resolvedRef: { + provider: "codex", + modelId: "gpt-5.4", + harnessId: "codex", + }, + auth: { + harnessAuthProvider: "openai-codex", + forwardedAuthProfileId: "openai-codex:work", + }, + }); clearAgentHarnesses(); registerAgentHarness({ id: "codex", @@ -94,6 +253,7 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => { ctx.provider === "codex" ? { supported: true, priority: 100 } : { supported: false }, runAttempt: pluginRunAttempt, }); + mockedBuildAgentRuntimePlan.mockReturnValueOnce(runtimePlan); mockedGetApiKeyForModel.mockRejectedValueOnce(new Error("generic auth should be skipped")); try { @@ -117,18 +277,47 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => { } expect(mockedGetApiKeyForModel).not.toHaveBeenCalled(); + expect(mockedBuildAgentRuntimePlan).toHaveBeenCalledTimes(1); + expect(pluginRunAttempt).toHaveBeenCalledTimes(1); expect(pluginRunAttempt).toHaveBeenCalledWith( expect.objectContaining({ provider: "codex", authProfileId: "openai-codex:work", authProfileIdSource: "user", + runtimePlan: expect.objectContaining({ + resolvedRef: expect.objectContaining({ + provider: "codex", + modelId: "gpt-5.4", + harnessId: "codex", + }), + auth: expect.objectContaining({ + harnessAuthProvider: "openai-codex", + forwardedAuthProfileId: "openai-codex:work", + }), + }), }), ); + const harnessParams = pluginRunAttempt.mock.calls[0]?.[0]; + expect(harnessParams?.runtimePlan).toBe(runtimePlan); }); it("forwards OpenAI Codex auth 
profiles when openai/* is forced through codex", async () => { const { clearAgentHarnesses, registerAgentHarness } = await import("../harness/registry.js"); - const pluginRunAttempt = vi.fn(async () => makeAttemptResult({ assistantTexts: ["ok"] })); + const pluginRunAttempt = vi.fn(async () => + makeAttemptResult({ assistantTexts: ["ok"] }), + ); + const runtimePlan = makeForwardedRuntimePlan({ + resolvedRef: { + provider: "openai", + modelId: "gpt-5.4", + harnessId: "codex", + }, + auth: { + providerForAuth: "openai", + harnessAuthProvider: "openai-codex", + forwardedAuthProfileId: "openai-codex:work", + }, + }); clearAgentHarnesses(); registerAgentHarness({ id: "codex", @@ -136,6 +325,7 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => { supports: () => ({ supported: false }), runAttempt: pluginRunAttempt, }); + mockedBuildAgentRuntimePlan.mockReturnValueOnce(runtimePlan); mockedGetApiKeyForModel.mockRejectedValueOnce(new Error("generic auth should be skipped")); try { @@ -159,13 +349,29 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => { } expect(mockedGetApiKeyForModel).not.toHaveBeenCalled(); + expect(mockedBuildAgentRuntimePlan).toHaveBeenCalledTimes(1); + expect(pluginRunAttempt).toHaveBeenCalledTimes(1); expect(pluginRunAttempt).toHaveBeenCalledWith( expect.objectContaining({ provider: "openai", authProfileId: "openai-codex:work", authProfileIdSource: "user", + runtimePlan: expect.objectContaining({ + resolvedRef: expect.objectContaining({ + provider: "openai", + modelId: "gpt-5.4", + harnessId: "codex", + }), + auth: expect.objectContaining({ + providerForAuth: "openai", + harnessAuthProvider: "openai-codex", + forwardedAuthProfileId: "openai-codex:work", + }), + }), }), ); + const harnessParams = pluginRunAttempt.mock.calls[0]?.[0]; + expect(harnessParams?.runtimePlan).toBe(runtimePlan); }); it("blocks undersized models before dispatching a provider attempt", async () => { diff --git 
a/src/agents/pi-embedded-runner/run/attempt.transcript-policy.test.ts b/src/agents/pi-embedded-runner/run/attempt.transcript-policy.test.ts new file mode 100644 index 00000000000..64263c9aa3c --- /dev/null +++ b/src/agents/pi-embedded-runner/run/attempt.transcript-policy.test.ts @@ -0,0 +1,93 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import type { ProviderRuntimeModel } from "../../../plugins/provider-runtime-model.types.js"; +import type { AgentRuntimePlan } from "../../runtime-plan/types.js"; +import { resolveAttemptTranscriptPolicy } from "./attempt.transcript-policy.js"; + +const resolveProviderRuntimePluginMock = vi.hoisted(() => vi.fn()); + +vi.mock("../../../plugins/provider-hook-runtime.js", () => ({ + resolveProviderRuntimePlugin: resolveProviderRuntimePluginMock, +})); + +describe("resolveAttemptTranscriptPolicy", () => { + beforeEach(() => { + resolveProviderRuntimePluginMock.mockReset(); + resolveProviderRuntimePluginMock.mockReturnValue(undefined); + }); + + it("uses RuntimePlan transcript policy when available", () => { + const plannedPolicy = { + sanitizeMode: "full", + sanitizeToolCallIds: true, + toolCallIdMode: "strict", + preserveNativeAnthropicToolUseIds: false, + repairToolUseResultPairing: true, + preserveSignatures: true, + sanitizeThinkingSignatures: false, + dropThinkingBlocks: true, + applyGoogleTurnOrdering: false, + validateGeminiTurns: false, + validateAnthropicTurns: true, + allowSyntheticToolResults: true, + } as const; + const resolvePolicy = vi.fn(() => plannedPolicy); + const runtimePlan = { + transcript: { + resolvePolicy, + }, + } as unknown as AgentRuntimePlan; + const runtimePlanModelContext = { + workspaceDir: "/tmp/openclaw-transcript-policy", + modelApi: "anthropic-messages", + model: { + id: "claude-opus-4.6", + name: "Claude Opus 4.6", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.anthropic.com", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, 
cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200_000, + maxTokens: 8_192, + } satisfies ProviderRuntimeModel, + }; + + expect( + resolveAttemptTranscriptPolicy({ + runtimePlan, + runtimePlanModelContext, + provider: "anthropic", + modelId: "claude-opus-4.6", + }), + ).toBe(plannedPolicy); + expect(resolvePolicy).toHaveBeenCalledWith(runtimePlanModelContext); + }); + + it("keeps the legacy provider transcript fallback when no RuntimePlan is available", () => { + const env = { OPENCLAW_TEST_TRANSCRIPT_POLICY: "1" } as NodeJS.ProcessEnv; + const policy = resolveAttemptTranscriptPolicy({ + runtimePlanModelContext: { + workspaceDir: "/tmp/openclaw-transcript-policy", + modelApi: "openai-responses", + }, + provider: "custom-openai-compatible", + modelId: "gpt-5.4", + env, + }); + + expect(policy).toMatchObject({ + sanitizeMode: "images-only", + sanitizeToolCallIds: true, + toolCallIdMode: "strict", + repairToolUseResultPairing: true, + allowSyntheticToolResults: false, + }); + expect(resolveProviderRuntimePluginMock).toHaveBeenCalledWith({ + provider: "custom-openai-compatible", + config: undefined, + workspaceDir: "/tmp/openclaw-transcript-policy", + env, + }); + }); +}); diff --git a/src/agents/pi-embedded-runner/run/attempt.transcript-policy.ts b/src/agents/pi-embedded-runner/run/attempt.transcript-policy.ts new file mode 100644 index 00000000000..90307102010 --- /dev/null +++ b/src/agents/pi-embedded-runner/run/attempt.transcript-policy.ts @@ -0,0 +1,36 @@ +import type { OpenClawConfig } from "../../../config/types.openclaw.js"; +import type { ProviderRuntimeModel } from "../../../plugins/provider-runtime-model.types.js"; +import type { AgentRuntimePlan } from "../../runtime-plan/types.js"; +import { resolveTranscriptPolicy, type TranscriptPolicy } from "../../transcript-policy.js"; + +export type AttemptRuntimeModelContext = NonNullable< + Parameters[0] +>; + +function asProviderRuntimeModel( + model: AttemptRuntimeModelContext["model"], +): ProviderRuntimeModel 
| undefined { + return typeof model?.id === "string" ? (model as ProviderRuntimeModel) : undefined; +} + +export function resolveAttemptTranscriptPolicy(params: { + runtimePlan?: AgentRuntimePlan; + runtimePlanModelContext: AttemptRuntimeModelContext; + provider: string; + modelId: string; + config?: OpenClawConfig; + env?: NodeJS.ProcessEnv; +}): TranscriptPolicy { + return ( + params.runtimePlan?.transcript.resolvePolicy(params.runtimePlanModelContext) ?? + resolveTranscriptPolicy({ + modelApi: params.runtimePlanModelContext.modelApi, + provider: params.provider, + modelId: params.modelId, + config: params.config, + workspaceDir: params.runtimePlanModelContext.workspaceDir, + env: params.env ?? process.env, + model: asProviderRuntimeModel(params.runtimePlanModelContext.model), + }) + ); +} diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 4ed9cd4adbc..5092e4142c9 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -117,6 +117,10 @@ import { import { wrapStreamFnTextTransforms } from "../../plugin-text-transforms.js"; import { describeProviderRequestRoutingSummary } from "../../provider-attribution.js"; import { registerProviderStreamForModel } from "../../provider-stream.js"; +import { + logAgentRuntimeToolDiagnostics, + normalizeAgentRuntimeTools, +} from "../../runtime-plan/tools.js"; import { resolveSandboxContext } from "../../sandbox.js"; import { resolveSandboxRuntimeStatus } from "../../sandbox/runtime-status.js"; import { repairSessionFileIfNeeded } from "../../session-file-repair.js"; @@ -148,10 +152,7 @@ import { collectExplicitToolAllowlistSources, } from "../../tool-allowlist-guard.js"; import { UNKNOWN_TOOL_THRESHOLD } from "../../tool-loop-detection.js"; -import { - resolveTranscriptPolicy, - shouldAllowProviderOwnedThinkingReplay, -} from "../../transcript-policy.js"; +import { shouldAllowProviderOwnedThinkingReplay } from 
"../../transcript-policy.js"; import { normalizeUsage, type NormalizedUsage } from "../../usage.js"; import { DEFAULT_BOOTSTRAP_FILENAME } from "../../workspace.js"; import { isRunnerAbortError } from "../abort.js"; @@ -219,10 +220,6 @@ import { resolveLiveToolResultMaxChars, truncateOversizedToolResultsInSessionManager, } from "../tool-result-truncation.js"; -import { - logProviderToolSchemaDiagnostics, - normalizeProviderToolSchemas, -} from "../tool-schema-runtime.js"; import { splitSdkTools } from "../tool-split.js"; import { mapThinkingLevel } from "../utils.js"; import { flushPendingToolResultsAfterIdle } from "../wait-for-idle-before-flush.js"; @@ -290,6 +287,7 @@ import { wrapStreamFnTrimToolCallNames, } from "./attempt.tool-call-normalization.js"; import { buildEmbeddedAttemptToolRunContext } from "./attempt.tool-run-context.js"; +import { resolveAttemptTranscriptPolicy } from "./attempt.transcript-policy.js"; import { waitForCompactionRetryWithAggregateTimeout } from "./compaction-retry-aggregate-timeout.js"; import { resolveRunTimeoutDuringCompaction, @@ -844,18 +842,17 @@ export async function runEmbeddedAttempt( modelApi: params.model.api, model: params.model, }; - const tools = - params.runtimePlan?.tools.normalize(toolsEnabled ? toolsRaw : [], runtimePlanModelContext) ?? - normalizeProviderToolSchemas({ - tools: toolsEnabled ? toolsRaw : [], - provider: params.provider, - config: params.config, - workspaceDir: effectiveWorkspace, - env: process.env, - modelId: params.modelId, - modelApi: params.model.api, - model: params.model, - }); + const tools = normalizeAgentRuntimeTools({ + runtimePlan: params.runtimePlan, + tools: toolsEnabled ? toolsRaw : [], + provider: params.provider, + config: params.config, + workspaceDir: effectiveWorkspace, + env: process.env, + modelId: params.modelId, + modelApi: params.model.api, + model: params.model, + }); const clientTools = toolsEnabled ? 
params.clientTools : undefined; const bundleMcpEnabled = shouldCreateBundleMcpRuntimeForAttempt({ toolsEnabled, @@ -942,20 +939,17 @@ export async function runEmbeddedAttempt( toolsEnabled, disableTools: params.disableTools, }); - if (params.runtimePlan) { - params.runtimePlan.tools.logDiagnostics(effectiveTools, runtimePlanModelContext); - } else { - logProviderToolSchemaDiagnostics({ - tools: effectiveTools, - provider: params.provider, - config: params.config, - workspaceDir: effectiveWorkspace, - env: process.env, - modelId: params.modelId, - modelApi: params.model.api, - model: params.model, - }); - } + logAgentRuntimeToolDiagnostics({ + runtimePlan: params.runtimePlan, + tools: effectiveTools, + provider: params.provider, + config: params.config, + workspaceDir: effectiveWorkspace, + env: process.env, + modelId: params.modelId, + modelApi: params.model.api, + model: params.model, + }); const machineName = await getMachineDisplayName(); const runtimeChannel = normalizeMessageChannel(params.messageChannel ?? params.messageProvider); @@ -1201,17 +1195,14 @@ export async function runEmbeddedAttempt( .then(() => true) .catch(() => false); - const transcriptPolicy = - params.runtimePlan?.transcript.resolvePolicy(runtimePlanModelContext) ?? 
- resolveTranscriptPolicy({ - modelApi: params.model?.api, - provider: params.provider, - modelId: params.modelId, - config: params.config, - workspaceDir: effectiveWorkspace, - env: process.env, - model: params.model, - }); + const transcriptPolicy = resolveAttemptTranscriptPolicy({ + runtimePlan: params.runtimePlan, + runtimePlanModelContext, + provider: params.provider, + modelId: params.modelId, + config: params.config, + env: process.env, + }); await prewarmSessionFile(params.sessionFile); sessionManager = guardSessionManager(SessionManager.open(params.sessionFile), { diff --git a/src/agents/runtime-plan/build.ts b/src/agents/runtime-plan/build.ts index 9c1acc04114..334534b2b00 100644 --- a/src/agents/runtime-plan/build.ts +++ b/src/agents/runtime-plan/build.ts @@ -1,7 +1,10 @@ import type { AgentTool } from "@mariozechner/pi-agent-core"; import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload"; import type { TSchema } from "typebox"; +import type { ThinkLevel } from "../../auto-reply/thinking.js"; import { isSilentReplyPayloadText, SILENT_REPLY_TOKEN } from "../../auto-reply/tokens.js"; +import type { OpenClawConfig } from "../../config/types.openclaw.js"; +import type { ProviderRuntimeModel } from "../../plugins/provider-runtime-model.types.js"; import { resolveProviderFollowupFallbackRoute, resolveProviderSystemPromptContribution, @@ -30,9 +33,26 @@ function hasMedia(payload: { mediaUrl?: string; mediaUrls?: string[] }): boolean return resolveSendableOutboundReplyParts(payload).hasMedia; } +function asOpenClawConfig(value: unknown): OpenClawConfig | undefined { + return value !== null && typeof value === "object" && !Array.isArray(value) + ? (value as OpenClawConfig) + : undefined; +} + +function asProviderRuntimeModel( + value: BuildAgentRuntimePlanParams["model"], +): ProviderRuntimeModel | undefined { + return value !== undefined ? 
(value as ProviderRuntimeModel) : undefined; +} + +function asThinkLevel(value: BuildAgentRuntimePlanParams["thinkingLevel"]): ThinkLevel | undefined { + return value !== undefined ? (value as ThinkLevel) : undefined; +} + export function buildAgentRuntimeDeliveryPlan( params: BuildAgentRuntimeDeliveryPlanParams, ): AgentRuntimeDeliveryPlan { + const config = asOpenClawConfig(params.config); return { isSilentPayload(payload): boolean { return isSilentReplyPayloadText(payload.text, SILENT_REPLY_TOKEN) && !hasMedia(payload); @@ -40,10 +60,10 @@ export function buildAgentRuntimeDeliveryPlan( resolveFollowupRoute(routeParams) { return resolveProviderFollowupFallbackRoute({ provider: params.provider, - config: params.config, + config, workspaceDir: params.workspaceDir, context: { - config: params.config, + config, agentDir: params.agentDir, workspaceDir: params.workspaceDir, provider: params.provider, @@ -66,13 +86,15 @@ export function buildAgentRuntimeOutcomePlan(): AgentRuntimeOutcomePlan { } export function buildAgentRuntimePlan(params: BuildAgentRuntimePlanParams): AgentRuntimePlan { + const config = asOpenClawConfig(params.config); + const model = asProviderRuntimeModel(params.model); const modelApi = params.modelApi ?? params.model?.api ?? 
undefined; const transport = params.resolvedTransport; const auth = buildAgentRuntimeAuthPlan({ provider: params.provider, authProfileProvider: params.authProfileProvider, sessionAuthProfileId: params.sessionAuthProfileId, - config: params.config, + config, workspaceDir: params.workspaceDir, harnessId: params.harnessId, harnessRuntime: params.harnessRuntime, @@ -87,12 +109,12 @@ export function buildAgentRuntimePlan(params: BuildAgentRuntimePlanParams): Agen }; const toolContext = { provider: params.provider, - config: params.config, + config, workspaceDir: params.workspaceDir, env: process.env, modelId: params.modelId, modelApi, - model: params.model, + model, }; const resolveToolContext = (overrides?: { workspaceDir?: string; @@ -102,7 +124,7 @@ export function buildAgentRuntimePlan(params: BuildAgentRuntimePlanParams): Agen ...toolContext, ...(overrides?.workspaceDir !== undefined ? { workspaceDir: overrides.workspaceDir } : {}), ...(overrides?.modelApi !== undefined ? { modelApi: overrides.modelApi } : {}), - ...(overrides?.model !== undefined ? { model: overrides.model } : {}), + ...(overrides?.model !== undefined ? { model: asProviderRuntimeModel(overrides.model) } : {}), }); const resolveTranscriptRuntimePolicy = (overrides?: { workspaceDir?: string; @@ -112,25 +134,25 @@ export function buildAgentRuntimePlan(params: BuildAgentRuntimePlanParams): Agen resolveTranscriptPolicy({ provider: params.provider, modelId: params.modelId, - config: params.config, + config, workspaceDir: overrides?.workspaceDir ?? params.workspaceDir, env: process.env, modelApi: overrides?.modelApi ?? modelApi, - model: overrides?.model ?? params.model, + model: asProviderRuntimeModel(overrides?.model) ?? model, }); const resolveTransportExtraParams = ( overrides: Parameters[0] = {}, ) => resolvePreparedExtraParams({ - cfg: params.config, + cfg: config, provider: params.provider, modelId: params.modelId, agentDir: params.agentDir, workspaceDir: overrides.workspaceDir ?? 
params.workspaceDir, extraParamsOverride: overrides.extraParamsOverride ?? params.extraParamsOverride, - thinkingLevel: overrides.thinkingLevel ?? params.thinkingLevel, + thinkingLevel: asThinkLevel(overrides.thinkingLevel ?? params.thinkingLevel), agentId: overrides.agentId ?? params.agentId, - model: overrides.model ?? params.model, + model: asProviderRuntimeModel(overrides.model) ?? model, resolvedTransport: overrides.resolvedTransport ?? transport, }); @@ -143,9 +165,12 @@ export function buildAgentRuntimePlan(params: BuildAgentRuntimePlanParams): Agen resolveSystemPromptContribution(context) { return resolveProviderSystemPromptContribution({ provider: params.provider, - config: params.config, + config, workspaceDir: context.workspaceDir ?? params.workspaceDir, - context, + context: { + ...context, + config: asOpenClawConfig(context.config), + }, }); }, }, diff --git a/src/agents/runtime-plan/tools.diagnostics.test.ts b/src/agents/runtime-plan/tools.diagnostics.test.ts new file mode 100644 index 00000000000..a077850fc72 --- /dev/null +++ b/src/agents/runtime-plan/tools.diagnostics.test.ts @@ -0,0 +1,37 @@ +import { describe, expect, it, vi } from "vitest"; + +const mocks = vi.hoisted(() => ({ + logProviderToolSchemaDiagnostics: vi.fn(), + normalizeProviderToolSchemas: vi.fn((params: { tools: unknown[] }) => params.tools), +})); + +vi.mock("../pi-embedded-runner/tool-schema-runtime.js", () => ({ + logProviderToolSchemaDiagnostics: mocks.logProviderToolSchemaDiagnostics, + normalizeProviderToolSchemas: mocks.normalizeProviderToolSchemas, +})); + +const { logAgentRuntimeToolDiagnostics } = await import("./tools.js"); + +describe("AgentRuntimePlan tool diagnostics legacy fallback", () => { + it("falls back to provider diagnostics when no RuntimePlan is available", () => { + const tools = [{ name: "alpha" }] as never; + + logAgentRuntimeToolDiagnostics({ + tools, + provider: "openai", + modelId: "gpt-5.4", + modelApi: "openai-responses", + workspaceDir: 
"/tmp/openclaw-runtime-plan-tools", + }); + + expect(mocks.logProviderToolSchemaDiagnostics).toHaveBeenCalledWith( + expect.objectContaining({ + tools, + provider: "openai", + modelId: "gpt-5.4", + modelApi: "openai-responses", + workspaceDir: "/tmp/openclaw-runtime-plan-tools", + }), + ); + }); +}); diff --git a/src/agents/runtime-plan/tools.test.ts b/src/agents/runtime-plan/tools.test.ts new file mode 100644 index 00000000000..04e1a67f4ed --- /dev/null +++ b/src/agents/runtime-plan/tools.test.ts @@ -0,0 +1,107 @@ +import type { AgentTool } from "@mariozechner/pi-agent-core"; +import { describe, expect, it, vi } from "vitest"; +import { + createNativeOpenAIResponsesModel, + createParameterFreeTool, + normalizedParameterFreeSchema, +} from "../../../test/helpers/agents/schema-normalization-runtime-contract.js"; +import { logAgentRuntimeToolDiagnostics, normalizeAgentRuntimeTools } from "./tools.js"; +import type { AgentRuntimePlan } from "./types.js"; + +describe("AgentRuntimePlan tool policy helpers", () => { + it("uses RuntimePlan-owned tool normalization when a plan is available", () => { + const tools = [createParameterFreeTool()] as AgentTool[]; + const normalized = [{ ...tools[0], name: "normalized" }] as AgentTool[]; + const model = createNativeOpenAIResponsesModel() as never; + const normalize = vi.fn(() => normalized); + const runtimePlan = { + tools: { + normalize, + logDiagnostics: vi.fn(), + }, + } as unknown as AgentRuntimePlan; + + expect( + normalizeAgentRuntimeTools({ + runtimePlan, + tools, + provider: "openai", + modelId: "gpt-5.4", + modelApi: "openai-responses", + workspaceDir: "/tmp/openclaw-runtime-plan-tools", + model, + }), + ).toBe(normalized); + expect(normalize).toHaveBeenCalledWith(tools, { + workspaceDir: "/tmp/openclaw-runtime-plan-tools", + modelApi: "openai-responses", + model, + }); + }); + + it("accepts legacy optional model fields while normalizing RuntimePlan context", () => { + const tools = [createParameterFreeTool()] as 
AgentTool[]; + const normalize = vi.fn(() => tools); + const runtimePlan = { + tools: { + normalize, + logDiagnostics: vi.fn(), + }, + } as unknown as AgentRuntimePlan; + + expect( + normalizeAgentRuntimeTools({ + runtimePlan, + tools, + provider: "openai", + modelApi: null, + }), + ).toBe(tools); + expect(normalize).toHaveBeenCalledWith(tools, { + workspaceDir: undefined, + modelApi: undefined, + model: undefined, + }); + }); + + it("falls back to legacy provider schema normalization when no plan is available", () => { + const normalized = normalizeAgentRuntimeTools({ + tools: [createParameterFreeTool()] as AgentTool[], + provider: "openai", + modelId: "gpt-5.4", + modelApi: "openai-responses", + workspaceDir: "/tmp/openclaw-runtime-plan-tools", + model: createNativeOpenAIResponsesModel() as never, + }); + + expect(normalized[0]?.parameters).toEqual(normalizedParameterFreeSchema()); + }); + + it("routes diagnostics through RuntimePlan when a plan is available", () => { + const tools = [createParameterFreeTool()] as AgentTool[]; + const model = createNativeOpenAIResponsesModel() as never; + const logDiagnostics = vi.fn(); + const runtimePlan = { + tools: { + normalize: vi.fn(), + logDiagnostics, + }, + } as unknown as AgentRuntimePlan; + + logAgentRuntimeToolDiagnostics({ + runtimePlan, + tools, + provider: "openai", + modelId: "gpt-5.4", + modelApi: "openai-responses", + workspaceDir: "/tmp/openclaw-runtime-plan-tools", + model, + }); + + expect(logDiagnostics).toHaveBeenCalledWith(tools, { + workspaceDir: "/tmp/openclaw-runtime-plan-tools", + modelApi: "openai-responses", + model, + }); + }); +}); diff --git a/src/agents/runtime-plan/tools.ts b/src/agents/runtime-plan/tools.ts new file mode 100644 index 00000000000..6fa6dca1b47 --- /dev/null +++ b/src/agents/runtime-plan/tools.ts @@ -0,0 +1,71 @@ +import type { AgentTool } from "@mariozechner/pi-agent-core"; +import type { TSchema } from "typebox"; +import type { OpenClawConfig } from 
"../../config/types.openclaw.js"; +import type { ProviderRuntimeModel } from "../../plugins/provider-runtime-model.types.js"; +import { + logProviderToolSchemaDiagnostics, + normalizeProviderToolSchemas, +} from "../pi-embedded-runner/tool-schema-runtime.js"; +import type { AgentRuntimePlan } from "./types.js"; + +type AgentRuntimeToolPolicyParams = { + runtimePlan?: AgentRuntimePlan; + tools: AgentTool[]; + provider: string; + config?: OpenClawConfig; + workspaceDir?: string; + env?: NodeJS.ProcessEnv; + modelId?: string; + modelApi?: string | null; + model?: ProviderRuntimeModel; +}; + +function runtimePlanToolContext(params: { + workspaceDir?: string; + modelApi?: string | null; + model?: ProviderRuntimeModel; +}) { + return { + workspaceDir: params.workspaceDir, + modelApi: params.modelApi ?? undefined, + model: params.model, + }; +} + +export function normalizeAgentRuntimeTools< + TSchemaType extends TSchema = TSchema, + TResult = unknown, +>(params: AgentRuntimeToolPolicyParams): AgentTool[] { + const planContext = runtimePlanToolContext(params); + return ( + params.runtimePlan?.tools.normalize(params.tools, planContext) ?? + normalizeProviderToolSchemas({ + tools: params.tools, + provider: params.provider, + config: params.config, + workspaceDir: params.workspaceDir, + env: params.env ?? process.env, + modelId: params.modelId, + modelApi: params.modelApi, + model: params.model, + }) + ); +} + +export function logAgentRuntimeToolDiagnostics(params: AgentRuntimeToolPolicyParams): void { + const planContext = runtimePlanToolContext(params); + if (params.runtimePlan) { + params.runtimePlan.tools.logDiagnostics(params.tools, planContext); + return; + } + logProviderToolSchemaDiagnostics({ + tools: params.tools, + provider: params.provider, + config: params.config, + workspaceDir: params.workspaceDir, + env: params.env ?? 
process.env, + modelId: params.modelId, + modelApi: params.modelApi, + model: params.model, + }); +} diff --git a/src/agents/runtime-plan/types.compat.test.ts b/src/agents/runtime-plan/types.compat.test.ts new file mode 100644 index 00000000000..725a6f5e6fc --- /dev/null +++ b/src/agents/runtime-plan/types.compat.test.ts @@ -0,0 +1,43 @@ +import { describe, expectTypeOf, it } from "vitest"; +import type { ReplyPayload } from "../../auto-reply/reply-payload.js"; +import type { ThinkLevel } from "../../auto-reply/thinking.js"; +import type { FailoverReason } from "../pi-embedded-helpers/types.js"; +import type { PromptMode } from "../system-prompt.types.js"; +import type { buildAgentRuntimeDeliveryPlan, buildAgentRuntimePlan } from "./build.js"; +import type { + AgentRuntimeFailoverReason, + AgentRuntimePromptMode, + AgentRuntimeReplyPayload, + AgentRuntimeThinkLevel, + BuildAgentRuntimeDeliveryPlanParams, + BuildAgentRuntimePlanParams, +} from "./types.js"; + +type Equal = [X] extends [Y] ? ([Y] extends [X] ? 
true : false) : false; + +type Assert = T; + +describe("AgentRuntimePlan structural type compatibility", () => { + it("keeps copied scalar unions aligned with their source contracts", () => { + expectTypeOf().toEqualTypeOf(); + expectTypeOf().toEqualTypeOf(); + expectTypeOf().toEqualTypeOf(); + }); + + it("keeps reply payload shapes structurally compatible with the runtime leaf payload shape", () => { + type _ReplyPayloadKeysStayInSync = Assert< + Equal + >; + expectTypeOf().toMatchTypeOf(); + expectTypeOf().toMatchTypeOf(); + }); + + it("keeps builder call signatures aligned with exported structural params", () => { + expectTypeOf< + Parameters[0] + >().toEqualTypeOf(); + expectTypeOf< + Parameters[0] + >().toEqualTypeOf(); + }); +}); diff --git a/src/agents/runtime-plan/types.test.ts b/src/agents/runtime-plan/types.test.ts new file mode 100644 index 00000000000..5b011239979 --- /dev/null +++ b/src/agents/runtime-plan/types.test.ts @@ -0,0 +1,37 @@ +import fs from "node:fs/promises"; +import { fileURLToPath } from "node:url"; +import { describe, expect, it } from "vitest"; + +const TYPES_PATH = fileURLToPath(new URL("./types.ts", import.meta.url)); + +const concreteRuntimePolicyImportPatterns = [ + /from\s+["'][^"']*auto-reply(?:\/|\.js|["'])/, + /from\s+["'](?:[^"']*\/)?config(?:\/|\.js|["'])/, + /from\s+["'](?:[^"']*\/)?plugins(?:\/|\.js|["'])/, + /from\s+["'][^"']*pi-embedded-/, + /from\s+["'][^"']*transcript-policy(?:\.[^/"']+)?(?:\/|\.js|["'])/, + /from\s+["'][^"']*system-prompt(?:\.[^/"']+)?(?:\/|\.js|["'])/, +]; + +describe("AgentRuntimePlan leaf contracts", () => { + it("keeps runtime plan type contracts independent from concrete runtime policy modules", async () => { + const source = await fs.readFile(TYPES_PATH, "utf8"); + + for (const pattern of concreteRuntimePolicyImportPatterns) { + expect(source).not.toMatch(pattern); + } + }); + + it("guards against policy type imports re-entering the leaf contract", () => { + const forbiddenImports = [ + 'import 
type { PromptContribution } from "../system-prompt.types.js";', + 'import type { TranscriptPolicy } from "../transcript-policy.types.js";', + ]; + + for (const importStatement of forbiddenImports) { + expect( + concreteRuntimePolicyImportPatterns.some((pattern) => pattern.test(importStatement)), + ).toBe(true); + } + }); +}); diff --git a/src/agents/runtime-plan/types.ts b/src/agents/runtime-plan/types.ts index aad8e8cf7a8..c83fb7a0800 100644 --- a/src/agents/runtime-plan/types.ts +++ b/src/agents/runtime-plan/types.ts @@ -1,14 +1,155 @@ import type { AgentTool } from "@mariozechner/pi-agent-core"; import type { TSchema } from "typebox"; -import type { ThinkLevel } from "../../auto-reply/thinking.js"; -import type { ReplyPayload } from "../../auto-reply/types.js"; -import type { OpenClawConfig } from "../../config/types.openclaw.js"; -import type { ProviderRuntimeModel } from "../../plugins/provider-runtime-model.types.js"; -import type { FailoverReason } from "../pi-embedded-helpers/types.js"; -import type { PromptMode } from "../system-prompt.types.js"; export type AgentRuntimeTransport = "sse" | "websocket" | "auto"; +export type AgentRuntimeThinkLevel = + | "off" + | "minimal" + | "low" + | "medium" + | "high" + | "xhigh" + | "adaptive" + | "max"; + +export type AgentRuntimePromptMode = "full" | "minimal" | "none"; + +export type AgentRuntimeFailoverReason = + | "auth" + | "auth_permanent" + | "format" + | "rate_limit" + | "overloaded" + | "billing" + | "timeout" + | "model_not_found" + | "session_expired" + | "unknown"; + +export type AgentRuntimeConfig = unknown; + +export type AgentRuntimeModel = { + id?: string; + name?: string; + api?: string; + provider?: string; + baseUrl?: string; + reasoning?: boolean; + input?: string[]; + cost?: { + input: number; + output: number; + cacheRead: number; + cacheWrite: number; + }; + contextWindow?: number; + maxTokens?: number; + contextTokens?: number; + compat?: unknown; +}; + +export type 
AgentRuntimeInteractiveButtonStyle = "primary" | "secondary" | "success" | "danger"; + +export type AgentRuntimeInteractiveReplyButton = { + label: string; + value?: string; + url?: string; + style?: AgentRuntimeInteractiveButtonStyle; +}; + +export type AgentRuntimeInteractiveReplyOption = { + label: string; + value: string; +}; + +export type AgentRuntimeInteractiveReplyBlock = + | { + type: "text"; + text: string; + } + | { + type: "buttons"; + buttons: AgentRuntimeInteractiveReplyButton[]; + } + | { + type: "select"; + placeholder?: string; + options: AgentRuntimeInteractiveReplyOption[]; + }; + +export type AgentRuntimeInteractiveReply = { + blocks: AgentRuntimeInteractiveReplyBlock[]; +}; + +export type AgentRuntimeMessagePresentationTone = + | "info" + | "success" + | "warning" + | "danger" + | "neutral"; + +export type AgentRuntimeMessagePresentationBlock = + | { + type: "text"; + text: string; + } + | { + type: "context"; + text: string; + } + | { + type: "divider"; + } + | { + type: "buttons"; + buttons: AgentRuntimeInteractiveReplyButton[]; + } + | { + type: "select"; + placeholder?: string; + options: AgentRuntimeInteractiveReplyOption[]; + }; + +export type AgentRuntimeMessagePresentation = { + title?: string; + tone?: AgentRuntimeMessagePresentationTone; + blocks: AgentRuntimeMessagePresentationBlock[]; +}; + +export type AgentRuntimeReplyPayloadDeliveryPin = { + enabled: boolean; + notify?: boolean; + required?: boolean; +}; + +export type AgentRuntimeReplyPayloadDelivery = { + pin?: boolean | AgentRuntimeReplyPayloadDeliveryPin; +}; + +export type AgentRuntimeReplyPayload = { + text?: string; + mediaUrl?: string; + mediaUrls?: string[]; + trustedLocalMedia?: boolean; + sensitiveMedia?: boolean; + presentation?: AgentRuntimeMessagePresentation; + delivery?: AgentRuntimeReplyPayloadDelivery; + interactive?: AgentRuntimeInteractiveReply; + btw?: { + question: string; + }; + replyToId?: string; + replyToTag?: boolean; + replyToCurrent?: boolean; + 
audioAsVoice?: boolean; + spokenText?: string; + isError?: boolean; + isReasoning?: boolean; + isCompactionNotice?: boolean; + channelData?: Record; +}; + export type AgentRuntimeSystemPromptSectionId = | "interaction_style" | "tool_call_style" @@ -21,12 +162,12 @@ export type AgentRuntimeSystemPromptContribution = { }; export type AgentRuntimeSystemPromptContributionContext = { - config?: OpenClawConfig; + config?: AgentRuntimeConfig; agentDir?: string; workspaceDir?: string; provider: string; modelId: string; - promptMode: PromptMode; + promptMode: AgentRuntimePromptMode; runtimeChannel?: string; runtimeCapabilities?: string[]; agentId?: string; @@ -61,7 +202,7 @@ export type AgentRuntimeTranscriptPolicy = { export type AgentRuntimeOutcomeClassification = | { message: string; - reason?: FailoverReason; + reason?: AgentRuntimeFailoverReason; status?: number; code?: string; rawError?: string; @@ -109,7 +250,7 @@ export type AgentRuntimeToolPlan = { params?: { workspaceDir?: string; modelApi?: string; - model?: ProviderRuntimeModel; + model?: AgentRuntimeModel; }, ): AgentTool[]; logDiagnostics( @@ -117,15 +258,17 @@ export type AgentRuntimeToolPlan = { params?: { workspaceDir?: string; modelApi?: string; - model?: ProviderRuntimeModel; + model?: AgentRuntimeModel; }, ): void; }; export type AgentRuntimeDeliveryPlan = { - isSilentPayload(payload: Pick): boolean; + isSilentPayload( + payload: Pick, + ): boolean; resolveFollowupRoute(params: { - payload: ReplyPayload; + payload: AgentRuntimeReplyPayload; originatingChannel?: string; originatingTo?: string; originRoutable: boolean; @@ -141,10 +284,10 @@ export type AgentRuntimeTransportPlan = { extraParams: Record; resolveExtraParams(params?: { extraParamsOverride?: Record; - thinkingLevel?: ThinkLevel; + thinkingLevel?: AgentRuntimeThinkLevel; agentId?: string; workspaceDir?: string; - model?: ProviderRuntimeModel; + model?: AgentRuntimeModel; resolvedTransport?: AgentRuntimeTransport; }): Record; }; @@ -159,7 +302,7 
@@ export type AgentRuntimePlan = { resolvePolicy(params?: { workspaceDir?: string; modelApi?: string; - model?: ProviderRuntimeModel; + model?: AgentRuntimeModel; }): AgentRuntimeTranscriptPolicy; }; delivery: AgentRuntimeDeliveryPlan; @@ -177,7 +320,7 @@ export type AgentRuntimePlan = { }; export type BuildAgentRuntimeDeliveryPlanParams = { - config?: OpenClawConfig; + config?: AgentRuntimeConfig; workspaceDir?: string; agentDir?: string; provider: string; @@ -185,12 +328,12 @@ export type BuildAgentRuntimeDeliveryPlanParams = { }; export type BuildAgentRuntimePlanParams = { - config?: OpenClawConfig; + config?: AgentRuntimeConfig; workspaceDir?: string; agentDir?: string; provider: string; modelId: string; - model?: ProviderRuntimeModel; + model?: AgentRuntimeModel; modelApi?: string | null; harnessId?: string; harnessRuntime?: string; @@ -198,7 +341,7 @@ export type BuildAgentRuntimePlanParams = { authProfileProvider?: string; sessionAuthProfileId?: string; agentId?: string; - thinkingLevel?: ThinkLevel; + thinkingLevel?: AgentRuntimeThinkLevel; extraParamsOverride?: Record; resolvedTransport?: AgentRuntimeTransport; }; diff --git a/src/plugin-sdk/agent-harness-runtime.test.ts b/src/plugin-sdk/agent-harness-runtime.test.ts new file mode 100644 index 00000000000..41a3a056fa9 --- /dev/null +++ b/src/plugin-sdk/agent-harness-runtime.test.ts @@ -0,0 +1,128 @@ +import { describe, expect, it } from "vitest"; +import { + classifyAgentHarnessTerminalOutcome, + type AgentHarnessTerminalOutcomeClassification, +} from "./agent-harness-runtime.js"; + +describe("classifyAgentHarnessTerminalOutcome", () => { + it("does not classify an in-flight turn", () => { + expect( + classifyAgentHarnessTerminalOutcome({ + assistantTexts: [], + reasoningText: "", + planText: "", + promptError: null, + turnCompleted: false, + }), + ).toBeUndefined(); + }); + + it("does not classify prompt errors as terminal empty-output outcomes", () => { + expect( + classifyAgentHarnessTerminalOutcome({ 
+ assistantTexts: [], + reasoningText: "", + planText: "", + promptError: new Error("turn failed"), + turnCompleted: true, + }), + ).toBeUndefined(); + }); + + it("does not classify deliberate silent replies such as NO_REPLY", () => { + expect( + classifyAgentHarnessTerminalOutcome({ + assistantTexts: ["NO_REPLY"], + reasoningText: "", + planText: "", + promptError: null, + turnCompleted: true, + }), + ).toBeUndefined(); + }); + + it("treats empty-string prompt errors as terminal errors", () => { + expect( + classifyAgentHarnessTerminalOutcome({ + assistantTexts: [], + reasoningText: "", + planText: "", + promptError: "", + turnCompleted: true, + }), + ).toBeUndefined(); + }); + + it("treats whitespace-only assistant text as not visible", () => { + expect( + classifyAgentHarnessTerminalOutcome({ + assistantTexts: [" ", "\n\t"], + reasoningText: "", + planText: "", + promptError: null, + turnCompleted: true, + }), + ).toBe("empty"); + }); + + it("classifies a completed turn with plan text only as planning-only", () => { + expect( + classifyAgentHarnessTerminalOutcome({ + assistantTexts: [], + reasoningText: "", + planText: "1. inspect\n2. patch\n3. 
test", + promptError: null, + turnCompleted: true, + }), + ).toBe("planning-only"); + }); + + it("prefers planning-only when both plan and reasoning text are present", () => { + expect( + classifyAgentHarnessTerminalOutcome({ + assistantTexts: [], + reasoningText: "I need to inspect the files.", + planText: "I will inspect, patch, and test.", + promptError: null, + turnCompleted: true, + }), + ).toBe("planning-only"); + }); + + it("classifies a completed turn with reasoning text only as reasoning-only", () => { + expect( + classifyAgentHarnessTerminalOutcome({ + assistantTexts: [], + reasoningText: "The answer depends on the current repository state.", + planText: "", + promptError: null, + turnCompleted: true, + }), + ).toBe("reasoning-only"); + }); + + it("classifies a completed turn with no visible output as empty", () => { + expect( + classifyAgentHarnessTerminalOutcome({ + assistantTexts: [], + reasoningText: " ", + planText: "\n", + promptError: null, + turnCompleted: true, + }), + ).toBe("empty"); + }); + + it("returns only terminal fallback classifications, not ok", () => { + const classification: AgentHarnessTerminalOutcomeClassification = + classifyAgentHarnessTerminalOutcome({ + assistantTexts: [], + reasoningText: "", + planText: "", + promptError: null, + turnCompleted: true, + }) ?? "empty"; + + expect(classification).toBe("empty"); + }); +}); diff --git a/src/plugin-sdk/agent-harness-runtime.ts b/src/plugin-sdk/agent-harness-runtime.ts index 0c394a8b109..d0cc9f1fad1 100644 --- a/src/plugin-sdk/agent-harness-runtime.ts +++ b/src/plugin-sdk/agent-harness-runtime.ts @@ -2,6 +2,7 @@ // Keep heavyweight tool construction out of this module so harness imports can // register quickly inside gateway startup and Docker e2e runs. 
+import type { EmbeddedRunAttemptResult } from "../agents/pi-embedded-runner/run/types.js"; import { formatToolDetail, resolveToolDisplay } from "../agents/tool-display.js"; import { redactToolDetail } from "../logging/redact.js"; import { truncateUtf16Safe } from "../utils.js"; @@ -81,6 +82,10 @@ export { setActiveEmbeddedRun, } from "../agents/pi-embedded-runner/runs.js"; export { disposeRegisteredAgentHarnesses } from "../agents/harness/registry.js"; +export { + logAgentRuntimeToolDiagnostics, + normalizeAgentRuntimeTools, +} from "../agents/runtime-plan/tools.js"; export { normalizeProviderToolSchemas } from "../agents/pi-embedded-runner/tool-schema-runtime.js"; export { resolveSandboxContext } from "../agents/sandbox.js"; export { isSubagentSessionKey } from "../routing/session-key.js"; @@ -146,3 +151,46 @@ export function formatToolProgressOutput( } return `${truncateUtf16Safe(redacted, maxChars)}\n...(truncated)...`; } + +export type AgentHarnessTerminalOutcomeInput = { + assistantTexts: readonly string[]; + reasoningText?: string | null; + planText?: string | null; + promptError?: unknown; + turnCompleted: boolean; +}; + +export type AgentHarnessTerminalOutcomeClassification = NonNullable< + EmbeddedRunAttemptResult["agentHarnessResultClassification"] +>; + +/** + * Classify terminal harness turns that completed without assistant output that + * should advance fallback. Deliberate silent replies such as NO_REPLY count as + * intentional output, while whitespace-only text remains fallback-eligible. + * This is intentionally SDK-level so plugin harness adapters such as Codex + * preserve the same OpenClaw-owned fallback signals as the built-in PI path + * without re-implementing terminal-result policy. 
/**
 * Classify a completed harness turn that produced no visible assistant text.
 *
 * No classification (`undefined`) is returned when:
 * - the turn has not completed yet,
 * - a prompt error was reported (anything other than `null`/`undefined`, so an
 *   empty-string error still counts as an error), or
 * - at least one assistant text contains non-whitespace characters. Deliberate
 *   silent replies such as `NO_REPLY` are non-blank text and therefore stay
 *   unclassified.
 *
 * Otherwise plan text wins over reasoning text: non-blank `planText` yields
 * `"planning-only"`, non-blank `reasoningText` yields `"reasoning-only"`, and
 * a turn with neither yields `"empty"`.
 *
 * Harness adapters may be untyped at runtime, so text values that are not
 * strings (for example `null` entries in `assistantTexts`) are treated as
 * blank instead of raising a `TypeError` while the turn is being finalized.
 *
 * @param params - Terminal turn snapshot collected by the harness adapter.
 * @returns The terminal classification, or `undefined` when the turn must not
 *   be classified.
 */
export function classifyAgentHarnessTerminalOutcome(
  params: AgentHarnessTerminalOutcomeInput,
): AgentHarnessTerminalOutcomeClassification | undefined {
  if (!params.turnCompleted) {
    return undefined;
  }
  // Explicit null/undefined comparison: falsy error values such as "" are
  // still errors and must not be reclassified as empty output.
  if (params.promptError !== undefined && params.promptError !== null) {
    return undefined;
  }
  if (hasVisibleAssistantText(params.assistantTexts)) {
    return undefined;
  }
  if (isVisibleText(params.planText)) {
    return "planning-only";
  }
  if (isVisibleText(params.reasoningText)) {
    return "reasoning-only";
  }
  return "empty";
}

/** True when at least one assistant text entry has non-whitespace content. */
function hasVisibleAssistantText(assistantTexts: readonly string[]): boolean {
  // Array.isArray guards against untyped callers omitting the list entirely.
  return Array.isArray(assistantTexts) && assistantTexts.some((text) => isVisibleText(text));
}

/** True only for strings that contain at least one non-whitespace character. */
function isVisibleText(value: unknown): boolean {
  return typeof value === "string" && value.trim().length > 0;
}