From 7f6452897e25543c9263b67aabf140379d1c0b0c Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 25 Apr 2026 05:52:33 +0100 Subject: [PATCH] fix(agents): repair strict provider tool replay --- CHANGELOG.md | 1 + docs/reference/transcript-hygiene.md | 4 +- .../openai-reasoning-compat.live.test.ts | 141 +++++++ ...ed-runner.sanitize-session-history.test.ts | 173 +++++++- src/agents/pi-embedded-runner/compact.ts | 11 + .../pi-embedded-runner/replay-history.ts | 13 +- .../attempt.tool-call-normalization.test.ts | 59 +++ src/agents/pi-embedded-runner/run/attempt.ts | 7 + .../session-tool-result-guard-wrapper.ts | 2 + src/agents/session-tool-result-guard.test.ts | 12 + src/agents/session-tool-result-guard.ts | 8 +- src/agents/session-transcript-repair.test.ts | 67 ++- src/agents/session-transcript-repair.ts | 18 +- src/agents/tool-replay-repair.live.test.ts | 386 ++++++++++++++++++ .../transport-message-transform.test.ts | 170 +++++++- src/agents/transport-message-transform.ts | 92 ++--- 16 files changed, 1091 insertions(+), 73 deletions(-) create mode 100644 src/agents/tool-replay-repair.live.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 59612634859..9d8c16fc50b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -75,6 +75,7 @@ Docs: https://docs.openclaw.ai - Gateway/tools: allow `POST /tools/invoke` to reach plugin-backed catalog tools such as `browser` when no core implementation exists, while still preferring built-in tools for real core names. Thanks @chat2way. - Browser/security: require `operator.admin` for the `browser.request` gateway method, matching the host/browser-node control authority exposed by that route. Thanks @RichardCao. - Browser/profiles: allow local managed profiles to override `browser.executablePath`, so different profiles can launch different Chromium-based browsers. Thanks @nobrainer-tech. 
+- Agents/replay: repair displaced or missing tool results before strict provider replay, use Codex-compatible `aborted` outputs for OpenAI Responses history, and drop partial aborted/error transport turns before retries. - Reply media: allow sandboxed replies to deliver OpenClaw-managed `media/outbound` and `media/tool-*` attachments without treating them as sandbox escapes, while keeping alias-escape checks on the managed media root. Fixes #71138. Thanks @mayor686, @truffle-dev, and @neeravmakwana. - CLI/agent: keep `openclaw agent --json` stdout reserved for the JSON response by routing gateway, plugin, and embedded-fallback diagnostics to stderr before execution starts. Fixes #71319. - Agents/Gemini: retry reasoning-only, empty, and planning-only Gemini turns instead of letting sessions silently stall. Fixes #71074. (#71362) Thanks @neeravmakwana. diff --git a/docs/reference/transcript-hygiene.md b/docs/reference/transcript-hygiene.md index b65e913b205..e195150b9e9 100644 --- a/docs/reference/transcript-hygiene.md +++ b/docs/reference/transcript-hygiene.md @@ -114,9 +114,9 @@ external end-user instructions. - Image sanitization only. - Drop orphaned reasoning signatures (standalone reasoning items without a following content block) for OpenAI Responses/Codex transcripts, and drop replayable OpenAI reasoning after a model route switch. - No tool call id sanitization. -- No tool result pairing repair. +- Tool result pairing repair may move real matched outputs and synthesize Codex-style `aborted` outputs for missing tool calls. - No turn validation or reordering. -- No synthetic tool results. +- Missing OpenAI Responses-family tool outputs are synthesized as `aborted` to match Codex replay normalization. - No thought signature stripping. 
**Google (Generative AI / Gemini CLI / Antigravity)** diff --git a/src/agents/openai-reasoning-compat.live.test.ts b/src/agents/openai-reasoning-compat.live.test.ts index 09670fb6ff5..ae8bcf5106c 100644 --- a/src/agents/openai-reasoning-compat.live.test.ts +++ b/src/agents/openai-reasoning-compat.live.test.ts @@ -1,10 +1,14 @@ +import type { AgentMessage } from "@mariozechner/pi-agent-core"; import { completeSimple, type Api, type Model } from "@mariozechner/pi-ai"; +import { SessionManager } from "@mariozechner/pi-coding-agent"; +import { Type } from "typebox"; import { describe, expect, it } from "vitest"; import { loadConfig } from "../config/config.js"; import { resolveOpenClawAgentDir } from "./agent-paths.js"; import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "./live-test-helpers.js"; import { getApiKeyForModel, requireApiKey } from "./model-auth.js"; import { ensureOpenClawModelsJson } from "./models-config.js"; +import { sanitizeSessionHistory } from "./pi-embedded-runner/replay-history.js"; import { discoverAuthStorage, discoverModels } from "./pi-model-discovery.js"; const LIVE = isLiveTestEnabled(); @@ -169,4 +173,141 @@ describeLive("openai reasoning compat live", () => { }, 3 * 60 * 1000, ); + + it( + "accepts repaired OpenAI Codex parallel tool replay with aborted missing results", + async () => { + const { provider, modelId } = resolveTargetModelRef(); + const cfg = loadConfig(); + await ensureOpenClawModelsJson(cfg); + + const agentDir = resolveOpenClawAgentDir(); + const authStorage = discoverAuthStorage(agentDir); + const modelRegistry = discoverModels(authStorage, agentDir); + const model = modelRegistry.find(provider, modelId) as Model | null; + + if (!model) { + logProgress(`[openai-reasoning-compat] model missing from registry: ${TARGET_MODEL_REF}`); + return; + } + + let apiKeyInfo; + try { + apiKeyInfo = await getApiKeyForModel({ + model, + cfg, + credentialPrecedence: LIVE_CREDENTIAL_PRECEDENCE, + }); + } catch (error) { + 
logProgress(`[openai-reasoning-compat] skip (${String(error)})`); + return; + } + + if (REQUIRE_PROFILE_KEYS && !apiKeyInfo.source.startsWith("profile:")) { + logProgress( + `[openai-reasoning-compat] skip (non-profile credential source: ${apiKeyInfo.source})`, + ); + return; + } + + const messages = [ + { + role: "user", + content: "Use noop.", + timestamp: Date.now(), + }, + { + role: "assistant", + provider: model.provider, + api: model.api, + model: model.id, + stopReason: "toolUse", + timestamp: Date.now(), + content: [ + { type: "toolCall", id: "call_keep", name: "noop", arguments: {} }, + { type: "toolCall", id: "call_missing_a", name: "noop", arguments: {} }, + { type: "toolCall", id: "call_missing_b", name: "noop", arguments: {} }, + ], + }, + { + role: "user", + content: "Reply with exactly: replay ok.", + timestamp: Date.now(), + }, + { + role: "toolResult", + toolCallId: "call_keep", + toolName: "noop", + content: [{ type: "text", text: "ok" }], + isError: false, + timestamp: Date.now(), + }, + ] as unknown as AgentMessage[]; + + const sanitized = await sanitizeSessionHistory({ + messages, + modelApi: model.api, + provider: model.provider, + modelId: model.id, + sessionManager: SessionManager.inMemory(), + sessionId: "openai-codex-tool-replay-live", + }); + + expect(sanitized.map((message) => message.role)).toEqual([ + "user", + "assistant", + "toolResult", + "toolResult", + "toolResult", + "user", + ]); + expect( + sanitized.slice(2, 5).map((message) => (message as { toolCallId?: string }).toolCallId), + ).toEqual(["call_keep", "call_missing_a", "call_missing_b"]); + expect( + sanitized + .slice(3, 5) + .map((message) => (message as Extract).content), + ).toEqual([[{ type: "text", text: "aborted" }], [{ type: "text", text: "aborted" }]]); + expect(JSON.stringify(sanitized)).not.toContain("missing tool result"); + + const response = await completeSimpleWithTimeout( + model, + { + systemPrompt: "You are a concise assistant. 
Follow the user's instruction exactly.", + messages: sanitized as never, + tools: [ + { + name: "noop", + description: "Return ok.", + parameters: Type.Object({}, { additionalProperties: false }), + }, + ], + }, + { + apiKey: requireApiKey(apiKeyInfo, model.provider), + reasoning: "low", + maxTokens: 64, + }, + 120_000, + ); + + const text = response.content + .filter((block) => block.type === "text") + .map((block) => block.text.trim()) + .join(" ") + .trim(); + const errorMessage = + typeof (response as { errorMessage?: unknown }).errorMessage === "string" + ? ((response as { errorMessage?: string }).errorMessage ?? "") + : ""; + if (errorMessage && isKnownLiveBlocker(errorMessage)) { + logProgress(`[openai-reasoning-compat] skip (${errorMessage})`); + return; + } + + expect(text).toMatch(/^replay ok\.?$/i); + }, + 3 * 60 * 1000, + ); }); diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts index 8c9a54b5caa..cc8fb530b5f 100644 --- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts +++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts @@ -688,20 +688,181 @@ describe("sanitizeSessionHistory", () => { expect(result[1]?.role).toBe("assistant"); }); - it("synthesizes missing tool results for openai-responses after repair", async () => { + it("synthesizes Codex-style aborted tool results for openai-responses after repair", async () => { const messages: AgentMessage[] = [ + makeUserMessage("start"), makeAssistantMessage([{ type: "toolCall", id: "call_1", name: "read", arguments: {} }], { stopReason: "toolUse", }), + makeUserMessage("continue"), + ]; + + const result = await sanitizeOpenAIHistory(messages); + + expect(result.map((message) => message.role)).toEqual([ + "user", + "assistant", + "toolResult", + "user", + ]); + expect((result[2] as { toolCallId?: string }).toolCallId).toBe("call1"); + expect((result[2] as Extract).content).toEqual([ + { type: 
"text", text: "aborted" }, + ]); + expect(JSON.stringify(result)).not.toContain("missing tool result"); + }); + + it("synthesizes Codex-style aborted tool results for openai-codex-responses", async () => { + const messages: AgentMessage[] = [ + makeAssistantMessage( + [ + { type: "toolCall", id: "call_a", name: "exec", arguments: {} }, + { type: "toolCall", id: "call_b", name: "exec", arguments: {} }, + { type: "toolCall", id: "call_c", name: "exec", arguments: {} }, + ], + { stopReason: "toolUse" }, + ), + makeUserMessage("status?"), + ]; + + const result = await sanitizeSessionHistory({ + messages, + modelApi: "openai-codex-responses", + provider: "openai-codex", + sessionManager: mockSessionManager, + sessionId: TEST_SESSION_ID, + }); + + expect(result.map((message) => message.role)).toEqual([ + "assistant", + "toolResult", + "toolResult", + "toolResult", + "user", + ]); + expect( + result.slice(1, 4).map((message) => (message as { toolCallId?: string }).toolCallId), + ).toEqual(["calla", "callb", "callc"]); + for (const message of result.slice(1, 4)) { + expect((message as Extract).content).toEqual([ + { type: "text", text: "aborted" }, + ]); + } + expect(JSON.stringify(result)).not.toContain("missing tool result"); + }); + + it("keeps real parallel tool results for openai-responses and aborts missing siblings", async () => { + const messages: AgentMessage[] = [ + makeAssistantMessage( + [ + { type: "toolCall", id: "call_1", name: "read", arguments: {} }, + { type: "toolCall", id: "call_2", name: "exec", arguments: {} }, + { type: "toolCall", id: "call_3", name: "write", arguments: {} }, + ], + { stopReason: "toolUse" }, + ), + makeUserMessage("continue"), + castAgentMessage({ + role: "toolResult", + toolCallId: "call_2", + toolName: "exec", + content: [{ type: "text", text: "ok" }], + isError: false, + }), ]; const result = await sanitizeOpenAIHistory(messages); - // repairToolUseResultPairing now runs for all providers (including OpenAI) - // to fix orphaned 
function_call_output items that OpenAI would reject. - expect(result).toHaveLength(2); - expect(result[0]?.role).toBe("assistant"); - expect(result[1]?.role).toBe("toolResult"); + expect(result.map((message) => message.role)).toEqual([ + "assistant", + "toolResult", + "toolResult", + "toolResult", + "user", + ]); + expect( + extractToolCallsFromAssistant(result[0] as Extract), + ).toMatchObject([ + { id: "call1", name: "read" }, + { id: "call2", name: "exec" }, + { id: "call3", name: "write" }, + ]); + expect( + result.slice(1, 4).map((message) => (message as { toolCallId?: string }).toolCallId), + ).toEqual(["call1", "call2", "call3"]); + expect((result[1] as Extract).content).toEqual([ + { type: "text", text: "aborted" }, + ]); + expect((result[2] as Extract).content).toEqual([ + { type: "text", text: "ok" }, + ]); + expect((result[3] as Extract).content).toEqual([ + { type: "text", text: "aborted" }, + ]); + expect(JSON.stringify(result)).not.toContain("missing tool result"); + }); + + it("applies aborted missing-result repair to azure-openai-responses", async () => { + const messages: AgentMessage[] = [ + makeAssistantMessage([{ type: "toolCall", id: "call_azure", name: "read", arguments: {} }], { + stopReason: "toolUse", + }), + makeUserMessage("continue"), + ]; + + const result = await sanitizeSessionHistory({ + messages, + modelApi: "azure-openai-responses", + provider: "azure-openai-responses", + sessionManager: mockSessionManager, + sessionId: TEST_SESSION_ID, + }); + + expect(result.map((message) => message.role)).toEqual(["assistant", "toolResult", "user"]); + expect((result[1] as { toolCallId?: string }).toolCallId).toBe("callazure"); + expect((result[1] as Extract).content).toEqual([ + { type: "text", text: "aborted" }, + ]); + }); + + it("drops duplicate and orphan OpenAI outputs while preserving the first real result", async () => { + const messages: AgentMessage[] = [ + castAgentMessage({ + role: "toolResult", + toolCallId: "call_orphan", + 
toolName: "read", + content: [{ type: "text", text: "orphan" }], + isError: false, + }), + makeAssistantMessage([{ type: "toolCall", id: "call_keep", name: "read", arguments: {} }], { + stopReason: "toolUse", + }), + castAgentMessage({ + role: "toolResult", + toolCallId: "call_keep", + toolName: "read", + content: [{ type: "text", text: "first" }], + isError: false, + }), + castAgentMessage({ + role: "toolResult", + toolCallId: "call_keep", + toolName: "read", + content: [{ type: "text", text: "duplicate" }], + isError: false, + }), + makeUserMessage("continue"), + ]; + + const result = await sanitizeOpenAIHistory(messages); + + expect(result.map((message) => message.role)).toEqual(["assistant", "toolResult", "user"]); + expect((result[1] as { toolCallId?: string }).toolCallId).toBe("callkeep"); + expect((result[1] as Extract).content).toEqual([ + { type: "text", text: "first" }, + ]); + expect(JSON.stringify(result)).not.toContain("orphan"); + expect(JSON.stringify(result)).not.toContain("duplicate"); }); it.each([ diff --git a/src/agents/pi-embedded-runner/compact.ts b/src/agents/pi-embedded-runner/compact.ts index 30f042b8e4c..703a72a7dfe 100644 --- a/src/agents/pi-embedded-runner/compact.ts +++ b/src/agents/pi-embedded-runner/compact.ts @@ -810,6 +810,12 @@ export async function compactEmbeddedPiSessionDirect( config: params.config, contextWindowTokens: ctxInfo.tokens, allowSyntheticToolResults: transcriptPolicy.allowSyntheticToolResults, + missingToolResultText: + model.api === "openai-responses" || + model.api === "azure-openai-responses" || + model.api === "openai-codex-responses" + ? "aborted" + : undefined, allowedToolNames, }); checkpointSnapshot = captureCompactionCheckpointSnapshot({ @@ -965,6 +971,11 @@ export async function compactEmbeddedPiSessionDirect( const limited = transcriptPolicy.repairToolUseResultPairing ? 
sanitizeToolUseResultPairing(truncated, { erroredAssistantResultPolicy: "drop", + ...(model.api === "openai-responses" || + model.api === "azure-openai-responses" || + model.api === "openai-codex-responses" + ? { missingToolResultText: "aborted" } + : {}), }) : truncated; if (limited.length > 0) { diff --git a/src/agents/pi-embedded-runner/replay-history.ts b/src/agents/pi-embedded-runner/replay-history.ts index 938f063f1ff..b912b4b1234 100644 --- a/src/agents/pi-embedded-runner/replay-history.ts +++ b/src/agents/pi-embedded-runner/replay-history.ts @@ -493,13 +493,17 @@ export async function sanitizeSessionHistory(params: { allowedToolNames: params.allowedToolNames, allowProviderOwnedThinkingReplay, }); - // OpenAI's fc_* pairing downgrade needs the raw call_id|fc_id separator intact, - // but displaced tool results must first be repaired back next to their - // assistant turn so the downgrade can rewrite both sides consistently. + // OpenAI Responses rejects orphan/missing function_call_output items. Upstream + // Codex repairs those gaps with "aborted"; keep that before the fc_* downgrade + // so both call and result ids are rewritten together. Covered by unit replay + // tests plus live OpenAI/Codex and generic replay-repair model tests. const openAIRepairedToolCalls = isOpenAIResponsesApi && policy.repairToolUseResultPairing ? sanitizeToolUseResultPairing(sanitizedToolCalls, { erroredAssistantResultPolicy: "drop", + // Match upstream Codex history normalization for OpenAI Responses: + // missing function_call_output entries are model-visible "aborted". + missingToolResultText: "aborted", }) : sanitizedToolCalls; const openAISafeToolCalls = isOpenAIResponsesApi @@ -517,6 +521,9 @@ export async function sanitizeSessionHistory(params: { allowedToolNames: params.allowedToolNames, }) : openAISafeToolCalls; + // Gemini/Anthropic-class providers also require tool results to stay adjacent + // to their assistant tool calls. 
They do not use Codex's "aborted" text, but + // the same ordering repair is live-tested with Gemini 3 Flash. const repairedTools = !isOpenAIResponsesApi && policy.repairToolUseResultPairing ? sanitizeToolUseResultPairing(sanitizedToolIds, { diff --git a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.test.ts b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.test.ts index 6620dda0d89..c605c7e4b1d 100644 --- a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.test.ts @@ -61,6 +61,65 @@ describe("sanitizeReplayToolCallIdsForStream", () => { ]); }); + it("synthesizes missing tool results after strict id sanitization", () => { + const rawId = "call_function_av7cbkigmk7x1"; + const out = sanitizeReplayToolCallIdsForStream({ + messages: [ + { + role: "assistant", + content: [ + { type: "toolUse", id: rawId, name: "read", input: { path: "." } }, + { type: "toolUse", id: "call_missing", name: "exec", input: { cmd: "true" } }, + ], + } as never, + { + role: "toolResult", + toolCallId: rawId, + toolUseId: rawId, + toolName: "read", + content: [{ type: "text", text: "ok" }], + isError: false, + } as never, + ], + mode: "strict", + repairToolUseResultPairing: true, + }); + + expect(out.map((message) => message.role)).toEqual(["assistant", "toolResult", "toolResult"]); + expect((out[0] as Extract).content).toMatchObject([ + { type: "toolUse", id: "callfunctionav7cbkigmk7x1", name: "read" }, + { type: "toolUse", id: "callmissing", name: "exec" }, + ]); + expect(out[1]).toMatchObject({ + role: "toolResult", + toolCallId: "callfunctionav7cbkigmk7x1", + toolUseId: "callfunctionav7cbkigmk7x1", + }); + expect(out[2]).toMatchObject({ + role: "toolResult", + toolCallId: "callmissing", + isError: true, + }); + }); + + it("synthesizes missing tool results when repair is enabled", () => { + const out = sanitizeReplayToolCallIdsForStream({ + messages: [ + { + 
role: "assistant", + content: [{ type: "toolUse", id: "call_missing", name: "exec", input: { cmd: "true" } }], + } as never, + ], + mode: "strict", + repairToolUseResultPairing: true, + }); + + expect(out).toMatchObject([ + { role: "assistant" }, + { role: "toolResult", toolCallId: "callmissing", isError: true }, + ]); + }); + it("keeps real tool results for aborted assistant spans", () => { const rawId = "call_function_av7cbkigmk7x1"; const out = sanitizeReplayToolCallIdsForStream({ diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 5bb325ed5c0..c2bc4162b86 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -1193,6 +1193,12 @@ export async function runEmbeddedAttempt( contextWindowTokens: params.contextTokenBudget, inputProvenance: params.inputProvenance, allowSyntheticToolResults: transcriptPolicy.allowSyntheticToolResults, + missingToolResultText: + params.model.api === "openai-responses" || + params.model.api === "azure-openai-responses" || + params.model.api === "openai-codex-responses" + ? "aborted" + : undefined, allowedToolNames, }); trackSessionManagerAccess(params.sessionFile); @@ -1840,6 +1846,7 @@ export async function runEmbeddedAttempt( const limited = transcriptPolicy.repairToolUseResultPairing ? sanitizeToolUseResultPairing(truncated, { erroredAssistantResultPolicy: "drop", + ...(isOpenAIResponsesApi ? 
{ missingToolResultText: "aborted" } : {}), }) : truncated; cacheTrace?.recordStage("session:limited", { messages: limited }); diff --git a/src/agents/session-tool-result-guard-wrapper.ts b/src/agents/session-tool-result-guard-wrapper.ts index cdfd499d90a..79939e7ab96 100644 --- a/src/agents/session-tool-result-guard-wrapper.ts +++ b/src/agents/session-tool-result-guard-wrapper.ts @@ -29,6 +29,7 @@ export function guardSessionManager( contextWindowTokens?: number; inputProvenance?: InputProvenance; allowSyntheticToolResults?: boolean; + missingToolResultText?: string; allowedToolNames?: Iterable; }, ): GuardedSessionManager { @@ -75,6 +76,7 @@ export function guardSessionManager( applyInputProvenanceToUserMessage(message, opts?.inputProvenance), transformToolResultForPersistence: transform, allowSyntheticToolResults: opts?.allowSyntheticToolResults, + missingToolResultText: opts?.missingToolResultText, allowedToolNames: opts?.allowedToolNames, beforeMessageWriteHook: beforeMessageWrite, maxToolResultChars: diff --git a/src/agents/session-tool-result-guard.test.ts b/src/agents/session-tool-result-guard.test.ts index 93fa78083a9..82c1fc5311a 100644 --- a/src/agents/session-tool-result-guard.test.ts +++ b/src/agents/session-tool-result-guard.test.ts @@ -111,6 +111,18 @@ describe("installSessionToolResultGuard", () => { expectPersistedRoles(sm, ["assistant", "toolResult"]); }); + it("uses configured text for synthetic tool results", () => { + const sm = SessionManager.inMemory(); + const guard = installSessionToolResultGuard(sm, { + missingToolResultText: "aborted", + }); + + sm.appendMessage(toolCallMessage); + guard.flushPendingToolResults(); + + expect(getToolResultText(getPersistedMessages(sm))).toBe("aborted"); + }); + it("clears pending tool calls without inserting synthetic tool results", () => { const sm = SessionManager.inMemory(); const guard = installSessionToolResultGuard(sm); diff --git a/src/agents/session-tool-result-guard.ts 
b/src/agents/session-tool-result-guard.ts index 242e26ac0eb..c3cb82314c5 100644 --- a/src/agents/session-tool-result-guard.ts +++ b/src/agents/session-tool-result-guard.ts @@ -90,6 +90,7 @@ export function installSessionToolResultGuard( * Defaults to true. */ allowSyntheticToolResults?: boolean; + missingToolResultText?: string; /** * Optional set/list of tool names accepted for assistant toolCall/toolUse blocks. * When set, tool calls with unknown names are dropped before persistence. @@ -127,6 +128,7 @@ export function installSessionToolResultGuard( }; const allowSyntheticToolResults = opts?.allowSyntheticToolResults ?? true; + const missingToolResultText = opts?.missingToolResultText; const beforeWrite = opts?.beforeMessageWriteHook; const maxToolResultChars = resolveMaxToolResultChars(opts); @@ -154,7 +156,11 @@ export function installSessionToolResultGuard( } if (allowSyntheticToolResults) { for (const [id, name] of pendingState.entries()) { - const synthetic = makeMissingToolResult({ toolCallId: id, toolName: name }); + const synthetic = makeMissingToolResult({ + toolCallId: id, + toolName: name, + text: missingToolResultText, + }); const flushed = applyBeforeWriteHook( persistToolResult(persistMessage(synthetic), { toolCallId: id, diff --git a/src/agents/session-transcript-repair.test.ts b/src/agents/session-transcript-repair.test.ts index d95cef17564..ecab4cc2485 100644 --- a/src/agents/session-transcript-repair.test.ts +++ b/src/agents/session-transcript-repair.test.ts @@ -76,6 +76,68 @@ describe("sanitizeToolUseResultPairing", () => { expect(out[3]?.role).toBe("user"); }); + it("uses custom text for synthesized missing tool results", () => { + const input = castAgentMessages([ + { + role: "assistant", + content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }], + }, + { role: "user", content: "user message that should come after tool use" }, + ]); + + const result = repairToolUseResultPairing(input, { + missingToolResultText: "aborted", + 
}); + + expect(result.added).toHaveLength(1); + expect(result.messages.map((m) => m.role)).toEqual(["assistant", "toolResult", "user"]); + expect(result.added[0]?.content).toEqual([{ type: "text", text: "aborted" }]); + }); + + it("keeps matched parallel tool results and synthesizes only missing siblings", () => { + const input = castAgentMessages([ + { + role: "assistant", + content: [ + { type: "text", text: "checking" }, + { type: "toolCall", id: "call_1", name: "read", arguments: {} }, + { type: "toolCall", id: "call_2", name: "exec", arguments: {} }, + { type: "toolCall", id: "call_3", name: "write", arguments: {} }, + ], + }, + { role: "user", content: "user message that should come after tool use" }, + { + role: "toolResult", + toolCallId: "call_2", + toolName: "exec", + content: [{ type: "text", text: "ok" }], + isError: false, + }, + ]); + + const result = repairToolUseResultPairing(input, { + missingToolResultText: "aborted", + }); + + expect(result.added.map((message) => message.toolCallId)).toEqual(["call_1", "call_3"]); + expect(result.messages.map((m) => m.role)).toEqual([ + "assistant", + "toolResult", + "toolResult", + "toolResult", + "user", + ]); + expect(getAssistantToolCallBlocks(result.messages)).toMatchObject([ + { id: "call_1", name: "read" }, + { id: "call_2", name: "exec" }, + { id: "call_3", name: "write" }, + ]); + expect((result.messages[1] as { toolCallId?: string }).toolCallId).toBe("call_1"); + expect((result.messages[2] as { toolCallId?: string }).toolCallId).toBe("call_2"); + expect((result.messages[3] as { toolCallId?: string }).toolCallId).toBe("call_3"); + expect(JSON.stringify(result.added)).not.toContain("missing tool result"); + }); + it("repairs blank tool result names from matching tool calls", () => { const input = castAgentMessages([ { @@ -248,9 +310,8 @@ describe("sanitizeToolUseResultPairing", () => { }); expect(result.droppedOrphanCount).toBe(0); - expect(result.messages).toHaveLength(2); - 
expect(result.messages[0]?.role).toBe("assistant"); - expect(result.messages[1]?.role).toBe("user"); + expect(result.messages).toHaveLength(1); + expect(result.messages[0]?.role).toBe("user"); expect(result.added).toHaveLength(0); }); }); diff --git a/src/agents/session-transcript-repair.ts b/src/agents/session-transcript-repair.ts index 2260a29b1f0..5e651603e80 100644 --- a/src/agents/session-transcript-repair.ts +++ b/src/agents/session-transcript-repair.ts @@ -175,6 +175,12 @@ function isReplaySafeThinkingAssistantTurn( function makeMissingToolResult(params: { toolCallId: string; toolName?: string; + // OpenAI Responses/Codex replay should match upstream Codex's "aborted" + // function_call_output normalization; live coverage in + // openai-reasoning-compat.live.test.ts and tool-replay-repair.live.test.ts + // sends this repaired history to real models. Other providers keep the older, + // explicit OpenClaw diagnostic text unless the caller opts in. + text?: string; }): Extract { return { role: "toolResult", @@ -183,7 +189,9 @@ function makeMissingToolResult(params: { content: [ { type: "text", - text: "[openclaw] missing tool result in session history; inserted synthetic error result for transcript repair.", + text: + params.text ?? + "[openclaw] missing tool result in session history; inserted synthetic error result for transcript repair.", }, ], isError: true, @@ -232,6 +240,7 @@ export type ErroredAssistantResultPolicy = "preserve" | "drop"; export type ToolUseResultPairingOptions = { erroredAssistantResultPolicy?: ErroredAssistantResultPolicy; + missingToolResultText?: string; }; export function stripToolResultDetails(messages: AgentMessage[]): AgentMessage[] { @@ -529,8 +538,8 @@ export function repairToolUseResultPairing( // tool calls in the same turn after malformed siblings are dropped. 
const stopReason = (assistant as { stopReason?: string }).stopReason; if (stopReason === "error" || stopReason === "aborted") { - out.push(msg); if (!shouldDropErroredAssistantResults(options)) { + out.push(msg); for (const toolCall of toolCalls) { const result = spanResultsById.get(toolCall.id); if (!result) { @@ -540,6 +549,8 @@ export function repairToolUseResultPairing( } } else if (spanResultsById.size > 0) { changed = true; + } else { + changed = true; } for (const rem of remainder) { out.push(rem); @@ -551,6 +562,8 @@ export function repairToolUseResultPairing( out.push(msg); if (spanResultsById.size > 0 && remainder.length > 0) { + // Preserve real late-arriving results before synthesizing missing siblings; + // otherwise parallel tool replay can replace useful output with repair noise. moved = true; changed = true; } @@ -563,6 +576,7 @@ export function repairToolUseResultPairing( const missing = makeMissingToolResult({ toolCallId: call.id, toolName: call.name, + text: options?.missingToolResultText, }); added.push(missing); changed = true; diff --git a/src/agents/tool-replay-repair.live.test.ts b/src/agents/tool-replay-repair.live.test.ts new file mode 100644 index 00000000000..ebde652c777 --- /dev/null +++ b/src/agents/tool-replay-repair.live.test.ts @@ -0,0 +1,386 @@ +import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import { completeSimple, type Api, type Context, type Model } from "@mariozechner/pi-ai"; +import { SessionManager } from "@mariozechner/pi-coding-agent"; +import { Type } from "typebox"; +import { describe, expect, it } from "vitest"; +import { loadConfig } from "../config/config.js"; +import { resolveOpenClawAgentDir } from "./agent-paths.js"; +import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "./live-test-helpers.js"; +import { getApiKeyForModel, requireApiKey } from "./model-auth.js"; +import { ensureOpenClawModelsJson } from "./models-config.js"; +import { sanitizeSessionHistory } from 
"./pi-embedded-runner/replay-history.js";
+import { discoverAuthStorage, discoverModels } from "./pi-model-discovery.js";
+import { transformTransportMessages } from "./transport-message-transform.js";
+
+// Live gating: the suite is registered through describe.skip unless
+// isLiveTestEnabled() returns true.
+const LIVE = isLiveTestEnabled();
+// Profile-key mode switches credential lookup to "profile-first"; the tests in
+// this file additionally skip targets whose key source is not "profile:*".
+const REQUIRE_PROFILE_KEYS = isLiveProfileKeyModeEnabled();
+const LIVE_CREDENTIAL_PRECEDENCE = REQUIRE_PROFILE_KEYS ? "profile-first" : "env-first";
+// Comma-separated "provider/modelId" refs used when
+// OPENCLAW_LIVE_TOOL_REPLAY_REPAIR_MODELS is not set.
+const DEFAULT_TARGET_MODEL_REFS = "openai-codex/gpt-5.5,google/gemini-3-flash-preview";
+const TARGET_MODEL_REFS = parseTargetModelRefs(
+  process.env.OPENCLAW_LIVE_TOOL_REPLAY_REPAIR_MODELS ?? DEFAULT_TARGET_MODEL_REFS,
+);
+const describeLive = LIVE ? describe : describe.skip;
+
+// One parsed entry of the target list: the raw ref plus its two halves.
+type TargetModelRef = {
+  ref: string;
+  provider: string;
+  modelId: string;
+};
+
+// Parses a comma-separated list of "provider/modelId" refs.
+// - Entries are trimmed and empty entries are dropped, so undefined or ""
+//   yields an empty list.
+// - Only the first "/" separates provider from model id; later slashes stay in
+//   the model id.
+// - Throws when either half is missing, naming the offending entry.
+function parseTargetModelRefs(raw: string | undefined): TargetModelRef[] {
+  return (raw ?? "")
+    .split(",")
+    .map((item) => item.trim())
+    .filter(Boolean)
+    .map((ref) => {
+      const [provider, ...rest] = ref.split("/");
+      const modelId = rest.join("/").trim();
+      if (!provider?.trim() || !modelId) {
+        throw new Error(
+          `Invalid OPENCLAW_LIVE_TOOL_REPLAY_REPAIR_MODELS entry: ${JSON.stringify(ref)}`,
+        );
+      }
+      return { ref, provider: provider.trim(), modelId };
+    });
+}
+
+// Writes a "[live] "-prefixed progress line to stderr.
+function logProgress(message: string): void {
+  process.stderr.write(`[live] ${message}\n`);
+}
+
+// Runs completeSimple with two guards that both fire after timeoutMs:
+// - an AbortController signal passed to the call, and
+// - a racing promise that rejects with a timeout error, so the await settles
+//   no matter how the call itself reacts to the signal.
+// Both timers are unref'd and both are cleared once the race settles, so a
+// finished call leaves no pending timer or reject closure behind.
+async function completeSimpleWithTimeout<TApi extends Api>(
+  model: Model<TApi>,
+  context: Parameters<typeof completeSimple<TApi>>[1],
+  options: Parameters<typeof completeSimple<TApi>>[2],
+  timeoutMs: number,
+): Promise<Awaited<ReturnType<typeof completeSimple<TApi>>>> {
+  const controller = new AbortController();
+  const abortTimer = setTimeout(() => {
+    controller.abort();
+  }, timeoutMs);
+  abortTimer.unref?.();
+  // Hoisted so the finally block can cancel it; the Promise executor runs
+  // synchronously, so the handle is assigned before the race is awaited.
+  let hardTimer: ReturnType<typeof setTimeout> | undefined;
+  try {
+    return await Promise.race([
+      completeSimple(model, context, {
+        ...options,
+        signal: controller.signal,
+      }),
+      new Promise<never>((_, reject) => {
+        hardTimer = setTimeout(() => {
+          reject(new Error(`model call timed out after ${timeoutMs}ms`));
+        }, timeoutMs);
+        hardTimer.unref?.();
+      }),
+    ]);
+  } finally {
+    clearTimeout(abortTimer);
+    clearTimeout(hardTimer);
+  }
+}
+
+function isOpenAIResponsesFamily(api: string): boolean { + return ( + api === "openai-responses" || + api === "openai-codex-responses" || + api === "azure-openai-responses" + ); +} + +function buildReplayMessages(model: Model): AgentMessage[] { + const now = Date.now(); + // Gemini source metadata deliberately simulates a model switch from a + // provider-owned transcript. That forces the same id sanitization and replay + // repair path that failed in real session replays, not just the happy path for + // a same-provider synthetic fixture. + const source = + model.provider === "google" + ? { + api: "google-gemini-cli", + provider: "google-antigravity", + model: "claude-sonnet-4-20250514", + } + : { + api: model.api, + provider: model.provider, + model: model.id, + }; + + return [ + { + role: "user", + content: "Use noop.", + timestamp: now, + }, + { + role: "assistant", + provider: source.provider, + api: source.api, + model: source.model, + stopReason: "toolUse", + timestamp: now + 1, + content: [ + { type: "toolCall", id: "call_keep", name: "noop", arguments: {} }, + { type: "toolCall", id: "call_missing_a", name: "noop", arguments: {} }, + { type: "toolCall", id: "call_missing_b", name: "noop", arguments: {} }, + ], + }, + { + role: "user", + content: "Reply with exactly: replay repair ok.", + timestamp: now + 2, + }, + { + role: "toolResult", + toolCallId: "call_keep", + toolName: "noop", + content: [{ type: "text", text: "ok" }], + isError: false, + timestamp: now + 3, + }, + ] as unknown as AgentMessage[]; +} + +function buildAbortedTransportMessages(model: Model): Context["messages"] { + const now = Date.now(); + return [ + { + role: "assistant", + provider: model.provider, + api: model.api, + model: model.id, + stopReason: "aborted", + timestamp: now, + content: [{ type: "toolCall", id: "call_transport_aborted", name: "noop", arguments: {} }], + }, + { + role: "user", + content: "Reply with exactly: transport replay ok.", + timestamp: now + 1, + }, + ] as 
Context["messages"]; +} + +function syntheticToolResultText(message: AgentMessage): string | undefined { + if (message.role !== "toolResult") { + return undefined; + } + const first = message.content[0] as { type?: unknown; text?: unknown } | undefined; + return first?.type === "text" && typeof first.text === "string" ? first.text : undefined; +} + +function assistantToolCallIds(message: AgentMessage): string[] { + if (message.role !== "assistant") { + return []; + } + return message.content.filter((block) => block.type === "toolCall").map((block) => block.id); +} + +function isKnownLiveBlocker(errorMessage: string): boolean { + return ( + /not supported when using codex with a chatgpt account/i.test(errorMessage) || + /hit your chatgpt usage limit/i.test(errorMessage) + ); +} + +describeLive("tool replay repair live", () => { + for (const target of TARGET_MODEL_REFS) { + it( + `accepts repaired displaced and missing tool results with ${target.ref}`, + async () => { + const cfg = loadConfig(); + await ensureOpenClawModelsJson(cfg); + + const agentDir = resolveOpenClawAgentDir(); + const authStorage = discoverAuthStorage(agentDir); + const modelRegistry = discoverModels(authStorage, agentDir); + const model = modelRegistry.find(target.provider, target.modelId) as Model | null; + + if (!model) { + logProgress(`[tool-replay-repair] model missing from registry: ${target.ref}`); + return; + } + + let apiKeyInfo; + try { + apiKeyInfo = await getApiKeyForModel({ + model, + cfg, + credentialPrecedence: LIVE_CREDENTIAL_PRECEDENCE, + }); + } catch (error) { + logProgress(`[tool-replay-repair] skip ${target.ref} (${String(error)})`); + return; + } + + if (REQUIRE_PROFILE_KEYS && !apiKeyInfo.source.startsWith("profile:")) { + logProgress( + `[tool-replay-repair] skip ${target.ref} (non-profile credential source: ${apiKeyInfo.source})`, + ); + return; + } + + logProgress(`[tool-replay-repair] target=${target.ref} auth source=${apiKeyInfo.source}`); + const sanitized = await 
sanitizeSessionHistory({ + messages: buildReplayMessages(model), + modelApi: model.api, + provider: model.provider, + modelId: model.id, + sessionManager: SessionManager.inMemory(), + sessionId: `tool-replay-repair-live-${target.provider}-${target.modelId}`, + }); + + expect(sanitized.map((message) => message.role)).toEqual([ + "user", + "assistant", + "toolResult", + "toolResult", + "toolResult", + "user", + ]); + const assistantMessage = sanitized[1]; + expect(assistantMessage?.role).toBe("assistant"); + expect( + sanitized.slice(2, 5).map((message) => (message as { toolCallId?: string }).toolCallId), + ).toEqual(assistantToolCallIds(assistantMessage)); + + // These assertions are the model-visible contract: OpenAI Responses + // gets Codex-compatible "aborted" outputs, while Gemini proves the + // generic repair does not leak OpenAI wording into other providers. + const insertedTexts = sanitized.slice(3, 5).map(syntheticToolResultText); + if (isOpenAIResponsesFamily(model.api)) { + expect(insertedTexts).toEqual(["aborted", "aborted"]); + } else { + expect(insertedTexts).not.toContain("aborted"); + } + + // Sending the repaired transcript to the real model is the live proof: + // providers reject malformed tool-call adjacency before generation, so + // any non-error response here validates the repair shape end to end. + const response = await completeSimpleWithTimeout( + model, + { + systemPrompt: "You are a concise assistant. 
Follow the user's instruction exactly.", + messages: sanitized as never, + tools: [ + { + name: "noop", + description: "Return ok.", + parameters: Type.Object({}, { additionalProperties: false }), + }, + ], + }, + { + apiKey: requireApiKey(apiKeyInfo, model.provider), + reasoning: "low", + maxTokens: 96, + }, + 120_000, + ); + + const text = response.content + .filter((block) => block.type === "text") + .map((block) => block.text.trim()) + .join(" ") + .trim(); + const errorMessage = + typeof (response as { errorMessage?: unknown }).errorMessage === "string" + ? ((response as { errorMessage?: string }).errorMessage ?? "") + : ""; + if (errorMessage && isKnownLiveBlocker(errorMessage)) { + logProgress(`[tool-replay-repair] skip ${target.ref} (${errorMessage})`); + return; + } + + expect(response.stopReason).not.toBe("error"); + if (text.length > 0) { + expect(text).toMatch(/^replay repair ok\.?$/i); + } + }, + 3 * 60 * 1000, + ); + + it( + `accepts transport replay after dropping aborted assistant tool calls with ${target.ref}`, + async () => { + const cfg = loadConfig(); + await ensureOpenClawModelsJson(cfg); + + const agentDir = resolveOpenClawAgentDir(); + const authStorage = discoverAuthStorage(agentDir); + const modelRegistry = discoverModels(authStorage, agentDir); + const model = modelRegistry.find(target.provider, target.modelId) as Model | null; + + if (!model) { + logProgress(`[tool-replay-repair] model missing from registry: ${target.ref}`); + return; + } + + let apiKeyInfo; + try { + apiKeyInfo = await getApiKeyForModel({ + model, + cfg, + credentialPrecedence: LIVE_CREDENTIAL_PRECEDENCE, + }); + } catch (error) { + logProgress(`[tool-replay-repair] skip ${target.ref} (${String(error)})`); + return; + } + + if (REQUIRE_PROFILE_KEYS && !apiKeyInfo.source.startsWith("profile:")) { + logProgress( + `[tool-replay-repair] skip ${target.ref} (non-profile credential source: ${apiKeyInfo.source})`, + ); + return; + } + + const transformed = 
transformTransportMessages(buildAbortedTransportMessages(model), model); + expect(transformed.map((message) => message.role)).toEqual(["user"]); + expect(JSON.stringify(transformed)).not.toContain("call_transport_aborted"); + + // This is the transport replay regression proof: providers reject + // assistant(tool_call)->user replays without a matching result, so the + // dropped transcript must still be accepted by real model APIs. + const response = await completeSimpleWithTimeout( + model, + { + systemPrompt: "You are a concise assistant. Follow the user's instruction exactly.", + messages: transformed as never, + tools: [ + { + name: "noop", + description: "Return ok.", + parameters: Type.Object({}, { additionalProperties: false }), + }, + ], + }, + { + apiKey: requireApiKey(apiKeyInfo, model.provider), + reasoning: "low", + maxTokens: 96, + }, + 120_000, + ); + + const text = response.content + .filter((block) => block.type === "text") + .map((block) => block.text.trim()) + .join(" ") + .trim(); + const errorMessage = + typeof (response as { errorMessage?: unknown }).errorMessage === "string" + ? ((response as { errorMessage?: string }).errorMessage ?? 
"") + : ""; + if (errorMessage && isKnownLiveBlocker(errorMessage)) { + logProgress(`[tool-replay-repair] skip ${target.ref} (${errorMessage})`); + return; + } + + expect(response.stopReason).not.toBe("error"); + if (text.length > 0) { + expect(text).toMatch(/^transport replay ok\.?$/i); + } + }, + 3 * 60 * 1000, + ); + } +}); diff --git a/src/agents/transport-message-transform.test.ts b/src/agents/transport-message-transform.test.ts index f2f5b66ce5b..f185ed7cc95 100644 --- a/src/agents/transport-message-transform.test.ts +++ b/src/agents/transport-message-transform.test.ts @@ -9,20 +9,21 @@ function makeModel(api: Api, provider: string, id: string): Model { function assistantToolCall( id: string, name = "read", + stopReason: Extract["stopReason"] = "toolUse", ): Extract { return { role: "assistant", provider: "openai", api: "openai-responses", model: "gpt-5.4", - stopReason: "toolUse", + stopReason, timestamp: Date.now(), content: [{ type: "toolCall", id, name, arguments: {} }], } as Extract; } describe("transformTransportMessages synthetic tool-result policy", () => { - it("does not synthesize missing tool results for OpenAI-compatible transports", () => { + it("synthesizes Codex-style aborted tool results for OpenAI Responses transports", () => { const messages: Context["messages"] = [ assistantToolCall("call_openai_1"), { role: "user", content: "continue", timestamp: Date.now() }, @@ -33,7 +34,166 @@ describe("transformTransportMessages synthetic tool-result policy", () => { makeModel("openai-responses", "openai", "gpt-5.4"), ); - expect(result.map((msg) => msg.role)).toEqual(["assistant", "user"]); + expect(result.map((msg) => msg.role)).toEqual(["assistant", "toolResult", "user"]); + expect(result[1]).toMatchObject({ + role: "toolResult", + toolCallId: "call_openai_1", + isError: true, + content: [{ type: "text", text: "aborted" }], + }); + }); + + it("preserves real OpenAI transport results and aborts missing parallel siblings", () => { + const messages: 
Context["messages"] = [ + { + ...assistantToolCall("call_keep"), + content: [ + { type: "toolCall", id: "call_keep", name: "read", arguments: {} }, + { type: "toolCall", id: "call_missing", name: "exec", arguments: {} }, + ], + }, + { + role: "toolResult", + toolCallId: "call_keep", + toolName: "read", + content: [{ type: "text", text: "ok" }], + isError: false, + timestamp: Date.now(), + }, + { role: "user", content: "continue", timestamp: Date.now() }, + ]; + + const result = transformTransportMessages( + messages, + makeModel("openclaw-openai-responses-transport" as Api, "openai", "gpt-5.4"), + ); + + expect(result.map((msg) => msg.role)).toEqual([ + "assistant", + "toolResult", + "toolResult", + "user", + ]); + expect(result.slice(1, 3)).toMatchObject([ + { role: "toolResult", toolCallId: "call_keep", content: [{ type: "text", text: "ok" }] }, + { + role: "toolResult", + toolCallId: "call_missing", + content: [{ type: "text", text: "aborted" }], + }, + ]); + }); + + it("moves displaced OpenAI transport results before synthesizing missing siblings", () => { + const messages: Context["messages"] = [ + { + ...assistantToolCall("call_keep"), + content: [ + { type: "toolCall", id: "call_keep", name: "read", arguments: {} }, + { type: "toolCall", id: "call_missing", name: "exec", arguments: {} }, + ], + }, + { role: "user", content: "continue", timestamp: Date.now() }, + { + role: "toolResult", + toolCallId: "call_keep", + toolName: "read", + content: [{ type: "text", text: "late ok" }], + isError: false, + timestamp: Date.now(), + }, + ]; + + const result = transformTransportMessages( + messages, + makeModel("openai-responses", "openai", "gpt-5.4"), + ); + + expect(result.map((msg) => msg.role)).toEqual([ + "assistant", + "toolResult", + "toolResult", + "user", + ]); + expect(result.slice(1, 3)).toMatchObject([ + { role: "toolResult", toolCallId: "call_keep", content: [{ type: "text", text: "late ok" }] }, + { + role: "toolResult", + toolCallId: "call_missing", + 
content: [{ type: "text", text: "aborted" }], + }, + ]); + }); + + it("drops aborted OpenAI transport assistant tool calls before replay", () => { + const messages: Context["messages"] = [ + assistantToolCall("call_aborted", "exec", "aborted"), + { role: "user", content: "retry after abort", timestamp: Date.now() }, + ]; + + const result = transformTransportMessages( + messages, + makeModel("openai-responses", "openai", "gpt-5.4"), + ); + + expect(result.map((msg) => msg.role)).toEqual(["user"]); + expect(JSON.stringify(result)).not.toContain("call_aborted"); + }); + + it("drops text-only aborted and errored transport assistant turns before replay", () => { + const messages: Context["messages"] = [ + { + role: "assistant", + provider: "openai", + api: "openai-responses", + model: "gpt-5.4", + stopReason: "aborted", + timestamp: Date.now(), + content: [{ type: "text", text: "partial aborted output" }], + } as Extract, + { + role: "assistant", + provider: "openai", + api: "openai-responses", + model: "gpt-5.4", + stopReason: "error", + timestamp: Date.now(), + content: [{ type: "text", text: "partial error output" }], + } as Extract, + { role: "user", content: "retry after failed text turns", timestamp: Date.now() }, + ]; + + const result = transformTransportMessages( + messages, + makeModel("openai-responses", "openai", "gpt-5.4"), + ); + + expect(result.map((msg) => msg.role)).toEqual(["user"]); + expect(JSON.stringify(result)).not.toContain("partial aborted output"); + expect(JSON.stringify(result)).not.toContain("partial error output"); + }); + + it("drops errored Anthropic transport assistant tool calls and matching results before replay", () => { + const messages: Context["messages"] = [ + assistantToolCall("call_error", "exec", "error"), + { + role: "toolResult", + toolCallId: "call_error", + toolName: "exec", + content: [{ type: "text", text: "partial" }], + isError: true, + timestamp: Date.now(), + }, + { role: "user", content: "retry after error", 
timestamp: Date.now() }, + ]; + + const result = transformTransportMessages( + messages, + makeModel("anthropic-messages", "anthropic", "claude-opus-4-6"), + ); + + expect(result.map((msg) => msg.role)).toEqual(["user"]); + expect(JSON.stringify(result)).not.toContain("call_error"); }); it("still synthesizes missing tool results for Anthropic transports", () => { @@ -72,6 +232,10 @@ describe("transformTransportMessages synthetic tool-result policy", () => { makeModel("openclaw-google-generative-ai-transport" as Api, "google", "gemini-2.5-pro"), ); expect(googleAlias.map((msg) => msg.role)).toEqual(["assistant", "toolResult", "user"]); + expect(googleAlias[1]).toMatchObject({ + role: "toolResult", + content: [{ type: "text", text: "No result provided" }], + }); const bedrockCanonical = transformTransportMessages( messages, diff --git a/src/agents/transport-message-transform.ts b/src/agents/transport-message-transform.ts index 2262d014f3b..20d47f7bc70 100644 --- a/src/agents/transport-message-transform.ts +++ b/src/agents/transport-message-transform.ts @@ -1,4 +1,5 @@ import type { Api, Context, Model } from "@mariozechner/pi-ai"; +import { repairToolUseResultPairing } from "./session-transcript-repair.js"; const SYNTHETIC_TOOL_RESULT_APIS = new Set([ "anthropic-messages", @@ -6,31 +7,34 @@ const SYNTHETIC_TOOL_RESULT_APIS = new Set([ "bedrock-converse-stream", "google-generative-ai", "openclaw-google-generative-ai-transport", + "openai-responses", + "openai-codex-responses", + "azure-openai-responses", + "openclaw-openai-responses-transport", + "openclaw-azure-openai-responses-transport", ]); -type PendingToolCall = { id: string; name: string }; +// "aborted" is an OpenAI Responses-family convention from upstream Codex +// history normalization. Gemini/Anthropic transports use their own text while +// still needing synthetic results to satisfy provider turn-shape contracts; +// tool-replay-repair.live.test.ts exercises both paths against real models. 
+const CODEX_STYLE_ABORTED_OUTPUT_APIS = new Set([ + "openai-responses", + "openai-codex-responses", + "azure-openai-responses", + "openclaw-openai-responses-transport", + "openclaw-azure-openai-responses-transport", +]); function defaultAllowSyntheticToolResults(modelApi: Api): boolean { return SYNTHETIC_TOOL_RESULT_APIS.has(modelApi); } -function appendMissingToolResults( - result: Context["messages"], - pendingToolCalls: PendingToolCall[], - existingToolResultIds: ReadonlySet, -): void { - for (const toolCall of pendingToolCalls) { - if (!existingToolResultIds.has(toolCall.id)) { - result.push({ - role: "toolResult", - toolCallId: toolCall.id, - toolName: toolCall.name, - content: [{ type: "text", text: "No result provided" }], - isError: true, - timestamp: Date.now(), - }); - } +function isFailedAssistantTurn(message: Context["messages"][number]): boolean { + if (message.role !== "assistant") { + return false; } + return message.stopReason === "error" || message.stopReason === "aborted"; } export function transformTransportMessages( @@ -43,6 +47,9 @@ export function transformTransportMessages( ) => string, ): Context["messages"] { const allowSyntheticToolResults = defaultAllowSyntheticToolResults(model.api); + const syntheticToolResultText = CODEX_STYLE_ABORTED_OUTPUT_APIS.has(model.api) + ? "aborted" + : "No result provided"; const toolCallIdMap = new Map(); const transformed = messages.map((msg) => { if (msg.role === "user") { @@ -102,42 +109,21 @@ export function transformTransportMessages( } return { ...msg, content }; }); + // Preserve the old transport replay filter: failed streamed turns can contain + // partial text, partial tool calls, or both, and strict providers can treat + // them as valid assistant context on retry unless we drop the whole turn. 
+ const replayable = transformed.filter((msg) => !isFailedAssistantTurn(msg)); - const result: Context["messages"] = []; - let pendingToolCalls: PendingToolCall[] = []; - let existingToolResultIds = new Set(); - for (const msg of transformed) { - if (msg.role === "assistant") { - if (allowSyntheticToolResults && pendingToolCalls.length > 0) { - appendMissingToolResults(result, pendingToolCalls, existingToolResultIds); - } - pendingToolCalls = []; - existingToolResultIds = new Set(); - if (msg.stopReason === "error" || msg.stopReason === "aborted") { - continue; - } - const toolCalls = msg.content.filter( - (block): block is Extract<(typeof msg.content)[number], { type: "toolCall" }> => - block.type === "toolCall", - ); - if (toolCalls.length > 0) { - pendingToolCalls = toolCalls.map((block) => ({ id: block.id, name: block.name })); - existingToolResultIds = new Set(); - } - result.push(msg); - continue; - } - if (msg.role === "toolResult") { - existingToolResultIds.add(msg.toolCallId); - result.push(msg); - continue; - } - if (allowSyntheticToolResults && pendingToolCalls.length > 0) { - appendMissingToolResults(result, pendingToolCalls, existingToolResultIds); - } - pendingToolCalls = []; - existingToolResultIds = new Set(); - result.push(msg); + if (!allowSyntheticToolResults) { + return replayable; } - return result; + + // PI's local transform can synthesize missing results, but it does not move + // displaced real results back before an intervening user turn. Shared repair + // handles both, while preserving the previous transport behavior of dropping + // aborted/error assistant tool-call turns before replaying strict providers. + return repairToolUseResultPairing(replayable, { + erroredAssistantResultPolicy: "drop", + missingToolResultText: syntheticToolResultText, + }).messages as Context["messages"]; }