fix(agents): repair strict provider tool replay

This commit is contained in:
Peter Steinberger
2026-04-25 05:52:33 +01:00
parent e31aef7e19
commit 7f6452897e
16 changed files with 1091 additions and 73 deletions

View File

@@ -75,6 +75,7 @@ Docs: https://docs.openclaw.ai
- Gateway/tools: allow `POST /tools/invoke` to reach plugin-backed catalog tools such as `browser` when no core implementation exists, while still preferring built-in tools for real core names. Thanks @chat2way.
- Browser/security: require `operator.admin` for the `browser.request` gateway method, matching the host/browser-node control authority exposed by that route. Thanks @RichardCao.
- Browser/profiles: allow local managed profiles to override `browser.executablePath`, so different profiles can launch different Chromium-based browsers. Thanks @nobrainer-tech.
- Agents/replay: repair displaced or missing tool results before strict provider replay, use Codex-compatible `aborted` outputs for OpenAI Responses history, and drop partial aborted/error transport turns before retries.
- Reply media: allow sandboxed replies to deliver OpenClaw-managed `media/outbound` and `media/tool-*` attachments without treating them as sandbox escapes, while keeping alias-escape checks on the managed media root. Fixes #71138. Thanks @mayor686, @truffle-dev, and @neeravmakwana.
- CLI/agent: keep `openclaw agent --json` stdout reserved for the JSON response by routing gateway, plugin, and embedded-fallback diagnostics to stderr before execution starts. Fixes #71319.
- Agents/Gemini: retry reasoning-only, empty, and planning-only Gemini turns instead of letting sessions silently stall. Fixes #71074. (#71362) Thanks @neeravmakwana.

View File

@@ -114,9 +114,9 @@ external end-user instructions.
- Image sanitization only.
- Drop orphaned reasoning signatures (standalone reasoning items without a following content block) for OpenAI Responses/Codex transcripts, and drop replayable OpenAI reasoning after a model route switch.
- No tool call id sanitization.
- No tool result pairing repair.
- Tool result pairing repair may move real matched outputs and synthesize Codex-style `aborted` outputs for missing tool calls.
- No turn validation or reordering.
- No synthetic tool results.
- Missing OpenAI Responses-family tool outputs are synthesized as `aborted` to match Codex replay normalization.
- No thought signature stripping.
**Google (Generative AI / Gemini CLI / Antigravity)**

View File

@@ -1,10 +1,14 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { completeSimple, type Api, type Model } from "@mariozechner/pi-ai";
import { SessionManager } from "@mariozechner/pi-coding-agent";
import { Type } from "typebox";
import { describe, expect, it } from "vitest";
import { loadConfig } from "../config/config.js";
import { resolveOpenClawAgentDir } from "./agent-paths.js";
import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "./live-test-helpers.js";
import { getApiKeyForModel, requireApiKey } from "./model-auth.js";
import { ensureOpenClawModelsJson } from "./models-config.js";
import { sanitizeSessionHistory } from "./pi-embedded-runner/replay-history.js";
import { discoverAuthStorage, discoverModels } from "./pi-model-discovery.js";
const LIVE = isLiveTestEnabled();
@@ -169,4 +173,141 @@ describeLive("openai reasoning compat live", () => {
},
3 * 60 * 1000,
);
it(
"accepts repaired OpenAI Codex parallel tool replay with aborted missing results",
async () => {
const { provider, modelId } = resolveTargetModelRef();
const cfg = loadConfig();
await ensureOpenClawModelsJson(cfg);
const agentDir = resolveOpenClawAgentDir();
const authStorage = discoverAuthStorage(agentDir);
const modelRegistry = discoverModels(authStorage, agentDir);
const model = modelRegistry.find(provider, modelId) as Model<Api> | null;
if (!model) {
logProgress(`[openai-reasoning-compat] model missing from registry: ${TARGET_MODEL_REF}`);
return;
}
let apiKeyInfo;
try {
apiKeyInfo = await getApiKeyForModel({
model,
cfg,
credentialPrecedence: LIVE_CREDENTIAL_PRECEDENCE,
});
} catch (error) {
logProgress(`[openai-reasoning-compat] skip (${String(error)})`);
return;
}
if (REQUIRE_PROFILE_KEYS && !apiKeyInfo.source.startsWith("profile:")) {
logProgress(
`[openai-reasoning-compat] skip (non-profile credential source: ${apiKeyInfo.source})`,
);
return;
}
const messages = [
{
role: "user",
content: "Use noop.",
timestamp: Date.now(),
},
{
role: "assistant",
provider: model.provider,
api: model.api,
model: model.id,
stopReason: "toolUse",
timestamp: Date.now(),
content: [
{ type: "toolCall", id: "call_keep", name: "noop", arguments: {} },
{ type: "toolCall", id: "call_missing_a", name: "noop", arguments: {} },
{ type: "toolCall", id: "call_missing_b", name: "noop", arguments: {} },
],
},
{
role: "user",
content: "Reply with exactly: replay ok.",
timestamp: Date.now(),
},
{
role: "toolResult",
toolCallId: "call_keep",
toolName: "noop",
content: [{ type: "text", text: "ok" }],
isError: false,
timestamp: Date.now(),
},
] as unknown as AgentMessage[];
const sanitized = await sanitizeSessionHistory({
messages,
modelApi: model.api,
provider: model.provider,
modelId: model.id,
sessionManager: SessionManager.inMemory(),
sessionId: "openai-codex-tool-replay-live",
});
expect(sanitized.map((message) => message.role)).toEqual([
"user",
"assistant",
"toolResult",
"toolResult",
"toolResult",
"user",
]);
expect(
sanitized.slice(2, 5).map((message) => (message as { toolCallId?: string }).toolCallId),
).toEqual(["call_keep", "call_missing_a", "call_missing_b"]);
expect(
sanitized
.slice(3, 5)
.map((message) => (message as Extract<AgentMessage, { role: "toolResult" }>).content),
).toEqual([[{ type: "text", text: "aborted" }], [{ type: "text", text: "aborted" }]]);
expect(JSON.stringify(sanitized)).not.toContain("missing tool result");
const response = await completeSimpleWithTimeout(
model,
{
systemPrompt: "You are a concise assistant. Follow the user's instruction exactly.",
messages: sanitized as never,
tools: [
{
name: "noop",
description: "Return ok.",
parameters: Type.Object({}, { additionalProperties: false }),
},
],
},
{
apiKey: requireApiKey(apiKeyInfo, model.provider),
reasoning: "low",
maxTokens: 64,
},
120_000,
);
const text = response.content
.filter((block) => block.type === "text")
.map((block) => block.text.trim())
.join(" ")
.trim();
const errorMessage =
typeof (response as { errorMessage?: unknown }).errorMessage === "string"
? ((response as { errorMessage?: string }).errorMessage ?? "")
: "";
if (errorMessage && isKnownLiveBlocker(errorMessage)) {
logProgress(`[openai-reasoning-compat] skip (${errorMessage})`);
return;
}
expect(text).toMatch(/^replay ok\.?$/i);
},
3 * 60 * 1000,
);
});

View File

@@ -688,20 +688,181 @@ describe("sanitizeSessionHistory", () => {
expect(result[1]?.role).toBe("assistant");
});
it("synthesizes missing tool results for openai-responses after repair", async () => {
it("synthesizes Codex-style aborted tool results for openai-responses after repair", async () => {
const messages: AgentMessage[] = [
makeUserMessage("start"),
makeAssistantMessage([{ type: "toolCall", id: "call_1", name: "read", arguments: {} }], {
stopReason: "toolUse",
}),
makeUserMessage("continue"),
];
const result = await sanitizeOpenAIHistory(messages);
expect(result.map((message) => message.role)).toEqual([
"user",
"assistant",
"toolResult",
"user",
]);
expect((result[2] as { toolCallId?: string }).toolCallId).toBe("call1");
expect((result[2] as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
{ type: "text", text: "aborted" },
]);
expect(JSON.stringify(result)).not.toContain("missing tool result");
});
it("synthesizes Codex-style aborted tool results for openai-codex-responses", async () => {
const messages: AgentMessage[] = [
makeAssistantMessage(
[
{ type: "toolCall", id: "call_a", name: "exec", arguments: {} },
{ type: "toolCall", id: "call_b", name: "exec", arguments: {} },
{ type: "toolCall", id: "call_c", name: "exec", arguments: {} },
],
{ stopReason: "toolUse" },
),
makeUserMessage("status?"),
];
const result = await sanitizeSessionHistory({
messages,
modelApi: "openai-codex-responses",
provider: "openai-codex",
sessionManager: mockSessionManager,
sessionId: TEST_SESSION_ID,
});
expect(result.map((message) => message.role)).toEqual([
"assistant",
"toolResult",
"toolResult",
"toolResult",
"user",
]);
expect(
result.slice(1, 4).map((message) => (message as { toolCallId?: string }).toolCallId),
).toEqual(["calla", "callb", "callc"]);
for (const message of result.slice(1, 4)) {
expect((message as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
{ type: "text", text: "aborted" },
]);
}
expect(JSON.stringify(result)).not.toContain("missing tool result");
});
it("keeps real parallel tool results for openai-responses and aborts missing siblings", async () => {
const messages: AgentMessage[] = [
makeAssistantMessage(
[
{ type: "toolCall", id: "call_1", name: "read", arguments: {} },
{ type: "toolCall", id: "call_2", name: "exec", arguments: {} },
{ type: "toolCall", id: "call_3", name: "write", arguments: {} },
],
{ stopReason: "toolUse" },
),
makeUserMessage("continue"),
castAgentMessage({
role: "toolResult",
toolCallId: "call_2",
toolName: "exec",
content: [{ type: "text", text: "ok" }],
isError: false,
}),
];
const result = await sanitizeOpenAIHistory(messages);
// repairToolUseResultPairing now runs for all providers (including OpenAI)
// to fix orphaned function_call_output items that OpenAI would reject.
expect(result).toHaveLength(2);
expect(result[0]?.role).toBe("assistant");
expect(result[1]?.role).toBe("toolResult");
expect(result.map((message) => message.role)).toEqual([
"assistant",
"toolResult",
"toolResult",
"toolResult",
"user",
]);
expect(
extractToolCallsFromAssistant(result[0] as Extract<AgentMessage, { role: "assistant" }>),
).toMatchObject([
{ id: "call1", name: "read" },
{ id: "call2", name: "exec" },
{ id: "call3", name: "write" },
]);
expect(
result.slice(1, 4).map((message) => (message as { toolCallId?: string }).toolCallId),
).toEqual(["call1", "call2", "call3"]);
expect((result[1] as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
{ type: "text", text: "aborted" },
]);
expect((result[2] as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
{ type: "text", text: "ok" },
]);
expect((result[3] as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
{ type: "text", text: "aborted" },
]);
expect(JSON.stringify(result)).not.toContain("missing tool result");
});
it("applies aborted missing-result repair to azure-openai-responses", async () => {
const messages: AgentMessage[] = [
makeAssistantMessage([{ type: "toolCall", id: "call_azure", name: "read", arguments: {} }], {
stopReason: "toolUse",
}),
makeUserMessage("continue"),
];
const result = await sanitizeSessionHistory({
messages,
modelApi: "azure-openai-responses",
provider: "azure-openai-responses",
sessionManager: mockSessionManager,
sessionId: TEST_SESSION_ID,
});
expect(result.map((message) => message.role)).toEqual(["assistant", "toolResult", "user"]);
expect((result[1] as { toolCallId?: string }).toolCallId).toBe("callazure");
expect((result[1] as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
{ type: "text", text: "aborted" },
]);
});
it("drops duplicate and orphan OpenAI outputs while preserving the first real result", async () => {
const messages: AgentMessage[] = [
castAgentMessage({
role: "toolResult",
toolCallId: "call_orphan",
toolName: "read",
content: [{ type: "text", text: "orphan" }],
isError: false,
}),
makeAssistantMessage([{ type: "toolCall", id: "call_keep", name: "read", arguments: {} }], {
stopReason: "toolUse",
}),
castAgentMessage({
role: "toolResult",
toolCallId: "call_keep",
toolName: "read",
content: [{ type: "text", text: "first" }],
isError: false,
}),
castAgentMessage({
role: "toolResult",
toolCallId: "call_keep",
toolName: "read",
content: [{ type: "text", text: "duplicate" }],
isError: false,
}),
makeUserMessage("continue"),
];
const result = await sanitizeOpenAIHistory(messages);
expect(result.map((message) => message.role)).toEqual(["assistant", "toolResult", "user"]);
expect((result[1] as { toolCallId?: string }).toolCallId).toBe("callkeep");
expect((result[1] as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
{ type: "text", text: "first" },
]);
expect(JSON.stringify(result)).not.toContain("orphan");
expect(JSON.stringify(result)).not.toContain("duplicate");
});
it.each([

View File

@@ -810,6 +810,12 @@ export async function compactEmbeddedPiSessionDirect(
config: params.config,
contextWindowTokens: ctxInfo.tokens,
allowSyntheticToolResults: transcriptPolicy.allowSyntheticToolResults,
missingToolResultText:
model.api === "openai-responses" ||
model.api === "azure-openai-responses" ||
model.api === "openai-codex-responses"
? "aborted"
: undefined,
allowedToolNames,
});
checkpointSnapshot = captureCompactionCheckpointSnapshot({
@@ -965,6 +971,11 @@ export async function compactEmbeddedPiSessionDirect(
const limited = transcriptPolicy.repairToolUseResultPairing
? sanitizeToolUseResultPairing(truncated, {
erroredAssistantResultPolicy: "drop",
...(model.api === "openai-responses" ||
model.api === "azure-openai-responses" ||
model.api === "openai-codex-responses"
? { missingToolResultText: "aborted" }
: {}),
})
: truncated;
if (limited.length > 0) {

View File

@@ -493,13 +493,17 @@ export async function sanitizeSessionHistory(params: {
allowedToolNames: params.allowedToolNames,
allowProviderOwnedThinkingReplay,
});
// OpenAI's fc_* pairing downgrade needs the raw call_id|fc_id separator intact,
// but displaced tool results must first be repaired back next to their
// assistant turn so the downgrade can rewrite both sides consistently.
// OpenAI Responses rejects orphan/missing function_call_output items. Upstream
// Codex repairs those gaps with "aborted"; keep that before the fc_* downgrade
// so both call and result ids are rewritten together. Covered by unit replay
// tests plus live OpenAI/Codex and generic replay-repair model tests.
const openAIRepairedToolCalls =
isOpenAIResponsesApi && policy.repairToolUseResultPairing
? sanitizeToolUseResultPairing(sanitizedToolCalls, {
erroredAssistantResultPolicy: "drop",
// Match upstream Codex history normalization for OpenAI Responses:
// missing function_call_output entries are model-visible "aborted".
missingToolResultText: "aborted",
})
: sanitizedToolCalls;
const openAISafeToolCalls = isOpenAIResponsesApi
@@ -517,6 +521,9 @@ export async function sanitizeSessionHistory(params: {
allowedToolNames: params.allowedToolNames,
})
: openAISafeToolCalls;
// Gemini/Anthropic-class providers also require tool results to stay adjacent
// to their assistant tool calls. They do not use Codex's "aborted" text, but
// the same ordering repair is live-tested with Gemini 3 Flash.
const repairedTools =
!isOpenAIResponsesApi && policy.repairToolUseResultPairing
? sanitizeToolUseResultPairing(sanitizedToolIds, {

View File

@@ -61,6 +61,65 @@ describe("sanitizeReplayToolCallIdsForStream", () => {
]);
});
it("synthesizes missing tool results after strict id sanitization", () => {
const rawId = "call_function_av7cbkigmk7x1";
const out = sanitizeReplayToolCallIdsForStream({
messages: [
{
role: "assistant",
content: [
{ type: "toolUse", id: rawId, name: "read", input: { path: "." } },
{ type: "toolUse", id: "call_missing", name: "exec", input: { cmd: "true" } },
],
} as never,
{
role: "toolResult",
toolCallId: rawId,
toolUseId: rawId,
toolName: "read",
content: [{ type: "text", text: "ok" }],
isError: false,
} as never,
],
mode: "strict",
repairToolUseResultPairing: true,
});
expect(out.map((message) => message.role)).toEqual(["assistant", "toolResult", "toolResult"]);
expect((out[0] as Extract<AgentMessage, { role: "assistant" }>).content).toMatchObject([
{ type: "toolUse", id: "callfunctionav7cbkigmk7x1", name: "read" },
{ type: "toolUse", id: "callmissing", name: "exec" },
]);
expect(out[1]).toMatchObject({
role: "toolResult",
toolCallId: "callfunctionav7cbkigmk7x1",
toolUseId: "callfunctionav7cbkigmk7x1",
});
expect(out[2]).toMatchObject({
role: "toolResult",
toolCallId: "callmissing",
isError: true,
});
});
it("synthesizes missing tool results when repair is enabled", () => {
const out = sanitizeReplayToolCallIdsForStream({
messages: [
{
role: "assistant",
content: [{ type: "toolUse", id: "call_missing", name: "exec", input: { cmd: "true" } }],
} as never,
],
mode: "strict",
repairToolUseResultPairing: true,
});
expect(out).toMatchObject([
{ role: "assistant" },
{ role: "toolResult", toolCallId: "callmissing", isError: true },
]);
});
it("keeps real tool results for aborted assistant spans", () => {
const rawId = "call_function_av7cbkigmk7x1";
const out = sanitizeReplayToolCallIdsForStream({

View File

@@ -1193,6 +1193,12 @@ export async function runEmbeddedAttempt(
contextWindowTokens: params.contextTokenBudget,
inputProvenance: params.inputProvenance,
allowSyntheticToolResults: transcriptPolicy.allowSyntheticToolResults,
missingToolResultText:
params.model.api === "openai-responses" ||
params.model.api === "azure-openai-responses" ||
params.model.api === "openai-codex-responses"
? "aborted"
: undefined,
allowedToolNames,
});
trackSessionManagerAccess(params.sessionFile);
@@ -1840,6 +1846,7 @@ export async function runEmbeddedAttempt(
const limited = transcriptPolicy.repairToolUseResultPairing
? sanitizeToolUseResultPairing(truncated, {
erroredAssistantResultPolicy: "drop",
...(isOpenAIResponsesApi ? { missingToolResultText: "aborted" } : {}),
})
: truncated;
cacheTrace?.recordStage("session:limited", { messages: limited });

View File

@@ -29,6 +29,7 @@ export function guardSessionManager(
contextWindowTokens?: number;
inputProvenance?: InputProvenance;
allowSyntheticToolResults?: boolean;
missingToolResultText?: string;
allowedToolNames?: Iterable<string>;
},
): GuardedSessionManager {
@@ -75,6 +76,7 @@ export function guardSessionManager(
applyInputProvenanceToUserMessage(message, opts?.inputProvenance),
transformToolResultForPersistence: transform,
allowSyntheticToolResults: opts?.allowSyntheticToolResults,
missingToolResultText: opts?.missingToolResultText,
allowedToolNames: opts?.allowedToolNames,
beforeMessageWriteHook: beforeMessageWrite,
maxToolResultChars:

View File

@@ -111,6 +111,18 @@ describe("installSessionToolResultGuard", () => {
expectPersistedRoles(sm, ["assistant", "toolResult"]);
});
it("uses configured text for synthetic tool results", () => {
const sm = SessionManager.inMemory();
const guard = installSessionToolResultGuard(sm, {
missingToolResultText: "aborted",
});
sm.appendMessage(toolCallMessage);
guard.flushPendingToolResults();
expect(getToolResultText(getPersistedMessages(sm))).toBe("aborted");
});
it("clears pending tool calls without inserting synthetic tool results", () => {
const sm = SessionManager.inMemory();
const guard = installSessionToolResultGuard(sm);

View File

@@ -90,6 +90,7 @@ export function installSessionToolResultGuard(
* Defaults to true.
*/
allowSyntheticToolResults?: boolean;
missingToolResultText?: string;
/**
* Optional set/list of tool names accepted for assistant toolCall/toolUse blocks.
* When set, tool calls with unknown names are dropped before persistence.
@@ -127,6 +128,7 @@ export function installSessionToolResultGuard(
};
const allowSyntheticToolResults = opts?.allowSyntheticToolResults ?? true;
const missingToolResultText = opts?.missingToolResultText;
const beforeWrite = opts?.beforeMessageWriteHook;
const maxToolResultChars = resolveMaxToolResultChars(opts);
@@ -154,7 +156,11 @@ export function installSessionToolResultGuard(
}
if (allowSyntheticToolResults) {
for (const [id, name] of pendingState.entries()) {
const synthetic = makeMissingToolResult({ toolCallId: id, toolName: name });
const synthetic = makeMissingToolResult({
toolCallId: id,
toolName: name,
text: missingToolResultText,
});
const flushed = applyBeforeWriteHook(
persistToolResult(persistMessage(synthetic), {
toolCallId: id,

View File

@@ -76,6 +76,68 @@ describe("sanitizeToolUseResultPairing", () => {
expect(out[3]?.role).toBe("user");
});
it("uses custom text for synthesized missing tool results", () => {
const input = castAgentMessages([
{
role: "assistant",
content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }],
},
{ role: "user", content: "user message that should come after tool use" },
]);
const result = repairToolUseResultPairing(input, {
missingToolResultText: "aborted",
});
expect(result.added).toHaveLength(1);
expect(result.messages.map((m) => m.role)).toEqual(["assistant", "toolResult", "user"]);
expect(result.added[0]?.content).toEqual([{ type: "text", text: "aborted" }]);
});
it("keeps matched parallel tool results and synthesizes only missing siblings", () => {
const input = castAgentMessages([
{
role: "assistant",
content: [
{ type: "text", text: "checking" },
{ type: "toolCall", id: "call_1", name: "read", arguments: {} },
{ type: "toolCall", id: "call_2", name: "exec", arguments: {} },
{ type: "toolCall", id: "call_3", name: "write", arguments: {} },
],
},
{ role: "user", content: "user message that should come after tool use" },
{
role: "toolResult",
toolCallId: "call_2",
toolName: "exec",
content: [{ type: "text", text: "ok" }],
isError: false,
},
]);
const result = repairToolUseResultPairing(input, {
missingToolResultText: "aborted",
});
expect(result.added.map((message) => message.toolCallId)).toEqual(["call_1", "call_3"]);
expect(result.messages.map((m) => m.role)).toEqual([
"assistant",
"toolResult",
"toolResult",
"toolResult",
"user",
]);
expect(getAssistantToolCallBlocks(result.messages)).toMatchObject([
{ id: "call_1", name: "read" },
{ id: "call_2", name: "exec" },
{ id: "call_3", name: "write" },
]);
expect((result.messages[1] as { toolCallId?: string }).toolCallId).toBe("call_1");
expect((result.messages[2] as { toolCallId?: string }).toolCallId).toBe("call_2");
expect((result.messages[3] as { toolCallId?: string }).toolCallId).toBe("call_3");
expect(JSON.stringify(result.added)).not.toContain("missing tool result");
});
it("repairs blank tool result names from matching tool calls", () => {
const input = castAgentMessages([
{
@@ -248,9 +310,8 @@ describe("sanitizeToolUseResultPairing", () => {
});
expect(result.droppedOrphanCount).toBe(0);
expect(result.messages).toHaveLength(2);
expect(result.messages[0]?.role).toBe("assistant");
expect(result.messages[1]?.role).toBe("user");
expect(result.messages).toHaveLength(1);
expect(result.messages[0]?.role).toBe("user");
expect(result.added).toHaveLength(0);
});
});

View File

@@ -175,6 +175,12 @@ function isReplaySafeThinkingAssistantTurn(
function makeMissingToolResult(params: {
toolCallId: string;
toolName?: string;
// OpenAI Responses/Codex replay should match upstream Codex's "aborted"
// function_call_output normalization; live coverage in
// openai-reasoning-compat.live.test.ts and tool-replay-repair.live.test.ts
// sends this repaired history to real models. Other providers keep the older,
// explicit OpenClaw diagnostic text unless the caller opts in.
text?: string;
}): Extract<AgentMessage, { role: "toolResult" }> {
return {
role: "toolResult",
@@ -183,7 +189,9 @@ function makeMissingToolResult(params: {
content: [
{
type: "text",
text: "[openclaw] missing tool result in session history; inserted synthetic error result for transcript repair.",
text:
params.text ??
"[openclaw] missing tool result in session history; inserted synthetic error result for transcript repair.",
},
],
isError: true,
@@ -232,6 +240,7 @@ export type ErroredAssistantResultPolicy = "preserve" | "drop";
export type ToolUseResultPairingOptions = {
erroredAssistantResultPolicy?: ErroredAssistantResultPolicy;
missingToolResultText?: string;
};
export function stripToolResultDetails(messages: AgentMessage[]): AgentMessage[] {
@@ -529,8 +538,8 @@ export function repairToolUseResultPairing(
// tool calls in the same turn after malformed siblings are dropped.
const stopReason = (assistant as { stopReason?: string }).stopReason;
if (stopReason === "error" || stopReason === "aborted") {
out.push(msg);
if (!shouldDropErroredAssistantResults(options)) {
out.push(msg);
for (const toolCall of toolCalls) {
const result = spanResultsById.get(toolCall.id);
if (!result) {
@@ -540,6 +549,8 @@ export function repairToolUseResultPairing(
}
} else if (spanResultsById.size > 0) {
changed = true;
} else {
changed = true;
}
for (const rem of remainder) {
out.push(rem);
@@ -551,6 +562,8 @@ export function repairToolUseResultPairing(
out.push(msg);
if (spanResultsById.size > 0 && remainder.length > 0) {
// Preserve real late-arriving results before synthesizing missing siblings;
// otherwise parallel tool replay can replace useful output with repair noise.
moved = true;
changed = true;
}
@@ -563,6 +576,7 @@ export function repairToolUseResultPairing(
const missing = makeMissingToolResult({
toolCallId: call.id,
toolName: call.name,
text: options?.missingToolResultText,
});
added.push(missing);
changed = true;

View File

@@ -0,0 +1,386 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { completeSimple, type Api, type Context, type Model } from "@mariozechner/pi-ai";
import { SessionManager } from "@mariozechner/pi-coding-agent";
import { Type } from "typebox";
import { describe, expect, it } from "vitest";
import { loadConfig } from "../config/config.js";
import { resolveOpenClawAgentDir } from "./agent-paths.js";
import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "./live-test-helpers.js";
import { getApiKeyForModel, requireApiKey } from "./model-auth.js";
import { ensureOpenClawModelsJson } from "./models-config.js";
import { sanitizeSessionHistory } from "./pi-embedded-runner/replay-history.js";
import { discoverAuthStorage, discoverModels } from "./pi-model-discovery.js";
import { transformTransportMessages } from "./transport-message-transform.js";
// Live-test gating: the whole suite is skipped unless live testing is enabled.
const LIVE = isLiveTestEnabled();
// When profile-key mode is on, tests that resolve a non-profile credential skip
// themselves instead of running against env-provided keys.
const REQUIRE_PROFILE_KEYS = isLiveProfileKeyModeEnabled();
const LIVE_CREDENTIAL_PRECEDENCE = REQUIRE_PROFILE_KEYS ? "profile-first" : "env-first";
// Default provider/model pairs exercised by this suite; override with a
// comma-separated list in OPENCLAW_LIVE_TOOL_REPLAY_REPAIR_MODELS.
const DEFAULT_TARGET_MODEL_REFS = "openai-codex/gpt-5.5,google/gemini-3-flash-preview";
const TARGET_MODEL_REFS = parseTargetModelRefs(
  process.env.OPENCLAW_LIVE_TOOL_REPLAY_REPAIR_MODELS ?? DEFAULT_TARGET_MODEL_REFS,
);
const describeLive = LIVE ? describe : describe.skip;
// One "provider/modelId" target parsed from the env override or the default list.
type TargetModelRef = {
  ref: string;
  provider: string;
  modelId: string;
};
/**
 * Parse a comma-separated list of "provider/modelId" refs into structured
 * targets. Blank entries are ignored; an entry without both a provider and a
 * model id is a configuration error and throws.
 */
function parseTargetModelRefs(raw: string | undefined): TargetModelRef[] {
  const targets: TargetModelRef[] = [];
  for (const candidate of (raw ?? "").split(",")) {
    const ref = candidate.trim();
    if (!ref) {
      continue;
    }
    // Only the first "/" separates provider from model id; model ids may
    // themselves contain slashes.
    const slash = ref.indexOf("/");
    const provider = slash === -1 ? ref : ref.slice(0, slash);
    const modelId = slash === -1 ? "" : ref.slice(slash + 1).trim();
    if (!provider.trim() || !modelId) {
      throw new Error(
        `Invalid OPENCLAW_LIVE_TOOL_REPLAY_REPAIR_MODELS entry: ${JSON.stringify(ref)}`,
      );
    }
    targets.push({ ref, provider: provider.trim(), modelId });
  }
  return targets;
}
/** Write a live-test progress line to stderr so stdout stays clean for results. */
function logProgress(message: string): void {
  const line = "[live] " + message + "\n";
  process.stderr.write(line);
}
/**
 * Run `completeSimple` under a hard wall-clock timeout.
 *
 * Two timers run in parallel: one fires the AbortController so the transport
 * can cancel cleanly, and one rejects the race in case the provider ignores
 * the abort signal. Both timers are cleared once the race settles — the
 * original only cleared the abort timer, leaving the hard-timeout timer
 * pending for up to `timeoutMs` after every successful call.
 *
 * @param model - model to invoke
 * @param context - `completeSimple` context (system prompt, messages, tools)
 * @param options - `completeSimple` options; the timeout abort signal is merged in
 * @param timeoutMs - wall-clock budget in milliseconds
 * @returns the `completeSimple` response
 * @throws Error when the call does not settle within `timeoutMs`
 */
async function completeSimpleWithTimeout<TApi extends Api>(
  model: Model<TApi>,
  context: Parameters<typeof completeSimple<TApi>>[1],
  options: Parameters<typeof completeSimple<TApi>>[2],
  timeoutMs: number,
): Promise<Awaited<ReturnType<typeof completeSimple<TApi>>>> {
  const controller = new AbortController();
  const abortTimer = setTimeout(() => {
    controller.abort();
  }, timeoutMs);
  abortTimer.unref?.();
  // Hoisted so the finally block can clear it; assigned inside the race below.
  let hardTimer: ReturnType<typeof setTimeout> | undefined;
  try {
    return await Promise.race([
      completeSimple(model, context, {
        ...options,
        signal: controller.signal,
      }),
      new Promise<never>((_, reject) => {
        hardTimer = setTimeout(() => {
          reject(new Error(`model call timed out after ${timeoutMs}ms`));
        }, timeoutMs);
        hardTimer.unref?.();
      }),
    ]);
  } finally {
    clearTimeout(abortTimer);
    if (hardTimer !== undefined) {
      clearTimeout(hardTimer);
    }
  }
}
/** True for the OpenAI Responses family of APIs (plain, Codex, and Azure). */
function isOpenAIResponsesFamily(api: string): boolean {
  const responsesApis = ["openai-responses", "openai-codex-responses", "azure-openai-responses"];
  return responsesApis.includes(api);
}
/**
 * Build a replay fixture with one real tool result (call_keep) displaced after
 * a user turn, plus two tool calls with no results at all — the exact shape
 * the pairing repair must fix before a strict provider accepts the transcript.
 */
function buildReplayMessages(model: Model<Api>): AgentMessage[] {
  const baseTime = Date.now();
  // For Google targets, pretend the assistant turn came from a different
  // provider-owned transcript (a simulated model switch). That forces the id
  // sanitization and replay-repair path that failed in real session replays,
  // not just the happy path of a same-provider synthetic fixture.
  let source: { api: string; provider: string; model: string };
  if (model.provider === "google") {
    source = {
      api: "google-gemini-cli",
      provider: "google-antigravity",
      model: "claude-sonnet-4-20250514",
    };
  } else {
    source = {
      api: model.api,
      provider: model.provider,
      model: model.id,
    };
  }
  const userIntro = {
    role: "user",
    content: "Use noop.",
    timestamp: baseTime,
  };
  const assistantToolTurn = {
    role: "assistant",
    provider: source.provider,
    api: source.api,
    model: source.model,
    stopReason: "toolUse",
    timestamp: baseTime + 1,
    content: [
      { type: "toolCall", id: "call_keep", name: "noop", arguments: {} },
      { type: "toolCall", id: "call_missing_a", name: "noop", arguments: {} },
      { type: "toolCall", id: "call_missing_b", name: "noop", arguments: {} },
    ],
  };
  const userFollowUp = {
    role: "user",
    content: "Reply with exactly: replay repair ok.",
    timestamp: baseTime + 2,
  };
  // Deliberately displaced: the real result arrives after the user turn.
  const displacedResult = {
    role: "toolResult",
    toolCallId: "call_keep",
    toolName: "noop",
    content: [{ type: "text", text: "ok" }],
    isError: false,
    timestamp: baseTime + 3,
  };
  return [
    userIntro,
    assistantToolTurn,
    userFollowUp,
    displacedResult,
  ] as unknown as AgentMessage[];
}
/**
 * Build a transport fixture whose first turn is an aborted assistant tool call
 * with no result — the partial turn the transport transform must drop before a
 * retry can be replayed.
 */
function buildAbortedTransportMessages(model: Model<Api>): Context["messages"] {
  const baseTime = Date.now();
  const abortedAssistantTurn = {
    role: "assistant",
    provider: model.provider,
    api: model.api,
    model: model.id,
    stopReason: "aborted",
    timestamp: baseTime,
    content: [{ type: "toolCall", id: "call_transport_aborted", name: "noop", arguments: {} }],
  };
  const followUp = {
    role: "user",
    content: "Reply with exactly: transport replay ok.",
    timestamp: baseTime + 1,
  };
  return [abortedAssistantTurn, followUp] as Context["messages"];
}
/**
 * Extract the text of a tool-result message's first content block, or
 * undefined when the message is not a toolResult or its first block is not text.
 */
function syntheticToolResultText(message: AgentMessage): string | undefined {
  if (message.role === "toolResult") {
    const head = message.content[0] as { type?: unknown; text?: unknown } | undefined;
    if (head?.type === "text" && typeof head.text === "string") {
      return head.text;
    }
  }
  return undefined;
}
/** Collect the ids of all toolCall blocks on an assistant message (else []). */
function assistantToolCallIds(message: AgentMessage): string[] {
  if (message.role !== "assistant") {
    return [];
  }
  const ids: string[] = [];
  for (const block of message.content) {
    if (block.type === "toolCall") {
      ids.push(block.id);
    }
  }
  return ids;
}
/**
 * True for provider error messages that are known environment limitations
 * (ChatGPT-account restrictions or usage limits), not test failures — the
 * caller skips the live test instead of failing it.
 */
function isKnownLiveBlocker(errorMessage: string): boolean {
  const blockers = [
    /not supported when using codex with a chatgpt account/i,
    /hit your chatgpt usage limit/i,
  ];
  return blockers.some((pattern) => pattern.test(errorMessage));
}
// Live regression coverage for strict-provider tool replay repair. Providers
// such as OpenAI Responses reject transcripts whose assistant(toolCall) turns
// lack adjacent matching results, so a non-error completion from the real API
// is the end-to-end proof that the repaired transcript shape is valid.
describeLive("tool replay repair live", () => {
  /**
   * Resolves the config, registry model, and credentials for one live target.
   *
   * Returns null after logging the reason when the target must be skipped:
   * the model is absent from the registry, credential resolution throws, or
   * REQUIRE_PROFILE_KEYS is set and the credential is not profile-sourced.
   * Extracted because both live tests below previously duplicated this setup.
   */
  async function resolveLiveTarget(target: { ref: string; provider: string; modelId: string }) {
    const cfg = loadConfig();
    await ensureOpenClawModelsJson(cfg);
    const agentDir = resolveOpenClawAgentDir();
    const authStorage = discoverAuthStorage(agentDir);
    const modelRegistry = discoverModels(authStorage, agentDir);
    const model = modelRegistry.find(target.provider, target.modelId) as Model<Api> | null;
    if (!model) {
      logProgress(`[tool-replay-repair] model missing from registry: ${target.ref}`);
      return null;
    }
    let apiKeyInfo;
    try {
      apiKeyInfo = await getApiKeyForModel({
        model,
        cfg,
        credentialPrecedence: LIVE_CREDENTIAL_PRECEDENCE,
      });
    } catch (error) {
      logProgress(`[tool-replay-repair] skip ${target.ref} (${String(error)})`);
      return null;
    }
    if (REQUIRE_PROFILE_KEYS && !apiKeyInfo.source.startsWith("profile:")) {
      logProgress(
        `[tool-replay-repair] skip ${target.ref} (non-profile credential source: ${apiKeyInfo.source})`,
      );
      return null;
    }
    return { cfg, model, apiKeyInfo };
  }

  for (const target of TARGET_MODEL_REFS) {
    it(
      `accepts repaired displaced and missing tool results with ${target.ref}`,
      async () => {
        const resolved = await resolveLiveTarget(target);
        if (!resolved) {
          return;
        }
        const { model, apiKeyInfo } = resolved;
        logProgress(`[tool-replay-repair] target=${target.ref} auth source=${apiKeyInfo.source}`);
        const sanitized = await sanitizeSessionHistory({
          messages: buildReplayMessages(model),
          modelApi: model.api,
          provider: model.provider,
          modelId: model.id,
          sessionManager: SessionManager.inMemory(),
          sessionId: `tool-replay-repair-live-${target.provider}-${target.modelId}`,
        });
        // Repair must leave every assistant toolCall immediately followed by
        // a matching toolResult, in the same order as the calls.
        expect(sanitized.map((message) => message.role)).toEqual([
          "user",
          "assistant",
          "toolResult",
          "toolResult",
          "toolResult",
          "user",
        ]);
        const assistantMessage = sanitized[1];
        expect(assistantMessage?.role).toBe("assistant");
        expect(
          sanitized.slice(2, 5).map((message) => (message as { toolCallId?: string }).toolCallId),
        ).toEqual(assistantToolCallIds(assistantMessage));
        // These assertions are the model-visible contract: OpenAI Responses
        // gets Codex-compatible "aborted" outputs, while Gemini proves the
        // generic repair does not leak OpenAI wording into other providers.
        const insertedTexts = sanitized.slice(3, 5).map(syntheticToolResultText);
        if (isOpenAIResponsesFamily(model.api)) {
          expect(insertedTexts).toEqual(["aborted", "aborted"]);
        } else {
          expect(insertedTexts).not.toContain("aborted");
        }
        // Sending the repaired transcript to the real model is the live proof:
        // providers reject malformed tool-call adjacency before generation, so
        // any non-error response here validates the repair shape end to end.
        const response = await completeSimpleWithTimeout(
          model,
          {
            systemPrompt: "You are a concise assistant. Follow the user's instruction exactly.",
            messages: sanitized as never,
            tools: [
              {
                name: "noop",
                description: "Return ok.",
                parameters: Type.Object({}, { additionalProperties: false }),
              },
            ],
          },
          {
            apiKey: requireApiKey(apiKeyInfo, model.provider),
            reasoning: "low",
            maxTokens: 96,
          },
          120_000,
        );
        const text = response.content
          .filter((block) => block.type === "text")
          .map((block) => block.text.trim())
          .join(" ")
          .trim();
        const errorMessage =
          typeof (response as { errorMessage?: unknown }).errorMessage === "string"
            ? ((response as { errorMessage?: string }).errorMessage ?? "")
            : "";
        // Known account/limit blockers are environment problems, not repair
        // regressions: skip instead of failing.
        if (errorMessage && isKnownLiveBlocker(errorMessage)) {
          logProgress(`[tool-replay-repair] skip ${target.ref} (${errorMessage})`);
          return;
        }
        expect(response.stopReason).not.toBe("error");
        if (text.length > 0) {
          expect(text).toMatch(/^replay repair ok\.?$/i);
        }
      },
      3 * 60 * 1000,
    );
    it(
      `accepts transport replay after dropping aborted assistant tool calls with ${target.ref}`,
      async () => {
        const resolved = await resolveLiveTarget(target);
        if (!resolved) {
          return;
        }
        const { model, apiKeyInfo } = resolved;
        const transformed = transformTransportMessages(buildAbortedTransportMessages(model), model);
        // The aborted assistant tool-call turn must be dropped entirely.
        expect(transformed.map((message) => message.role)).toEqual(["user"]);
        expect(JSON.stringify(transformed)).not.toContain("call_transport_aborted");
        // This is the transport replay regression proof: providers reject
        // assistant(tool_call)->user replays without a matching result, so the
        // dropped transcript must still be accepted by real model APIs.
        const response = await completeSimpleWithTimeout(
          model,
          {
            systemPrompt: "You are a concise assistant. Follow the user's instruction exactly.",
            messages: transformed as never,
            tools: [
              {
                name: "noop",
                description: "Return ok.",
                parameters: Type.Object({}, { additionalProperties: false }),
              },
            ],
          },
          {
            apiKey: requireApiKey(apiKeyInfo, model.provider),
            reasoning: "low",
            maxTokens: 96,
          },
          120_000,
        );
        const text = response.content
          .filter((block) => block.type === "text")
          .map((block) => block.text.trim())
          .join(" ")
          .trim();
        const errorMessage =
          typeof (response as { errorMessage?: unknown }).errorMessage === "string"
            ? ((response as { errorMessage?: string }).errorMessage ?? "")
            : "";
        if (errorMessage && isKnownLiveBlocker(errorMessage)) {
          logProgress(`[tool-replay-repair] skip ${target.ref} (${errorMessage})`);
          return;
        }
        expect(response.stopReason).not.toBe("error");
        if (text.length > 0) {
          expect(text).toMatch(/^transport replay ok\.?$/i);
        }
      },
      3 * 60 * 1000,
    );
  }
});

View File

@@ -9,20 +9,21 @@ function makeModel(api: Api, provider: string, id: string): Model<Api> {
/**
 * Builds an assistant turn carrying a single toolCall block for transport
 * replay fixtures. `stopReason` defaults to "toolUse" and can be overridden
 * (e.g. "aborted" or "error") to model failed streamed turns.
 */
function assistantToolCall(
  id: string,
  name = "read",
  stopReason: Extract<Context["messages"][number], { role: "assistant" }>["stopReason"] = "toolUse",
): Extract<Context["messages"][number], { role: "assistant" }> {
  return {
    role: "assistant",
    provider: "openai",
    api: "openai-responses",
    model: "gpt-5.4",
    // Only the parameter shorthand: the stale literal `stopReason: "toolUse"`
    // alongside it was a duplicate object property (TS1117) left over from
    // the diff and would silently shadow intent if the order ever changed.
    stopReason,
    timestamp: Date.now(),
    content: [{ type: "toolCall", id, name, arguments: {} }],
  } as Extract<Context["messages"][number], { role: "assistant" }>;
}
describe("transformTransportMessages synthetic tool-result policy", () => {
it("does not synthesize missing tool results for OpenAI-compatible transports", () => {
it("synthesizes Codex-style aborted tool results for OpenAI Responses transports", () => {
const messages: Context["messages"] = [
assistantToolCall("call_openai_1"),
{ role: "user", content: "continue", timestamp: Date.now() },
@@ -33,7 +34,166 @@ describe("transformTransportMessages synthetic tool-result policy", () => {
makeModel("openai-responses", "openai", "gpt-5.4"),
);
expect(result.map((msg) => msg.role)).toEqual(["assistant", "user"]);
expect(result.map((msg) => msg.role)).toEqual(["assistant", "toolResult", "user"]);
expect(result[1]).toMatchObject({
role: "toolResult",
toolCallId: "call_openai_1",
isError: true,
content: [{ type: "text", text: "aborted" }],
});
});
// A real result for call_keep must survive untouched while the missing
// parallel sibling call_missing receives a synthetic "aborted" result.
it("preserves real OpenAI transport results and aborts missing parallel siblings", () => {
  const transportModel = makeModel(
    "openclaw-openai-responses-transport" as Api,
    "openai",
    "gpt-5.4",
  );
  const history: Context["messages"] = [
    {
      ...assistantToolCall("call_keep"),
      content: [
        { type: "toolCall", id: "call_keep", name: "read", arguments: {} },
        { type: "toolCall", id: "call_missing", name: "exec", arguments: {} },
      ],
    },
    {
      role: "toolResult",
      toolCallId: "call_keep",
      toolName: "read",
      content: [{ type: "text", text: "ok" }],
      isError: false,
      timestamp: Date.now(),
    },
    { role: "user", content: "continue", timestamp: Date.now() },
  ];
  const replayed = transformTransportMessages(history, transportModel);
  const roles = replayed.map((msg) => msg.role);
  expect(roles).toEqual(["assistant", "toolResult", "toolResult", "user"]);
  expect(replayed.slice(1, 3)).toMatchObject([
    { role: "toolResult", toolCallId: "call_keep", content: [{ type: "text", text: "ok" }] },
    {
      role: "toolResult",
      toolCallId: "call_missing",
      content: [{ type: "text", text: "aborted" }],
    },
  ]);
});
// A displaced real result (recorded after an intervening user turn) must be
// moved back next to its call before the missing sibling is synthesized.
it("moves displaced OpenAI transport results before synthesizing missing siblings", () => {
  const responsesModel = makeModel("openai-responses", "openai", "gpt-5.4");
  const history: Context["messages"] = [
    {
      ...assistantToolCall("call_keep"),
      content: [
        { type: "toolCall", id: "call_keep", name: "read", arguments: {} },
        { type: "toolCall", id: "call_missing", name: "exec", arguments: {} },
      ],
    },
    { role: "user", content: "continue", timestamp: Date.now() },
    {
      role: "toolResult",
      toolCallId: "call_keep",
      toolName: "read",
      content: [{ type: "text", text: "late ok" }],
      isError: false,
      timestamp: Date.now(),
    },
  ];
  const replayed = transformTransportMessages(history, responsesModel);
  const roles = replayed.map((msg) => msg.role);
  expect(roles).toEqual(["assistant", "toolResult", "toolResult", "user"]);
  expect(replayed.slice(1, 3)).toMatchObject([
    { role: "toolResult", toolCallId: "call_keep", content: [{ type: "text", text: "late ok" }] },
    {
      role: "toolResult",
      toolCallId: "call_missing",
      content: [{ type: "text", text: "aborted" }],
    },
  ]);
});
// An aborted assistant tool-call turn must vanish entirely before replay, so
// strict providers never see a dangling tool call without a result.
it("drops aborted OpenAI transport assistant tool calls before replay", () => {
  const responsesModel = makeModel("openai-responses", "openai", "gpt-5.4");
  const history: Context["messages"] = [
    assistantToolCall("call_aborted", "exec", "aborted"),
    { role: "user", content: "retry after abort", timestamp: Date.now() },
  ];
  const replayed = transformTransportMessages(history, responsesModel);
  expect(replayed.map((msg) => msg.role)).toEqual(["user"]);
  const serialized = JSON.stringify(replayed);
  expect(serialized).not.toContain("call_aborted");
});
// Failed streamed turns that produced only partial text (no tool calls) must
// also be dropped wholesale before replaying against strict providers.
it("drops text-only aborted and errored transport assistant turns before replay", () => {
  const failedTextTurn = (
    stopReason: "aborted" | "error",
    text: string,
  ): Extract<Context["messages"][number], { role: "assistant" }> =>
    ({
      role: "assistant",
      provider: "openai",
      api: "openai-responses",
      model: "gpt-5.4",
      stopReason,
      timestamp: Date.now(),
      content: [{ type: "text", text }],
    }) as Extract<Context["messages"][number], { role: "assistant" }>;
  const history: Context["messages"] = [
    failedTextTurn("aborted", "partial aborted output"),
    failedTextTurn("error", "partial error output"),
    { role: "user", content: "retry after failed text turns", timestamp: Date.now() },
  ];
  const replayed = transformTransportMessages(
    history,
    makeModel("openai-responses", "openai", "gpt-5.4"),
  );
  expect(replayed.map((msg) => msg.role)).toEqual(["user"]);
  const serialized = JSON.stringify(replayed);
  expect(serialized).not.toContain("partial aborted output");
  expect(serialized).not.toContain("partial error output");
});
// When the errored assistant tool-call turn is dropped, its recorded result
// becomes an orphan and must be removed with it, not replayed alone.
it("drops errored Anthropic transport assistant tool calls and matching results before replay", () => {
  const anthropicModel = makeModel("anthropic-messages", "anthropic", "claude-opus-4-6");
  const history: Context["messages"] = [
    assistantToolCall("call_error", "exec", "error"),
    {
      role: "toolResult",
      toolCallId: "call_error",
      toolName: "exec",
      content: [{ type: "text", text: "partial" }],
      isError: true,
      timestamp: Date.now(),
    },
    { role: "user", content: "retry after error", timestamp: Date.now() },
  ];
  const replayed = transformTransportMessages(history, anthropicModel);
  expect(replayed.map((msg) => msg.role)).toEqual(["user"]);
  const serialized = JSON.stringify(replayed);
  expect(serialized).not.toContain("call_error");
});
it("still synthesizes missing tool results for Anthropic transports", () => {
@@ -72,6 +232,10 @@ describe("transformTransportMessages synthetic tool-result policy", () => {
makeModel("openclaw-google-generative-ai-transport" as Api, "google", "gemini-2.5-pro"),
);
expect(googleAlias.map((msg) => msg.role)).toEqual(["assistant", "toolResult", "user"]);
expect(googleAlias[1]).toMatchObject({
role: "toolResult",
content: [{ type: "text", text: "No result provided" }],
});
const bedrockCanonical = transformTransportMessages(
messages,

View File

@@ -1,4 +1,5 @@
import type { Api, Context, Model } from "@mariozechner/pi-ai";
import { repairToolUseResultPairing } from "./session-transcript-repair.js";
const SYNTHETIC_TOOL_RESULT_APIS = new Set<string>([
"anthropic-messages",
@@ -6,31 +7,34 @@ const SYNTHETIC_TOOL_RESULT_APIS = new Set<string>([
"bedrock-converse-stream",
"google-generative-ai",
"openclaw-google-generative-ai-transport",
"openai-responses",
"openai-codex-responses",
"azure-openai-responses",
"openclaw-openai-responses-transport",
"openclaw-azure-openai-responses-transport",
]);
// Identifier pair for an assistant toolCall block still awaiting its result.
type PendingToolCall = { id: string; name: string };

// "aborted" is an OpenAI Responses-family convention from upstream Codex
// history normalization. Gemini/Anthropic transports use their own text while
// still needing synthetic results to satisfy provider turn-shape contracts;
// tool-replay-repair.live.test.ts exercises both paths against real models.
const CODEX_STYLE_ABORTED_OUTPUT_APIS = new Set<string>([
  "openai-responses",
  "openai-codex-responses",
  "azure-openai-responses",
  "openclaw-openai-responses-transport",
  "openclaw-azure-openai-responses-transport",
]);
/**
 * True when `modelApi` belongs to a family that requires synthetic tool
 * results so replayed transcripts keep toolCall/toolResult adjacency intact.
 */
function defaultAllowSyntheticToolResults(modelApi: Api): boolean {
  return SYNTHETIC_TOOL_RESULT_APIS.has(modelApi);
}
function appendMissingToolResults(
result: Context["messages"],
pendingToolCalls: PendingToolCall[],
existingToolResultIds: ReadonlySet<string>,
): void {
for (const toolCall of pendingToolCalls) {
if (!existingToolResultIds.has(toolCall.id)) {
result.push({
role: "toolResult",
toolCallId: toolCall.id,
toolName: toolCall.name,
content: [{ type: "text", text: "No result provided" }],
isError: true,
timestamp: Date.now(),
});
}
/**
 * A failed streamed turn: an assistant message whose stream ended in
 * "aborted" or "error". Such turns are dropped before strict-provider replay.
 */
function isFailedAssistantTurn(message: Context["messages"][number]): boolean {
  if (message.role === "assistant") {
    return message.stopReason === "aborted" || message.stopReason === "error";
  }
  return false;
}
export function transformTransportMessages(
@@ -43,6 +47,9 @@ export function transformTransportMessages(
) => string,
): Context["messages"] {
const allowSyntheticToolResults = defaultAllowSyntheticToolResults(model.api);
const syntheticToolResultText = CODEX_STYLE_ABORTED_OUTPUT_APIS.has(model.api)
? "aborted"
: "No result provided";
const toolCallIdMap = new Map<string, string>();
const transformed = messages.map((msg) => {
if (msg.role === "user") {
@@ -102,42 +109,21 @@ export function transformTransportMessages(
}
return { ...msg, content };
});
// Preserve the old transport replay filter: failed streamed turns can contain
// partial text, partial tool calls, or both, and strict providers can treat
// them as valid assistant context on retry unless we drop the whole turn.
const replayable = transformed.filter((msg) => !isFailedAssistantTurn(msg));
const result: Context["messages"] = [];
let pendingToolCalls: PendingToolCall[] = [];
let existingToolResultIds = new Set<string>();
for (const msg of transformed) {
if (msg.role === "assistant") {
if (allowSyntheticToolResults && pendingToolCalls.length > 0) {
appendMissingToolResults(result, pendingToolCalls, existingToolResultIds);
}
pendingToolCalls = [];
existingToolResultIds = new Set();
if (msg.stopReason === "error" || msg.stopReason === "aborted") {
continue;
}
const toolCalls = msg.content.filter(
(block): block is Extract<(typeof msg.content)[number], { type: "toolCall" }> =>
block.type === "toolCall",
);
if (toolCalls.length > 0) {
pendingToolCalls = toolCalls.map((block) => ({ id: block.id, name: block.name }));
existingToolResultIds = new Set();
}
result.push(msg);
continue;
}
if (msg.role === "toolResult") {
existingToolResultIds.add(msg.toolCallId);
result.push(msg);
continue;
}
if (allowSyntheticToolResults && pendingToolCalls.length > 0) {
appendMissingToolResults(result, pendingToolCalls, existingToolResultIds);
}
pendingToolCalls = [];
existingToolResultIds = new Set();
result.push(msg);
if (!allowSyntheticToolResults) {
return replayable;
}
return result;
// PI's local transform can synthesize missing results, but it does not move
// displaced real results back before an intervening user turn. Shared repair
// handles both, while preserving the previous transport behavior of dropping
// aborted/error assistant tool-call turns before replaying strict providers.
return repairToolUseResultPairing(replayable, {
erroredAssistantResultPolicy: "drop",
missingToolResultText: syntheticToolResultText,
}).messages as Context["messages"];
}