From 42c9a1e6cfcfbbf3114ebf4f36d56ec02bc71762 Mon Sep 17 00:00:00 2001
From: Josh Lehman <josh@martian.engineering>
Date: Wed, 18 Mar 2026 13:35:19 -0700
Subject: [PATCH] fix: repair replay tool result pairing

Regeneration-Prompt: |
  Address the PR review finding that replay sanitization can drop an assistant tool-call turn and leave downstream toolResult messages orphaned in the outbound provider context. Keep the replay-only sanitizer from the previous review fix, but when it changes the message list, immediately run sanitizeToolUseResultPairing before handing the context to the provider. Add a regression test whose input starts with a malformed assistant tool-call turn (which the sanitizer drops) followed by a toolResult, and which verifies that the orphaned result is removed from the context passed to the provider.
---
 .../pi-embedded-runner/run/attempt.test.ts    | 41 +++++++++++++++++++
 src/agents/pi-embedded-runner/run/attempt.ts  |  3 +-
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/src/agents/pi-embedded-runner/run/attempt.test.ts b/src/agents/pi-embedded-runner/run/attempt.test.ts
index 8ec81f1c6d6..6585b461c19 100644
--- a/src/agents/pi-embedded-runner/run/attempt.test.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.test.ts
@@ -878,6 +878,47 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
     expect(toolCall.name).toBe("SESSIONS_SPAWN");
     expect(toolCall.input?.attachments?.[0]?.content).toBe(attachmentContent);
   });
+
+  it("drops orphaned tool results after replay sanitization removes a tool-call turn", async () => {
+    const messages = [
+      {
+        role: "assistant",
+        content: [{ type: "toolCall", name: "read", arguments: {} }],
+        stopReason: "error",
+      },
+      {
+        role: "toolResult",
+        toolCallId: "call_missing",
+        toolName: "read",
+        content: [{ type: "text", text: "stale result" }],
+        isError: false,
+      },
+      {
+        role: "user",
+        content: [{ type: "text", text: "retry" }],
+      },
+    ];
+    const baseFn = vi.fn((_model, _context) =>
+      createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
+    );
+
+    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"]));
+    const stream = wrapped({} as never, { messages } as never, {} as never) as
+      | FakeWrappedStream
+      | Promise<FakeWrappedStream>;
+    await Promise.resolve(stream);
+
+    expect(baseFn).toHaveBeenCalledTimes(1);
+    const seenContext = baseFn.mock.calls[0]?.[1] as {
+      messages: Array<{ role?: string }>;
+    };
+    expect(seenContext.messages).toEqual([
+      {
+        role: "user",
+        content: [{ type: "text", text: "retry" }],
+      },
+    ]);
+  });
 });
 
 describe("wrapStreamFnRepairMalformedToolCallArguments", () => {
diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts
index a3acb863b78..e63cfd3f532 100644
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -916,9 +916,10 @@ export function wrapStreamFnSanitizeMalformedToolCalls(
     if (sanitized === messages) {
       return baseFn(model, context, options);
     }
+    const paired = sanitizeToolUseResultPairing(sanitized);
     const nextContext = {
       ...(context as unknown as Record<string, unknown>),
-      messages: sanitized,
+      messages: paired,
     } as unknown;
     return baseFn(model, nextContext as typeof context, options);
   };