From 42c9a1e6cfcfbbf3114ebf4f36d56ec02bc71762 Mon Sep 17 00:00:00 2001 From: Josh Lehman Date: Wed, 18 Mar 2026 13:35:19 -0700 Subject: [PATCH] fix: repair replay tool result pairing Regeneration-Prompt: | Address the PR review finding that replay sanitization can drop an assistant tool-call turn and leave downstream toolResult messages orphaned in the outbound provider context. Keep the replay-only sanitizer from the previous review fix, but when it changes the message list, immediately run sanitizeToolUseResultPairing before handing the context to the provider. Add a regression test that starts with a dropped malformed assistant tool-call turn followed by a toolResult and verifies the orphaned result is removed. --- .../pi-embedded-runner/run/attempt.test.ts | 41 +++++++++++++++++++ src/agents/pi-embedded-runner/run/attempt.ts | 3 +- 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/agents/pi-embedded-runner/run/attempt.test.ts b/src/agents/pi-embedded-runner/run/attempt.test.ts index 8ec81f1c6d6..6585b461c19 100644 --- a/src/agents/pi-embedded-runner/run/attempt.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.test.ts @@ -878,6 +878,47 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => { expect(toolCall.name).toBe("SESSIONS_SPAWN"); expect(toolCall.input?.attachments?.[0]?.content).toBe(attachmentContent); }); + + it("drops orphaned tool results after replay sanitization removes a tool-call turn", async () => { + const messages = [ + { + role: "assistant", + content: [{ type: "toolCall", name: "read", arguments: {} }], + stopReason: "error", + }, + { + role: "toolResult", + toolCallId: "call_missing", + toolName: "read", + content: [{ type: "text", text: "stale result" }], + isError: false, + }, + { + role: "user", + content: [{ type: "text", text: "retry" }], + }, + ]; + const baseFn = vi.fn((_model, _context) => + createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }), + ); + + const wrapped = 
wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"])); + const stream = wrapped({} as never, { messages } as never, {} as never) as + | FakeWrappedStream + | Promise<FakeWrappedStream>; + await Promise.resolve(stream); + + expect(baseFn).toHaveBeenCalledTimes(1); + const seenContext = baseFn.mock.calls[0]?.[1] as { + messages: Array<{ role?: string }>; + }; + expect(seenContext.messages).toEqual([ + { + role: "user", + content: [{ type: "text", text: "retry" }], + }, + ]); + }); }); describe("wrapStreamFnRepairMalformedToolCallArguments", () => { diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index a3acb863b78..e63cfd3f532 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -916,9 +916,10 @@ export function wrapStreamFnSanitizeMalformedToolCalls( if (sanitized === messages) { return baseFn(model, context, options); } + const paired = sanitizeToolUseResultPairing(sanitized); const nextContext = { ...(context as unknown as Record<string, unknown>), - messages: sanitized, + messages: paired, } as unknown; return baseFn(model, nextContext as typeof context, options); };