From cfbdb1ffce799c841a969e7177c601736591d79e Mon Sep 17 00:00:00 2001 From: Josh Lehman Date: Fri, 20 Mar 2026 07:10:46 -0700 Subject: [PATCH] fix: drop replay tool calls outside allowlist --- .../pi-embedded-runner/run/attempt.test.ts | 40 +++++++++++++++++++ src/agents/pi-embedded-runner/run/attempt.ts | 17 +++++--- 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/src/agents/pi-embedded-runner/run/attempt.test.ts b/src/agents/pi-embedded-runner/run/attempt.test.ts index 01d109c0daa..15f0cc53284 100644 --- a/src/agents/pi-embedded-runner/run/attempt.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.test.ts @@ -1040,6 +1040,46 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => { }, ]); }); + + it("drops replayed tool calls that are no longer allowlisted", async () => { + const messages = [ + { + role: "assistant", + content: [{ type: "toolCall", id: "call_1", name: "write", arguments: {} }], + }, + { + role: "toolResult", + toolCallId: "call_1", + toolName: "write", + content: [{ type: "text", text: "stale result" }], + isError: false, + }, + { + role: "user", + content: [{ type: "text", text: "retry" }], + }, + ]; + const baseFn = vi.fn((_model, _context) => + createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }), + ); + + const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"])); + const stream = wrapped({} as never, { messages } as never, {} as never) as + | FakeWrappedStream + | Promise; + await Promise.resolve(stream); + + expect(baseFn).toHaveBeenCalledTimes(1); + const seenContext = baseFn.mock.calls[0]?.[1] as { + messages: Array<{ role?: string }>; + }; + expect(seenContext.messages).toEqual([ + { + role: "user", + content: [{ type: "text", text: "retry" }], + }, + ]); + }); }); describe("wrapStreamFnRepairMalformedToolCallArguments", () => { diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 35bc6cd5910..9b76c6be1a9 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -689,7 +689,13 @@ function resolveReplayToolCallName( if (!trimmed || trimmed.length > REPLAY_TOOL_CALL_NAME_MAX_CHARS || /\s/.test(trimmed)) { return null; } - return trimmed; + if (!allowedToolNames || allowedToolNames.size === 0) { + return trimmed; + } + return ( + resolveExactAllowedToolName(trimmed, allowedToolNames) ?? + resolveStructuredAllowedToolName(trimmed, allowedToolNames) + ); } function sanitizeReplayToolCallInputs( @@ -717,22 +723,23 @@ function sanitizeReplayToolCallInputs( nextContent.push(block); continue; } + const replayBlock = block as ReplayToolCallBlock; - if (!replayToolCallHasInput(block) || !replayToolCallNonEmptyString(block.id)) { + if (!replayToolCallHasInput(replayBlock) || !replayToolCallNonEmptyString(replayBlock.id)) { changed = true; messageChanged = true; continue; } - const rawName = typeof block.name === "string" ? block.name : ""; - const resolvedName = resolveReplayToolCallName(rawName, block.id, allowedToolNames); + const rawName = typeof replayBlock.name === "string" ? replayBlock.name : ""; + const resolvedName = resolveReplayToolCallName(rawName, replayBlock.id, allowedToolNames); if (!resolvedName) { changed = true; messageChanged = true; continue; } - if (block.name !== resolvedName) { + if (replayBlock.name !== resolvedName) { nextContent.push({ ...(block as object), name: resolvedName } as typeof block); changed = true; messageChanged = true;