diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts index dbdf38989b2..c0b6361e1d7 100644 --- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts +++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts @@ -816,7 +816,7 @@ describe("sanitizeSessionHistory", () => { messages, modelApi: "anthropic-messages", provider: "anthropic", - modelId: "claude-opus-4-6", + modelId: "claude-sonnet-4-6", sessionManager, sessionId: TEST_SESSION_ID, }); @@ -856,7 +856,7 @@ describe("sanitizeSessionHistory", () => { messages, modelApi: "anthropic-messages", provider: "anthropic", - modelId: "claude-opus-4-6", + modelId: "claude-sonnet-4-6", sessionManager, sessionId: TEST_SESSION_ID, }); diff --git a/src/agents/pi-embedded-runner/run/attempt.test.ts b/src/agents/pi-embedded-runner/run/attempt.test.ts index 175744b226c..9f985879716 100644 --- a/src/agents/pi-embedded-runner/run/attempt.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.test.ts @@ -907,7 +907,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => { createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }), ); - const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"])); + const wrapped = wrapStreamFnSanitizeMalformedToolCalls( + baseFn as never, + new Set(["read"]), + { validateAnthropicTurns: true } as never, + ); const stream = wrapped({} as never, { messages } as never, {} as never) as | FakeWrappedStream | Promise; @@ -935,7 +939,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => { createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }), ); - const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"])); + const wrapped = wrapStreamFnSanitizeMalformedToolCalls( + baseFn as never, + new Set(["read"]), + { validateAnthropicTurns: true } as never, + ); const stream = wrapped({} as never, { messages } as never, {} as never) as | FakeWrappedStream | Promise; @@ -964,7 +972,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => { createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }), ); - const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"])); + const wrapped = wrapStreamFnSanitizeMalformedToolCalls( + baseFn as never, + new Set(["read"]), + { validateAnthropicTurns: true } as never, + ); const stream = wrapped( { api: "anthropic-messages" } as never, { messages } as never, @@ -1012,6 +1024,7 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => { const wrapped = wrapStreamFnSanitizeMalformedToolCalls( baseFn as never, new Set(["sessions_spawn"]), + { validateAnthropicTurns: true } as never, ); const stream = wrapped( { api: "anthropic-messages" } as never, @@ -1055,6 +1068,7 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => { const wrapped = wrapStreamFnSanitizeMalformedToolCalls( baseFn as never, new Set(["sessions_spawn"]), + { validateAnthropicTurns: true } as never, ); const stream = wrapped({} as never, { messages } as never, {} as never) as | FakeWrappedStream @@ -1073,6 +1087,40 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => { expect(toolCall.input?.attachments?.[0]?.content).toBe(attachmentContent); }); + it("keeps non-Anthropic thinking turns mutable when Anthropic replay validation is off", async () => { + const messages = [ + { + role: "assistant", + content: [ + { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" }, + { type: "toolCall", id: "call_read", name: " read ", arguments: { path: "README.md" } }, + ], + }, + { + role: "user", + content: [{ type: "text", text: "retry" }], + }, + ]; + const baseFn = vi.fn((_model, _context) => + createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }), + ); + + const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"])); + const stream = wrapped({ api: "google-gemini" } as never, { messages } as never, {} as never) as + | FakeWrappedStream + | Promise; + await Promise.resolve(stream); + + expect(baseFn).toHaveBeenCalledTimes(1); + const seenContext = baseFn.mock.calls[0]?.[1] as { + messages: Array<{ content?: unknown[] }>; + }; + expect(seenContext.messages[0]?.content).toEqual([ + { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" }, + { type: "toolCall", id: "call_read", name: "read", arguments: { path: "README.md" } }, + ]); + }); + it("preserves allowlisted tool names that contain punctuation", async () => { const messages = [ { diff --git a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts index 27d8f7c2649..e7b5fad16a8 100644 --- a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts +++ b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts @@ -331,6 +331,7 @@ function resolveReplayToolCallName( function sanitizeReplayToolCallInputs( messages: AgentMessage[], allowedToolNames?: Set, + preserveImmutableThinkingTurns?: boolean, ): ReplayToolCallSanitizeReport { let changed = false; let droppedAssistantMessages = 0; @@ -345,7 +346,11 @@ function sanitizeReplayToolCallInputs( out.push(message); continue; } - if (message.content.some((block) => isThinkingLikeReplayBlock(block))) { + if ( + preserveImmutableThinkingTurns && + message.content.some((block) => isThinkingLikeReplayBlock(block)) && + message.content.some((block) => isReplayToolCallBlock(block)) + ) { if (isReplaySafeThinkingTurn(message.content, allowedToolNames)) { out.push(message); } else { @@ -633,7 +638,11 @@ export function wrapStreamFnSanitizeMalformedToolCalls( if (!Array.isArray(messages)) { return baseFn(model, context, options); } - const sanitized = sanitizeReplayToolCallInputs(messages as AgentMessage[], allowedToolNames); + const sanitized = sanitizeReplayToolCallInputs( + messages as AgentMessage[], + allowedToolNames, + transcriptPolicy?.validateAnthropicTurns === true, + ); if (sanitized.messages === messages) { return baseFn(model, context, options); }