From 98e89f59399bfe075ad4b8691396d76425c651b3 Mon Sep 17 00:00:00 2001 From: Shakker Date: Sun, 12 Apr 2026 04:11:42 +0100 Subject: [PATCH] fix: gate immutable thinking replay repair to anthropic --- .../pi-embedded-runner/replay-history.ts | 1 + src/agents/session-transcript-repair.test.ts | 47 ++++++++++++++++++- src/agents/session-transcript-repair.ts | 10 +++- 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/src/agents/pi-embedded-runner/replay-history.ts b/src/agents/pi-embedded-runner/replay-history.ts index a3061f57ce7..29fe1692eef 100644 --- a/src/agents/pi-embedded-runner/replay-history.ts +++ b/src/agents/pi-embedded-runner/replay-history.ts @@ -418,6 +418,7 @@ export async function sanitizeSessionHistory(params: { : sanitizedImages; const sanitizedToolCalls = sanitizeToolCallInputs(droppedThinking, { allowedToolNames: params.allowedToolNames, + preserveImmutableThinkingTurns: policy.validateAnthropicTurns, }); const repairedTools = policy.repairToolUseResultPairing ? sanitizeToolUseResultPairing(sanitizedToolCalls, { diff --git a/src/agents/session-transcript-repair.test.ts b/src/agents/session-transcript-repair.test.ts index a7e6ac8ad71..dcb36dba379 100644 --- a/src/agents/session-transcript-repair.test.ts +++ b/src/agents/session-transcript-repair.test.ts @@ -403,7 +403,10 @@ describe("sanitizeToolCallInputs", () => { }, ]); - const out = sanitizeToolCallInputs(input, { allowedToolNames: ["read"] }); + const out = sanitizeToolCallInputs(input, { + allowedToolNames: ["read"], + preserveImmutableThinkingTurns: true, + }); expect(out).toEqual([]); }); @@ -432,12 +435,52 @@ describe("sanitizeToolCallInputs", () => { }, ]); - const out = sanitizeToolCallInputs(input, { allowedToolNames: ["sessions_spawn"] }); + const out = sanitizeToolCallInputs(input, { + allowedToolNames: ["sessions_spawn"], + preserveImmutableThinkingTurns: true, + }); expect(out).toEqual([]); expect(JSON.stringify(out)).not.toContain(secret); }); + it("keeps generic thinking turns mutable when immutable preservation is disabled", () => { + const input = castAgentMessages([ + { + role: "assistant", + content: [ + { + type: "thinking", + thinking: "Let me normalize this tool name.", + thinkingSignature: "sig_generic", + }, + { + type: "toolCall", + id: "call_read", + name: " read ", + arguments: { path: "README.md" }, + }, + ], + }, + ]); + + const out = sanitizeToolCallInputs(input, { allowedToolNames: ["read"] }); + const assistant = out[0] as Extract; + expect(assistant.content).toEqual([ + { + type: "thinking", + thinking: "Let me normalize this tool name.", + thinkingSignature: "sig_generic", + }, + { + type: "toolCall", + id: "call_read", + name: "read", + arguments: { path: "README.md" }, + }, + ]); + }); + it.each([ { name: "trims leading whitespace from tool names", diff --git a/src/agents/session-transcript-repair.ts b/src/agents/session-transcript-repair.ts index cd845c0219e..b7196c0745d 100644 --- a/src/agents/session-transcript-repair.ts +++ b/src/agents/session-transcript-repair.ts @@ -176,7 +176,7 @@ function isReplaySafeThinkingAssistantTurn( return false; } } - return sawToolCall || content.some((block) => isThinkingLikeBlock(block)); + return sawToolCall; } function makeMissingToolResult(params: { @@ -232,6 +232,7 @@ export type ToolCallInputRepairReport = { export type ToolCallInputRepairOptions = { allowedToolNames?: Iterable; + preserveImmutableThinkingTurns?: boolean; }; export type ErroredAssistantResultPolicy = "preserve" | "drop"; @@ -269,6 +270,7 @@ export function repairToolCallInputs( let changed = false; const out: AgentMessage[] = []; const allowedToolNames = normalizeAllowedToolNames(options?.allowedToolNames); + const preserveImmutableThinkingTurns = options?.preserveImmutableThinkingTurns === true; for (const msg of messages) { if (!msg || typeof msg !== "object") { @@ -281,7 +283,11 @@ export function repairToolCallInputs( continue; } - if (msg.content.some((block) => isThinkingLikeBlock(block))) { + if ( + preserveImmutableThinkingTurns && + msg.content.some((block) => isThinkingLikeBlock(block)) && + countRawToolCallBlocks(msg.content) > 0 + ) { // Signed Anthropic thinking blocks must remain byte-for-byte stable on // replay. Preserve the turn only if every sibling tool call is already // valid and requires no redaction or normalization. Otherwise drop the