diff --git a/src/agents/session-transcript-repair.test.ts b/src/agents/session-transcript-repair.test.ts index 3f460c40d80..a7e6ac8ad71 100644 --- a/src/agents/session-transcript-repair.test.ts +++ b/src/agents/session-transcript-repair.test.ts @@ -380,7 +380,7 @@ describe("sanitizeToolCallInputs", () => { expect(types).toEqual(["text", "toolUse"]); }); - it("preserves assistant turns that include thinking blocks", () => { + it("drops signed-thinking assistant turns when sibling tool calls are not replay-safe", () => { const input = castAgentMessages([ { role: "assistant", @@ -405,13 +405,37 @@ describe("sanitizeToolCallInputs", () => { const out = sanitizeToolCallInputs(input, { allowedToolNames: ["read"] }); - expect(out).toBe(input); - const assistant = out[0] as Extract; - const types = Array.isArray(assistant.content) - ? assistant.content.map((block) => (block as { type?: unknown }).type) - : []; - expect(types).toEqual(["thinking", "toolCall"]); - expect((assistant.content?.[1] as { name?: unknown })?.name).toBe("gateway"); + expect(out).toEqual([]); + }); + + it("drops signed-thinking assistant turns that would require attachment redaction", () => { + const secret = "SIGNED_THINKING_ATTACHMENT_SECRET"; // pragma: allowlist secret + const input = castAgentMessages([ + { + role: "assistant", + content: [ + { + type: "thinking", + thinking: "Let me spawn a helper.", + thinkingSignature: "sig_spawn", + }, + { + type: "toolUse", + id: "call_spawn", + name: "sessions_spawn", + input: { + task: "inspect attachment", + attachments: [{ name: "snapshot.txt", content: secret }], + }, + }, + ], + }, + ]); + + const out = sanitizeToolCallInputs(input, { allowedToolNames: ["sessions_spawn"] }); + + expect(out).toEqual([]); + expect(JSON.stringify(out)).not.toContain(secret); }); it.each([ diff --git a/src/agents/session-transcript-repair.ts b/src/agents/session-transcript-repair.ts index 044fa9731e1..cd845c0219e 100644 --- a/src/agents/session-transcript-repair.ts +++ b/src/agents/session-transcript-repair.ts @@ -145,6 +145,40 @@ function sanitizeToolCallBlock(block: RawToolCallBlock): RawToolCallBlock { return next as RawToolCallBlock; } +function countRawToolCallBlocks(content: unknown[]): number { + let count = 0; + for (const block of content) { + if (isRawToolCallBlock(block)) { + count += 1; + } + } + return count; +} + +function isReplaySafeThinkingAssistantTurn( + content: unknown[], + allowedToolNames: Set | null, +): boolean { + let sawToolCall = false; + for (const block of content) { + if (!isRawToolCallBlock(block)) { + continue; + } + sawToolCall = true; + if ( + !hasToolCallInput(block) || + !hasToolCallId(block) || + !hasToolCallName(block, allowedToolNames) + ) { + return false; + } + if (sanitizeToolCallBlock(block) !== block) { + return false; + } + } + return sawToolCall || content.some((block) => isThinkingLikeBlock(block)); +} + function makeMissingToolResult(params: { toolCallId: string; toolName?: string; @@ -247,11 +281,18 @@ export function repairToolCallInputs( continue; } - // Preserve provider-owned thinking turns verbatim. Anthropic replays can - // reject any historical assistant turn whose signed thinking block no - // longer matches the original response, including sibling tool calls. if (msg.content.some((block) => isThinkingLikeBlock(block))) { - out.push(msg); + // Signed Anthropic thinking blocks must remain byte-for-byte stable on + // replay. Preserve the turn only if every sibling tool call is already + // valid and requires no redaction or normalization. Otherwise drop the + // whole assistant turn rather than mutating provider-owned content. + if (isReplaySafeThinkingAssistantTurn(msg.content, allowedToolNames)) { + out.push(msg); + } else { + droppedToolCalls += countRawToolCallBlocks(msg.content); + droppedAssistantMessages += 1; + changed = true; + } continue; }