diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts index a0b47d63bda..7f6a9be8e05 100644 --- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts +++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts @@ -1088,6 +1088,73 @@ describe("sanitizeSessionHistory", () => { expect((result[2] as Extract).toolCallId).toBe("call_1"); }); + it("keeps earlier mutable ids from colliding with later preserved signed ids", async () => { + setNonGoogleModelApi(); + + const sessionManager = makeMockSessionManager(); + const messages = castAgentMessages([ + makeUserMessage("first"), + makeAssistantMessage([{ type: "toolCall", id: "call_1", name: "read", arguments: {} }]), + castAgentMessage({ + role: "toolResult", + toolCallId: "call_1", + toolName: "read", + content: [{ type: "text", text: "first result" }], + isError: false, + }), + makeUserMessage("second"), + makeAssistantMessage( + [ + { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" }, + { type: "toolCall", id: "call1", name: "read", arguments: {} }, + ] as unknown as AssistantMessage["content"], + { stopReason: "toolUse" }, + ), + castAgentMessage({ + role: "toolResult", + toolCallId: "call1", + toolName: "read", + content: [{ type: "text", text: "second result" }], + isError: false, + }), + makeUserMessage("retry"), + ]); + + const sanitized = await sanitizeSessionHistory({ + messages, + modelApi: "anthropic-messages", + provider: "anthropic", + modelId: "claude-sonnet-4-6", + sessionManager, + sessionId: TEST_SESSION_ID, + }); + const validated = await validateReplayTurns({ + messages: sanitized, + modelApi: "anthropic-messages", + provider: "anthropic", + modelId: "claude-sonnet-4-6", + sessionId: TEST_SESSION_ID, + }); + + const firstAssistant = sanitized[1] as Extract; + const secondAssistant = sanitized[4] as Extract; + const firstToolCall = firstAssistant.content[0] as { id?: string }; + const secondToolCall = secondAssistant.content[1] as { id?: string }; + expect(firstToolCall.id).not.toBe("call1"); + expect(secondToolCall.id).toBe("call1"); + expect(firstToolCall.id).not.toBe(secondToolCall.id); + expect((sanitized[2] as Extract).toolCallId).toBe( + firstToolCall.id, + ); + expect((sanitized[5] as Extract).toolCallId).toBe( + "call1", + ); + expect((validated[4] as Extract).content).toEqual([ + { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" }, + { type: "toolCall", id: "call1", name: "read", arguments: {} }, + ]); + }); + it("keeps mutable thinking turns outside exact anthropic replay", async () => { setNonGoogleModelApi(); diff --git a/src/agents/pi-embedded-runner/run/attempt.test.ts b/src/agents/pi-embedded-runner/run/attempt.test.ts index 76b15ecd148..a1660f35e8c 100644 --- a/src/agents/pi-embedded-runner/run/attempt.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.test.ts @@ -1034,6 +1034,51 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => { ]); }); + it("drops signed thinking turns when sibling replay tool calls reuse an id", async () => { + const messages = [ + { + role: "assistant", + content: [ + { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" }, + { type: "toolCall", id: "call_1", name: "read", arguments: {} }, + { type: "functionCall", id: "call_1", name: "read", arguments: {} }, + ], + }, + { + role: "user", + content: [{ type: "text", text: "retry" }], + }, + ]; + const baseFn = vi.fn((_model, _context) => + createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }), + ); + + const wrapped = wrapStreamFnSanitizeMalformedToolCalls( + baseFn as never, + new Set(["read"]), + { + validateAnthropicTurns: true, + preserveSignatures: true, + dropThinkingBlocks: false, + } as never, + ); + const stream = wrapped( + { api: "anthropic-messages" } as never, + { messages } as never, + {} as never, + ) as FakeWrappedStream | Promise; + await Promise.resolve(stream); + + expect(baseFn).toHaveBeenCalledTimes(1); + const seenContext = baseFn.mock.calls[0]?.[1] as { messages: unknown[] }; + expect(seenContext.messages).toEqual([ + { + role: "user", + content: [{ type: "text", text: "retry" }], + }, + ]); + }); + it("drops signed thinking turns when replay would expose inline sessions_spawn attachments", async () => { const attachmentContent = "SIGNED_THINKING_INLINE_ATTACHMENT"; const messages = [ diff --git a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts index 4991db0a086..75052a7c766 100644 --- a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts +++ b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts @@ -267,18 +267,22 @@ function hasUnredactedSessionsSpawnAttachments(block: ReplayToolCallBlock): bool } function isReplaySafeThinkingTurn(content: unknown[], allowedToolNames?: Set): boolean { + const seenToolCallIds = new Set(); for (const block of content) { if (!isReplayToolCallBlock(block)) { continue; } const replayBlock = block; + const toolCallId = typeof replayBlock.id === "string" ? replayBlock.id.trim() : ""; if ( !replayToolCallHasInput(replayBlock) || - !replayToolCallNonEmptyString(replayBlock.id) || + !toolCallId || + seenToolCallIds.has(toolCallId) || hasUnredactedSessionsSpawnAttachments(replayBlock) ) { return false; } + seenToolCallIds.add(toolCallId); const rawName = typeof replayBlock.name === "string" ? replayBlock.name : ""; const resolvedName = resolveReplayToolCallName(rawName, replayBlock.id, allowedToolNames); if (!resolvedName || replayBlock.name !== resolvedName) { diff --git a/src/agents/session-transcript-repair.test.ts b/src/agents/session-transcript-repair.test.ts index bffcc328083..eb5e64d8176 100644 --- a/src/agents/session-transcript-repair.test.ts +++ b/src/agents/session-transcript-repair.test.ts @@ -411,6 +411,30 @@ describe("sanitizeToolCallInputs", () => { expect(out).toEqual([]); }); + it("drops signed-thinking assistant turns when sibling tool calls reuse an id", () => { + const input = castAgentMessages([ + { + role: "assistant", + content: [ + { + type: "thinking", + thinking: "Let me reuse the tool id.", + thinkingSignature: "sig_duplicate", + }, + { type: "toolCall", id: "call_shared", name: "read", arguments: { path: "a" } }, + { type: "toolUse", id: "call_shared", name: "read", input: { path: "b" } }, + ], + }, + ]); + + const out = sanitizeToolCallInputs(input, { + allowedToolNames: ["read"], + allowProviderOwnedThinkingReplay: true, + }); + + expect(out).toEqual([]); + }); + it("drops signed-thinking assistant turns that would require attachment redaction", () => { const secret = "SIGNED_THINKING_ATTACHMENT_SECRET"; // pragma: allowlist secret const input = castAgentMessages([ diff --git a/src/agents/session-transcript-repair.ts b/src/agents/session-transcript-repair.ts index 81cd70ad378..84fffbf2d01 100644 --- a/src/agents/session-transcript-repair.ts +++ b/src/agents/session-transcript-repair.ts @@ -201,18 +201,22 @@ function isReplaySafeThinkingAssistantTurn( allowedToolNames: Set | null, ): boolean { let sawToolCall = false; + const seenToolCallIds = new Set(); for (const block of content) { if (!isRawToolCallBlock(block)) { continue; } sawToolCall = true; + const toolCallId = typeof block.id === "string" ? block.id.trim() : ""; if ( !hasToolCallInput(block) || - !hasToolCallId(block) || + !toolCallId || + seenToolCallIds.has(toolCallId) || !hasToolCallName(block, allowedToolNames) ) { return false; } + seenToolCallIds.add(toolCallId); if (sanitizeToolCallBlock(block) !== block) { return false; } diff --git a/src/agents/tool-call-id.test.ts b/src/agents/tool-call-id.test.ts index 749cf6061ad..cf8864ed26a 100644 --- a/src/agents/tool-call-id.test.ts +++ b/src/agents/tool-call-id.test.ts @@ -322,6 +322,52 @@ describe("sanitizeToolCallIdsForCloudCodeAssist", () => { expect((out[1] as Extract).toolCallId).toBe("call_1"); }); + it("rewrites earlier mutable ids away from later preserved signed ids", () => { + const input = castAgentMessages([ + { + role: "assistant", + content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }], + }, + { + role: "toolResult", + toolCallId: "call_1", + toolName: "read", + content: [{ type: "text", text: "first" }], + }, + { + role: "assistant", + content: [ + { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" }, + { type: "toolCall", id: "call1", name: "read", arguments: {} }, + ], + }, + { + role: "toolResult", + toolCallId: "call1", + toolName: "read", + content: [{ type: "text", text: "second" }], + }, + ]); + + const out = sanitizeToolCallIdsForCloudCodeAssist(input, "strict", { + preserveReplaySafeThinkingToolCallIds: true, + allowedToolNames: ["read"], + }); + + expect(out).not.toBe(input); + const firstAssistant = out[0] as Extract; + const secondAssistant = out[2] as Extract; + const firstToolCall = firstAssistant.content?.[0] as { id?: string }; + const secondToolCall = secondAssistant.content?.[1] as { id?: string }; + expect(firstToolCall.id).not.toBe("call1"); + expect(secondToolCall.id).toBe("call1"); + expect(firstToolCall.id).not.toBe(secondToolCall.id); + expect((out[1] as Extract).toolCallId).toBe( + firstToolCall.id, + ); + expect((out[3] as Extract).toolCallId).toBe("call1"); + }); + it("avoids collisions with alphanumeric-only suffixes", () => { const input = buildDuplicateIdCollisionInput(); diff --git a/src/agents/tool-call-id.ts b/src/agents/tool-call-id.ts index 071008968cc..752bb323128 100644 --- a/src/agents/tool-call-id.ts +++ b/src/agents/tool-call-id.ts @@ -111,10 +111,6 @@ function hasToolCallInput(block: ReplaySafeToolCallBlock): boolean { return hasInput || hasArguments; } -function hasNonEmptyStringField(value: unknown): value is string { - return typeof value === "string" && value.trim().length > 0; -} - function normalizeAllowedToolNames(allowedToolNames?: Iterable): Set | null { if (!allowedToolNames) { return null; @@ -212,6 +208,7 @@ function isReplaySafeThinkingAssistantMessage( let sawThinking = false; let sawToolCall = false; + const seenToolCallIds = new Set(); for (const block of content) { if (isThinkingLikeBlock(block)) { sawThinking = true; @@ -228,18 +225,41 @@ function isReplaySafeThinkingAssistantMessage( continue; } sawToolCall = true; + const toolCallId = typeof typedBlock.id === "string" ? typedBlock.id.trim() : ""; if ( !hasToolCallInput(typedBlock) || - !hasNonEmptyStringField(typedBlock.id) || + !toolCallId || + seenToolCallIds.has(toolCallId) || !hasReplaySafeToolCallName(typedBlock, allowedToolNames) || toolCallNeedsReplayMutation(typedBlock) ) { return false; } + seenToolCallIds.add(toolCallId); } return sawThinking && sawToolCall; } +function collectReplaySafeThinkingToolIds( + messages: AgentMessage[], + allowedToolNames: Set | null, +): Set { + const reserved = new Set(); + for (const message of messages) { + if (!message || typeof message !== "object" || message.role !== "assistant") { + continue; + } + const assistant = message as Extract; + if (!isReplaySafeThinkingAssistantMessage(assistant, allowedToolNames)) { + continue; + } + for (const toolCall of extractToolCallsFromAssistant(assistant)) { + reserved.add(toolCall.id); + } + } + return reserved; +} + export function isValidCloudCodeAssistToolId(id: string, mode: ToolCallIdMode = "strict"): boolean { if (!id || typeof id !== "string") { return false; @@ -308,13 +328,16 @@ function makeUniqueToolId(params: { id: string; used: Set; mode: ToolCal function createOccurrenceAwareResolver( mode: ToolCallIdMode, - options?: { preserveNativeAnthropicToolUseIds?: boolean }, + options?: { + preserveNativeAnthropicToolUseIds?: boolean; + reservedIds?: Iterable; + }, ): { resolveAssistantId: (id: string) => string; resolveToolResultId: (id: string) => string; preserveAssistantId: (id: string) => string; } { - const used = new Set(); + const used = new Set(options?.reservedIds ?? []); const assistantOccurrences = new Map(); const orphanToolResultOccurrences = new Map(); const pendingByRawId = new Map(); @@ -479,11 +502,17 @@ export function sanitizeToolCallIdsForCloudCodeAssist( // duplicate tool-call IDs. Track assistant occurrences in-order so repeated // raw IDs receive distinct rewritten IDs, while matching tool results consume // the same rewritten IDs in encounter order. - const { resolveAssistantId, resolveToolResultId, preserveAssistantId } = - createOccurrenceAwareResolver(mode, options); const allowedToolNames = normalizeAllowedToolNames(options?.allowedToolNames); const preserveReplaySafeThinkingToolCallIds = options?.preserveReplaySafeThinkingToolCallIds === true; + const reservedIds = preserveReplaySafeThinkingToolCallIds + ? collectReplaySafeThinkingToolIds(messages, allowedToolNames) + : undefined; + const { resolveAssistantId, resolveToolResultId, preserveAssistantId } = + createOccurrenceAwareResolver(mode, { + ...options, + reservedIds, + }); let changed = false; const out = messages.map((msg) => {