diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts index 8c9757cadff..0252f7a6d15 100644 --- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts +++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts @@ -968,6 +968,48 @@ describe("sanitizeSessionHistory", () => { ]); }); + it("uses immutable thinking replay for anthropic-compatible providers when policy preserves signatures", async () => { + setNonGoogleModelApi(); + + const messages = castAgentMessages([ + makeUserMessage("retry"), + makeAssistantMessage([ + { + type: "thinking", + thinking: "internal", + thinkingSignature: "sig_1", + }, + { type: "toolCall", id: "call_1", name: " read ", arguments: {} }, + ] as unknown as AssistantMessage["content"]), + ]); + + const result = await sanitizeAnthropicHistory({ + provider: "anthropic-vertex", + messages, + policy: { + sanitizeMode: "full", + sanitizeToolCallIds: true, + toolCallIdMode: "strict", + preserveNativeAnthropicToolUseIds: true, + repairToolUseResultPairing: true, + preserveSignatures: true, + sanitizeThoughtSignatures: undefined, + sanitizeThinkingSignatures: false, + dropThinkingBlocks: false, + applyGoogleTurnOrdering: false, + validateGeminiTurns: false, + validateAnthropicTurns: true, + allowSyntheticToolResults: true, + }, + }); + + expect(result).toHaveLength(1); + expect(result[0]).toMatchObject({ + role: "user", + content: "retry", + }); + }); + it("keeps mutable thinking turns outside exact anthropic replay", async () => { setNonGoogleModelApi(); diff --git a/src/agents/pi-embedded-runner/replay-history.ts b/src/agents/pi-embedded-runner/replay-history.ts index 45aaf25499c..11f7ed02f75 100644 --- a/src/agents/pi-embedded-runner/replay-history.ts +++ b/src/agents/pi-embedded-runner/replay-history.ts @@ -29,7 +29,10 @@ import { stripToolResultDetails, } from "../session-transcript-repair.js"; import type { TranscriptPolicy } from 
"../transcript-policy.js"; -import { resolveTranscriptPolicy } from "../transcript-policy.js"; +import { + resolveTranscriptPolicy, + shouldAllowProviderOwnedThinkingReplay, +} from "../transcript-policy.js"; import { makeZeroUsageSnapshot, normalizeUsage, @@ -418,10 +421,10 @@ export async function sanitizeSessionHistory(params: { : sanitizedImages; const sanitizedToolCalls = sanitizeToolCallInputs(droppedThinking, { allowedToolNames: params.allowedToolNames, - allowProviderOwnedThinkingReplay: - policy.validateAnthropicTurns && - params.provider === "anthropic" && - params.modelApi === "anthropic-messages", + allowProviderOwnedThinkingReplay: shouldAllowProviderOwnedThinkingReplay({ + modelApi: params.modelApi, + policy, + }), }); const repairedTools = policy.repairToolUseResultPairing ? sanitizeToolUseResultPairing(sanitizedToolCalls, { diff --git a/src/agents/pi-embedded-runner/run/attempt.test.ts b/src/agents/pi-embedded-runner/run/attempt.test.ts index dbe9e035fed..b96ff7e9533 100644 --- a/src/agents/pi-embedded-runner/run/attempt.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.test.ts @@ -910,7 +910,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => { const wrapped = wrapStreamFnSanitizeMalformedToolCalls( baseFn as never, new Set(["read"]), - { validateAnthropicTurns: true } as never, + { + validateAnthropicTurns: true, + preserveSignatures: true, + dropThinkingBlocks: false, + } as never, ); const stream = wrapped({} as never, { messages } as never, {} as never) as | FakeWrappedStream @@ -942,7 +946,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => { const wrapped = wrapStreamFnSanitizeMalformedToolCalls( baseFn as never, new Set(["read"]), - { validateAnthropicTurns: true } as never, + { + validateAnthropicTurns: true, + preserveSignatures: true, + dropThinkingBlocks: false, + } as never, ); const stream = wrapped({} as never, { messages } as never, {} as never) as | FakeWrappedStream @@ -975,7 +983,11 @@ 
describe("wrapStreamFnSanitizeMalformedToolCalls", () => { const wrapped = wrapStreamFnSanitizeMalformedToolCalls( baseFn as never, new Set(["read"]), - { validateAnthropicTurns: true } as never, + { + validateAnthropicTurns: true, + preserveSignatures: true, + dropThinkingBlocks: false, + } as never, ); const stream = wrapped( { api: "anthropic-messages" } as never, @@ -1024,7 +1036,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => { const wrapped = wrapStreamFnSanitizeMalformedToolCalls( baseFn as never, new Set(["sessions_spawn"]), - { validateAnthropicTurns: true } as never, + { + validateAnthropicTurns: true, + preserveSignatures: true, + dropThinkingBlocks: false, + } as never, ); const stream = wrapped( { api: "anthropic-messages" } as never, @@ -1079,7 +1095,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => { const wrapped = wrapStreamFnSanitizeMalformedToolCalls( baseFn as never, new Set(["sessions_spawn"]), - { validateAnthropicTurns: true } as never, + { + validateAnthropicTurns: true, + preserveSignatures: true, + dropThinkingBlocks: false, + } as never, ); const stream = wrapped( { api: "anthropic-messages" } as never, diff --git a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts index b73ad716534..2ee5bd8e6ca 100644 --- a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts +++ b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts @@ -6,6 +6,7 @@ import { isRedactedSessionsSpawnAttachment, sanitizeToolUseResultPairing, } from "../../session-transcript-repair.js"; +import { shouldAllowProviderOwnedThinkingReplay } from "../../transcript-policy.js"; import { normalizeToolName } from "../../tool-policy.js"; import type { TranscriptPolicy } from "../../transcript-policy.js"; @@ -626,7 +627,10 @@ export function wrapStreamFnTrimToolCallNames( export function wrapStreamFnSanitizeMalformedToolCalls( baseFn: 
StreamFn, allowedToolNames?: Set<string>, - transcriptPolicy?: Pick<TranscriptPolicy, "validateGeminiTurns" | "validateAnthropicTurns">, + transcriptPolicy?: Pick< + TranscriptPolicy, + "validateGeminiTurns" | "validateAnthropicTurns" | "preserveSignatures" | "dropThinkingBlocks" + >, ): StreamFn { return (model, context, options) => { const ctx = context as unknown as { messages?: unknown }; @@ -637,8 +641,14 @@ export function wrapStreamFnSanitizeMalformedToolCalls( const sanitized = sanitizeReplayToolCallInputs( messages as AgentMessage[], allowedToolNames, - transcriptPolicy?.validateAnthropicTurns === true && - (model as { api?: unknown })?.api === "anthropic-messages", + shouldAllowProviderOwnedThinkingReplay({ + modelApi: (model as { api?: unknown })?.api as string | null | undefined, + policy: { + validateAnthropicTurns: transcriptPolicy?.validateAnthropicTurns === true, + preserveSignatures: transcriptPolicy?.preserveSignatures === true, + dropThinkingBlocks: transcriptPolicy?.dropThinkingBlocks === true, + }, + }), ); if (sanitized.messages === messages) { return baseFn(model, context, options); diff --git a/src/agents/transcript-policy.test.ts b/src/agents/transcript-policy.test.ts index 9922ed44b4a..8ad1509771e 100644 --- a/src/agents/transcript-policy.test.ts +++ b/src/agents/transcript-policy.test.ts @@ -178,10 +178,13 @@ vi.mock("../plugins/provider-runtime.js", async () => { }); let resolveTranscriptPolicy: typeof import("./transcript-policy.js").resolveTranscriptPolicy; +let shouldAllowProviderOwnedThinkingReplay: typeof import("./transcript-policy.js").shouldAllowProviderOwnedThinkingReplay; describe("resolveTranscriptPolicy", () => { beforeAll(async () => { - ({ resolveTranscriptPolicy } = await import("./transcript-policy.js")); + ({ resolveTranscriptPolicy, shouldAllowProviderOwnedThinkingReplay } = await import( + "./transcript-policy.js" + )); }); beforeEach(() => { @@ -404,6 +407,34 @@ describe("resolveTranscriptPolicy", () => { expect(policy.preserveSignatures).toBe(preserveSignatures); }); + it("allows immutable
provider-owned thinking replay for anthropic-compatible native replay policies", () => { + const policy = resolveTranscriptPolicy({ + provider: "minimax", + modelId: "MiniMax-M2.7", + modelApi: "anthropic-messages", + }); + expect( + shouldAllowProviderOwnedThinkingReplay({ + modelApi: "anthropic-messages", + policy, + }), + ).toBe(true); + }); + + it("does not allow immutable provider-owned thinking replay for strict openai-compatible replay", () => { + const policy = resolveTranscriptPolicy({ + provider: "vllm", + modelId: "gemma-3-27b", + modelApi: "openai-completions", + }); + expect( + shouldAllowProviderOwnedThinkingReplay({ + modelApi: "openai-completions", + policy, + }), + ).toBe(false); + }); + it("enables turn-ordering and assistant-merge for strict OpenAI-compatible providers (#38962)", () => { const policy = resolveTranscriptPolicy({ provider: "vllm", diff --git a/src/agents/transcript-policy.ts b/src/agents/transcript-policy.ts index 4a7b0b0d416..3185c8823e9 100644 --- a/src/agents/transcript-policy.ts +++ b/src/agents/transcript-policy.ts @@ -29,6 +29,18 @@ export type TranscriptPolicy = { allowSyntheticToolResults: boolean; }; +export function shouldAllowProviderOwnedThinkingReplay(params: { + modelApi?: string | null; + policy: Pick<TranscriptPolicy, "validateAnthropicTurns" | "preserveSignatures" | "dropThinkingBlocks">; +}): boolean { + return ( + params.modelApi === "anthropic-messages" && + params.policy.validateAnthropicTurns === true && + params.policy.preserveSignatures === true && + params.policy.dropThinkingBlocks !== true + ); +} + const DEFAULT_TRANSCRIPT_POLICY: TranscriptPolicy = { sanitizeMode: "images-only", sanitizeToolCallIds: false,