fix: gate immutable thinking replay by transcript policy

2026-05-06 11:00:42 +00:00 · 2026-04-12 05:11:08 +01:00
parent 5c244b3bd2
commit c6e2298950
6 changed files with 132 additions and 14 deletions
--- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
+++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
@@ -968,6 +968,48 @@ describe("sanitizeSessionHistory", () => {
    ]);
  });

+  it("uses immutable thinking replay for anthropic-compatible providers when policy preserves signatures", async () => { // Feeds a user turn plus an assistant turn (signed thinking + tool call) and expects only the user turn back.
+    setNonGoogleModelApi();
+
+    const messages = castAgentMessages([
+      makeUserMessage("retry"),
+      makeAssistantMessage([
+        {
+          type: "thinking",
+          thinking: "internal",
+          thinkingSignature: "sig_1", // The thinking block carries a signature.
+        },
+        { type: "toolCall", id: "call_1", name: " read ", arguments: {} }, // Tool name has a leading and a trailing space.
+      ] as unknown as AssistantMessage["content"]), // Double cast bypasses the content type check for this fixture.
+    ]);
+
+    const result = await sanitizeAnthropicHistory({
+      provider: "anthropic-vertex", // Provider id is not the literal "anthropic".
+      messages,
+      policy: {
+        sanitizeMode: "full",
+        sanitizeToolCallIds: true,
+        toolCallIdMode: "strict",
+        preserveNativeAnthropicToolUseIds: true,
+        repairToolUseResultPairing: true,
+        preserveSignatures: true, // Signatures are kept...
+        sanitizeThoughtSignatures: undefined,
+        sanitizeThinkingSignatures: false,
+        dropThinkingBlocks: false, // ...and thinking blocks are not dropped...
+        applyGoogleTurnOrdering: false,
+        validateGeminiTurns: false,
+        validateAnthropicTurns: true, // ...while Anthropic turn validation is on.
+        allowSyntheticToolResults: true,
+      },
+    });
+
+    expect(result).toHaveLength(1); // Two messages went in; exactly one is expected out.
+    expect(result[0]).toMatchObject({ // NOTE(review): presumably the assistant turn is removed because its signed thinking block may not be edited to repair the padded tool name — confirm in sanitizeToolCallInputs.
+      role: "user",
+      content: "retry",
+    });
+  });
+
  it("keeps mutable thinking turns outside exact anthropic replay", async () => {
    setNonGoogleModelApi();

--- a/src/agents/pi-embedded-runner/replay-history.ts
+++ b/src/agents/pi-embedded-runner/replay-history.ts
@@ -29,7 +29,10 @@ import {
  stripToolResultDetails,
 } from "../session-transcript-repair.js";
 import type { TranscriptPolicy } from "../transcript-policy.js";
-import { resolveTranscriptPolicy } from "../transcript-policy.js";
+import {
+  resolveTranscriptPolicy,
+  shouldAllowProviderOwnedThinkingReplay,
+} from "../transcript-policy.js";
 import {
  makeZeroUsageSnapshot,
  normalizeUsage,
@@ -418,10 +421,10 @@ export async function sanitizeSessionHistory(params: {
    : sanitizedImages;
  const sanitizedToolCalls = sanitizeToolCallInputs(droppedThinking, {
    allowedToolNames: params.allowedToolNames,
-    allowProviderOwnedThinkingReplay:
-      policy.validateAnthropicTurns &&
-      params.provider === "anthropic" &&
-      params.modelApi === "anthropic-messages",
+    allowProviderOwnedThinkingReplay: shouldAllowProviderOwnedThinkingReplay({
+      modelApi: params.modelApi,
+      policy,
+    }),
  });
  const repairedTools = policy.repairToolUseResultPairing
    ? sanitizeToolUseResultPairing(sanitizedToolCalls, {
--- a/src/agents/pi-embedded-runner/run/attempt.test.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.test.ts
@@ -910,7 +910,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
      baseFn as never,
      new Set(["read"]),
-      { validateAnthropicTurns: true } as never,
+      {
+        validateAnthropicTurns: true,
+        preserveSignatures: true,
+        dropThinkingBlocks: false,
+      } as never,
    );
    const stream = wrapped({} as never, { messages } as never, {} as never) as
      | FakeWrappedStream
@@ -942,7 +946,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
      baseFn as never,
      new Set(["read"]),
-      { validateAnthropicTurns: true } as never,
+      {
+        validateAnthropicTurns: true,
+        preserveSignatures: true,
+        dropThinkingBlocks: false,
+      } as never,
    );
    const stream = wrapped({} as never, { messages } as never, {} as never) as
      | FakeWrappedStream
@@ -975,7 +983,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
      baseFn as never,
      new Set(["read"]),
-      { validateAnthropicTurns: true } as never,
+      {
+        validateAnthropicTurns: true,
+        preserveSignatures: true,
+        dropThinkingBlocks: false,
+      } as never,
    );
    const stream = wrapped(
      { api: "anthropic-messages" } as never,
@@ -1024,7 +1036,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
      baseFn as never,
      new Set(["sessions_spawn"]),
-      { validateAnthropicTurns: true } as never,
+      {
+        validateAnthropicTurns: true,
+        preserveSignatures: true,
+        dropThinkingBlocks: false,
+      } as never,
    );
    const stream = wrapped(
      { api: "anthropic-messages" } as never,
@@ -1079,7 +1095,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
      baseFn as never,
      new Set(["sessions_spawn"]),
-      { validateAnthropicTurns: true } as never,
+      {
+        validateAnthropicTurns: true,
+        preserveSignatures: true,
+        dropThinkingBlocks: false,
+      } as never,
    );
    const stream = wrapped(
      { api: "anthropic-messages" } as never,
--- a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts
@@ -6,6 +6,7 @@ import {
  isRedactedSessionsSpawnAttachment,
  sanitizeToolUseResultPairing,
 } from "../../session-transcript-repair.js";
+import { shouldAllowProviderOwnedThinkingReplay } from "../../transcript-policy.js";
 import { normalizeToolName } from "../../tool-policy.js";
 import type { TranscriptPolicy } from "../../transcript-policy.js";

@@ -626,7 +627,10 @@ export function wrapStreamFnTrimToolCallNames(
 export function wrapStreamFnSanitizeMalformedToolCalls(
  baseFn: StreamFn,
  allowedToolNames?: Set<string>,
-  transcriptPolicy?: Pick<TranscriptPolicy, "validateGeminiTurns" | "validateAnthropicTurns">,
+  transcriptPolicy?: Pick<
+    TranscriptPolicy,
+    "validateGeminiTurns" | "validateAnthropicTurns" | "preserveSignatures" | "dropThinkingBlocks"
+  >,
 ): StreamFn {
  return (model, context, options) => {
    const ctx = context as unknown as { messages?: unknown };
@@ -637,8 +641,14 @@ export function wrapStreamFnSanitizeMalformedToolCalls(
    const sanitized = sanitizeReplayToolCallInputs(
      messages as AgentMessage[],
      allowedToolNames,
-      transcriptPolicy?.validateAnthropicTurns === true &&
-        (model as { api?: unknown })?.api === "anthropic-messages",
+      shouldAllowProviderOwnedThinkingReplay({
+        modelApi: (model as { api?: unknown })?.api as string | null | undefined,
+        policy: {
+          validateAnthropicTurns: transcriptPolicy?.validateAnthropicTurns === true,
+          preserveSignatures: transcriptPolicy?.preserveSignatures === true,
+          dropThinkingBlocks: transcriptPolicy?.dropThinkingBlocks === true,
+        },
+      }),
    );
    if (sanitized.messages === messages) {
      return baseFn(model, context, options);
--- a/src/agents/transcript-policy.test.ts
+++ b/src/agents/transcript-policy.test.ts
@@ -178,10 +178,13 @@ vi.mock("../plugins/provider-runtime.js", async () => {
 });

 let resolveTranscriptPolicy: typeof import("./transcript-policy.js").resolveTranscriptPolicy;
+let shouldAllowProviderOwnedThinkingReplay: typeof import("./transcript-policy.js").shouldAllowProviderOwnedThinkingReplay;

 describe("resolveTranscriptPolicy", () => {
  beforeAll(async () => {
-    ({ resolveTranscriptPolicy } = await import("./transcript-policy.js"));
+    ({ resolveTranscriptPolicy, shouldAllowProviderOwnedThinkingReplay } = await import(
+      "./transcript-policy.js"
+    ));
  });

  beforeEach(() => {
@@ -404,6 +407,34 @@ describe("resolveTranscriptPolicy", () => {
    expect(policy.preserveSignatures).toBe(preserveSignatures);
  });

+  it("allows immutable provider-owned thinking replay for anthropic-compatible native replay policies", () => { // Positive case: helper returns true for a policy resolved with modelApi "anthropic-messages".
+    const policy = resolveTranscriptPolicy({ // Policy is obtained from resolveTranscriptPolicy, not written out as a literal.
+      provider: "minimax", // Provider id differs from "anthropic".
+      modelId: "MiniMax-M2.7",
+      modelApi: "anthropic-messages",
+    });
+    expect(
+      shouldAllowProviderOwnedThinkingReplay({
+        modelApi: "anthropic-messages", // Same API string that was used to resolve the policy.
+        policy,
+      }),
+    ).toBe(true);
+  });
+
+  it("does not allow immutable provider-owned thinking replay for strict openai-compatible replay", () => { // Negative case: helper returns false for a policy resolved with modelApi "openai-completions".
+    const policy = resolveTranscriptPolicy({ // Policy is obtained from resolveTranscriptPolicy, not written out as a literal.
+      provider: "vllm",
+      modelId: "gemma-3-27b",
+      modelApi: "openai-completions",
+    });
+    expect(
+      shouldAllowProviderOwnedThinkingReplay({
+        modelApi: "openai-completions", // Same API string that was used to resolve the policy.
+        policy,
+      }),
+    ).toBe(false);
+  });
+
  it("enables turn-ordering and assistant-merge for strict OpenAI-compatible providers (#38962)", () => {
    const policy = resolveTranscriptPolicy({
      provider: "vllm",
--- a/src/agents/transcript-policy.ts
+++ b/src/agents/transcript-policy.ts
@@ -29,6 +29,18 @@ export type TranscriptPolicy = {
  allowSyntheticToolResults: boolean;
 };

+export function shouldAllowProviderOwnedThinkingReplay(params: { // Returns true only when all four checks in the return expression hold; the provider name is not an input.
+  modelApi?: string | null; // API identifier of the target model; may be omitted or null.
+  policy: Pick<TranscriptPolicy, "validateAnthropicTurns" | "preserveSignatures" | "dropThinkingBlocks">; // Only these three policy flags are read.
+}): boolean {
+  return (
+    params.modelApi === "anthropic-messages" && // Exact string match, so an undefined or null modelApi yields false.
+    params.policy.validateAnthropicTurns === true && // Strict comparison: any value other than literal true fails.
+    params.policy.preserveSignatures === true && // Strict comparison: any value other than literal true fails.
+    params.policy.dropThinkingBlocks !== true // Any value other than literal true (including undefined) passes.
+  );
+}
+
 const DEFAULT_TRANSCRIPT_POLICY: TranscriptPolicy = {
  sanitizeMode: "images-only",
  sanitizeToolCallIds: false,