fix: drop conflicting signed replay turns before mutation

2026-05-06 13:40:44 +00:00 · 2026-04-12 06:52:43 +01:00
parent 91465f620b
commit 941aca5e5e
4 changed files with 144 additions and 3 deletions
--- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
+++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
@@ -17,6 +17,7 @@ import {
 } from "./pi-embedded-runner.sanitize-session-history.test-harness.js";
 import { validateReplayTurns } from "./pi-embedded-runner/replay-history.js";
 import { castAgentMessage, castAgentMessages } from "./test-helpers/agent-message-fixtures.js";
+import { extractToolCallsFromAssistant } from "./tool-call-id.js";
 import type { TranscriptPolicy } from "./transcript-policy.js";
 import { makeZeroUsageSnapshot } from "./usage.js";

@@ -1182,6 +1183,93 @@ describe("sanitizeSessionHistory", () => {
    ]);
  });

+  it("drops later preserved signed turns that reuse an earlier raw tool id across the transcript", async () => {
+    setNonGoogleModelApi();
+
+    const sessionManager = makeMockSessionManager();
+    const messages = castAgentMessages([
+      makeUserMessage("first"),
+      makeAssistantMessage(
+        [
+          { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
+          { type: "toolCall", id: "call1", name: "read", arguments: {} },
+        ] as unknown as AssistantMessage["content"],
+        { stopReason: "toolUse" },
+      ),
+      castAgentMessage({
+        role: "toolResult",
+        toolCallId: "call1",
+        toolName: "read",
+        content: [{ type: "text", text: "first result" }],
+        isError: false,
+      }),
+      makeUserMessage("second"),
+      makeAssistantMessage(
+        [
+          { type: "thinking", thinking: "internal", thinkingSignature: "sig_2" },
+          { type: "toolCall", id: "call1", name: "read", arguments: {} },
+        ] as unknown as AssistantMessage["content"],
+        { stopReason: "toolUse" },
+      ),
+      castAgentMessage({
+        role: "toolResult",
+        toolCallId: "call1",
+        toolName: "read",
+        content: [{ type: "text", text: "second result" }],
+        isError: false,
+      }),
+      makeUserMessage("retry"),
+    ]);
+
+    const sanitized = await sanitizeSessionHistory({
+      messages,
+      modelApi: "anthropic-messages",
+      provider: "anthropic",
+      modelId: "claude-sonnet-4-6",
+      sessionManager,
+      sessionId: TEST_SESSION_ID,
+    });
+    const validated = await validateReplayTurns({
+      messages: sanitized,
+      modelApi: "anthropic-messages",
+      provider: "anthropic",
+      modelId: "claude-sonnet-4-6",
+      sessionId: TEST_SESSION_ID,
+    });
+
+    expect(
+      sanitized.filter(
+        (message) =>
+          message &&
+          typeof message === "object" &&
+          message.role === "assistant" &&
+          extractToolCallsFromAssistant(message as Extract<AgentMessage, { role: "assistant" }>)
+            .length > 0,
+      ),
+    ).toHaveLength(1);
+    expect(
+      sanitized.filter(
+        (message) => message && typeof message === "object" && message.role === "toolResult",
+      ),
+    ).toHaveLength(1);
+    expect(
+      validated.filter(
+        (message) =>
+          message &&
+          typeof message === "object" &&
+          message.role === "assistant" &&
+          extractToolCallsFromAssistant(message as Extract<AgentMessage, { role: "assistant" }>)
+            .length > 0,
+      ),
+    ).toHaveLength(1);
+    expect(
+      validated.filter(
+        (message) => message && typeof message === "object" && message.role === "toolResult",
+      ),
+    ).toHaveLength(1);
+    expect(JSON.stringify(validated)).not.toContain("[tool calls omitted]");
+  });
+
  it("keeps the earlier anthropic replay prefix stable after a later subagent turn", async () => {
    setNonGoogleModelApi();

--- a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts
@@ -6,6 +6,7 @@ import {
  isRedactedSessionsSpawnAttachment,
  sanitizeToolUseResultPairing,
 } from "../../session-transcript-repair.js";
+import { extractToolCallsFromAssistant } from "../../tool-call-id.js";
 import { normalizeToolName } from "../../tool-policy.js";
 import { shouldAllowProviderOwnedThinkingReplay } from "../../transcript-policy.js";
 import type { TranscriptPolicy } from "../../transcript-policy.js";
@@ -284,7 +285,7 @@ function isReplaySafeThinkingTurn(content: unknown[], allowedToolNames?: Set<str
    }
    seenToolCallIds.add(toolCallId);
    const rawName = typeof replayBlock.name === "string" ? replayBlock.name : "";
-    const resolvedName = resolveReplayToolCallName(rawName, replayBlock.id, allowedToolNames);
+    const resolvedName = resolveReplayToolCallName(rawName, toolCallId, allowedToolNames);
    if (!resolvedName || replayBlock.name !== resolvedName) {
      return false;
    }
@@ -337,6 +338,7 @@ function sanitizeReplayToolCallInputs(
  let changed = false;
  let droppedAssistantMessages = 0;
  const out: AgentMessage[] = [];
+  const claimedReplaySafeToolCallIds = new Set<string>();

  for (const message of messages) {
    if (!message || typeof message !== "object" || message.role !== "assistant") {
@@ -352,7 +354,16 @@ function sanitizeReplayToolCallInputs(
      message.content.some((block) => isThinkingLikeReplayBlock(block)) &&
      message.content.some((block) => isReplayToolCallBlock(block))
    ) {
-      if (isReplaySafeThinkingTurn(message.content, allowedToolNames)) {
+      const replaySafeToolCalls = extractToolCallsFromAssistant(
+        message as Extract<AgentMessage, { role: "assistant" }>,
+      );
+      if (
+        isReplaySafeThinkingTurn(message.content, allowedToolNames) &&
+        replaySafeToolCalls.every((toolCall) => !claimedReplaySafeToolCallIds.has(toolCall.id))
+      ) {
+        for (const toolCall of replaySafeToolCalls) {
+          claimedReplaySafeToolCallIds.add(toolCall.id);
+        }
        out.push(message);
      } else {
        changed = true;
--- a/src/agents/session-transcript-repair.test.ts
+++ b/src/agents/session-transcript-repair.test.ts
@@ -435,6 +435,40 @@ describe("sanitizeToolCallInputs", () => {
    expect(out).toEqual([]);
  });

+  it("drops later signed-thinking assistant turns that reuse an earlier signed tool id", () => {
+    const input = castAgentMessages([
+      {
+        role: "assistant",
+        content: [
+          {
+            type: "thinking",
+            thinking: "First signed replay turn.",
+            thinkingSignature: "sig_first",
+          },
+          { type: "toolCall", id: "call_shared", name: "read", arguments: { path: "a" } },
+        ],
+      },
+      {
+        role: "assistant",
+        content: [
+          {
+            type: "thinking",
+            thinking: "Second signed replay turn.",
+            thinkingSignature: "sig_second",
+          },
+          { type: "toolUse", id: "call_shared", name: "read", input: { path: "b" } },
+        ],
+      },
+    ]);
+
+    const out = sanitizeToolCallInputs(input, {
+      allowedToolNames: ["read"],
+      allowProviderOwnedThinkingReplay: true,
+    });
+
+    expect(out).toEqual([input[0]]);
+  });
+
  it("drops signed-thinking assistant turns that would require attachment redaction", () => {
    const secret = "SIGNED_THINKING_ATTACHMENT_SECRET"; // pragma: allowlist secret
    const input = castAgentMessages([
--- a/src/agents/session-transcript-repair.ts
+++ b/src/agents/session-transcript-repair.ts
@@ -316,6 +316,7 @@ export function repairToolCallInputs(
  const out: AgentMessage[] = [];
  const allowedToolNames = normalizeAllowedToolNames(options?.allowedToolNames);
  const allowProviderOwnedThinkingReplay = options?.allowProviderOwnedThinkingReplay === true;
+  const claimedReplaySafeToolCallIds = new Set<string>();

  for (const msg of messages) {
    if (!msg || typeof msg !== "object") {
@@ -337,7 +338,14 @@ export function repairToolCallInputs(
      // replay. Preserve the turn only if every sibling tool call is already
      // valid and requires no redaction or normalization. Otherwise drop the
      // whole assistant turn rather than mutating provider-owned content.
-      if (isReplaySafeThinkingAssistantTurn(msg.content, allowedToolNames)) {
+      const replaySafeToolCalls = extractToolCallsFromAssistant(msg);
+      if (
+        isReplaySafeThinkingAssistantTurn(msg.content, allowedToolNames) &&
+        replaySafeToolCalls.every((toolCall) => !claimedReplaySafeToolCallIds.has(toolCall.id))
+      ) {
+        for (const toolCall of replaySafeToolCalls) {
+          claimedReplaySafeToolCallIds.add(toolCall.id);
+        }
        out.push(msg);
      } else {
        droppedToolCalls += countRawToolCallBlocks(msg.content);