fix: harden immutable replay id uniqueness

2026-05-06 13:40:44 +00:00 · 2026-04-12 06:36:16 +01:00
parent eed627d3f2
commit b27595f278
7 changed files with 230 additions and 11 deletions
--- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
+++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
@@ -1088,6 +1088,73 @@ describe("sanitizeSessionHistory", () => {
    expect((result[2] as Extract<AgentMessage, { role: "toolResult" }>).toolCallId).toBe("call_1");
  });

+  it("keeps earlier mutable ids from colliding with later preserved signed ids", async () => {
+    setNonGoogleModelApi();
+
+    const sessionManager = makeMockSessionManager();
+    const messages = castAgentMessages([
+      makeUserMessage("first"),
+      makeAssistantMessage([{ type: "toolCall", id: "call_1", name: "read", arguments: {} }]),
+      castAgentMessage({
+        role: "toolResult",
+        toolCallId: "call_1",
+        toolName: "read",
+        content: [{ type: "text", text: "first result" }],
+        isError: false,
+      }),
+      makeUserMessage("second"),
+      makeAssistantMessage(
+        [
+          { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
+          { type: "toolCall", id: "call1", name: "read", arguments: {} },
+        ] as unknown as AssistantMessage["content"],
+        { stopReason: "toolUse" },
+      ),
+      castAgentMessage({
+        role: "toolResult",
+        toolCallId: "call1",
+        toolName: "read",
+        content: [{ type: "text", text: "second result" }],
+        isError: false,
+      }),
+      makeUserMessage("retry"),
+    ]);
+
+    const sanitized = await sanitizeSessionHistory({
+      messages,
+      modelApi: "anthropic-messages",
+      provider: "anthropic",
+      modelId: "claude-sonnet-4-6",
+      sessionManager,
+      sessionId: TEST_SESSION_ID,
+    });
+    const validated = await validateReplayTurns({
+      messages: sanitized,
+      modelApi: "anthropic-messages",
+      provider: "anthropic",
+      modelId: "claude-sonnet-4-6",
+      sessionId: TEST_SESSION_ID,
+    });
+
+    const firstAssistant = sanitized[1] as Extract<AgentMessage, { role: "assistant" }>;
+    const secondAssistant = sanitized[4] as Extract<AgentMessage, { role: "assistant" }>;
+    const firstToolCall = firstAssistant.content[0] as { id?: string };
+    const secondToolCall = secondAssistant.content[1] as { id?: string };
+    expect(firstToolCall.id).not.toBe("call1");
+    expect(secondToolCall.id).toBe("call1");
+    expect(firstToolCall.id).not.toBe(secondToolCall.id);
+    expect((sanitized[2] as Extract<AgentMessage, { role: "toolResult" }>).toolCallId).toBe(
+      firstToolCall.id,
+    );
+    expect((sanitized[5] as Extract<AgentMessage, { role: "toolResult" }>).toolCallId).toBe(
+      "call1",
+    );
+    expect((validated[4] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
+      { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
+      { type: "toolCall", id: "call1", name: "read", arguments: {} },
+    ]);
+  });
+
  it("keeps mutable thinking turns outside exact anthropic replay", async () => {
    setNonGoogleModelApi();

--- a/src/agents/pi-embedded-runner/run/attempt.test.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.test.ts
@@ -1034,6 +1034,51 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
    ]);
  });

+  it("drops signed thinking turns when sibling replay tool calls reuse an id", async () => {
+    const messages = [
+      {
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
+          { type: "toolCall", id: "call_1", name: "read", arguments: {} },
+          { type: "functionCall", id: "call_1", name: "read", arguments: {} },
+        ],
+      },
+      {
+        role: "user",
+        content: [{ type: "text", text: "retry" }],
+      },
+    ];
+    const baseFn = vi.fn((_model, _context) =>
+      createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
+    );
+
+    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
+      baseFn as never,
+      new Set(["read"]),
+      {
+        validateAnthropicTurns: true,
+        preserveSignatures: true,
+        dropThinkingBlocks: false,
+      } as never,
+    );
+    const stream = wrapped(
+      { api: "anthropic-messages" } as never,
+      { messages } as never,
+      {} as never,
+    ) as FakeWrappedStream | Promise<FakeWrappedStream>;
+    await Promise.resolve(stream);
+
+    expect(baseFn).toHaveBeenCalledTimes(1);
+    const seenContext = baseFn.mock.calls[0]?.[1] as { messages: unknown[] };
+    expect(seenContext.messages).toEqual([
+      {
+        role: "user",
+        content: [{ type: "text", text: "retry" }],
+      },
+    ]);
+  });
+
  it("drops signed thinking turns when replay would expose inline sessions_spawn attachments", async () => {
    const attachmentContent = "SIGNED_THINKING_INLINE_ATTACHMENT";
    const messages = [
--- a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts
@@ -267,18 +267,22 @@ function hasUnredactedSessionsSpawnAttachments(block: ReplayToolCallBlock): bool
 }

 function isReplaySafeThinkingTurn(content: unknown[], allowedToolNames?: Set<string>): boolean {
+  const seenToolCallIds = new Set<string>();
  for (const block of content) {
    if (!isReplayToolCallBlock(block)) {
      continue;
    }
    const replayBlock = block;
+    const toolCallId = typeof replayBlock.id === "string" ? replayBlock.id.trim() : "";
    if (
      !replayToolCallHasInput(replayBlock) ||
-      !replayToolCallNonEmptyString(replayBlock.id) ||
+      !toolCallId ||
+      seenToolCallIds.has(toolCallId) ||
      hasUnredactedSessionsSpawnAttachments(replayBlock)
    ) {
      return false;
    }
+    seenToolCallIds.add(toolCallId);
    const rawName = typeof replayBlock.name === "string" ? replayBlock.name : "";
    const resolvedName = resolveReplayToolCallName(rawName, replayBlock.id, allowedToolNames);
    if (!resolvedName || replayBlock.name !== resolvedName) {
--- a/src/agents/session-transcript-repair.test.ts
+++ b/src/agents/session-transcript-repair.test.ts
@@ -411,6 +411,30 @@ describe("sanitizeToolCallInputs", () => {
    expect(out).toEqual([]);
  });

+  it("drops signed-thinking assistant turns when sibling tool calls reuse an id", () => {
+    const input = castAgentMessages([
+      {
+        role: "assistant",
+        content: [
+          {
+            type: "thinking",
+            thinking: "Let me reuse the tool id.",
+            thinkingSignature: "sig_duplicate",
+          },
+          { type: "toolCall", id: "call_shared", name: "read", arguments: { path: "a" } },
+          { type: "toolUse", id: "call_shared", name: "read", input: { path: "b" } },
+        ],
+      },
+    ]);
+
+    const out = sanitizeToolCallInputs(input, {
+      allowedToolNames: ["read"],
+      allowProviderOwnedThinkingReplay: true,
+    });
+
+    expect(out).toEqual([]);
+  });
+
  it("drops signed-thinking assistant turns that would require attachment redaction", () => {
    const secret = "SIGNED_THINKING_ATTACHMENT_SECRET"; // pragma: allowlist secret
    const input = castAgentMessages([
--- a/src/agents/session-transcript-repair.ts
+++ b/src/agents/session-transcript-repair.ts
@@ -201,18 +201,22 @@ function isReplaySafeThinkingAssistantTurn(
  allowedToolNames: Set<string> | null,
 ): boolean {
  let sawToolCall = false;
+  const seenToolCallIds = new Set<string>();
  for (const block of content) {
    if (!isRawToolCallBlock(block)) {
      continue;
    }
    sawToolCall = true;
+    const toolCallId = typeof block.id === "string" ? block.id.trim() : "";
    if (
      !hasToolCallInput(block) ||
-      !hasToolCallId(block) ||
+      !toolCallId ||
+      seenToolCallIds.has(toolCallId) ||
      !hasToolCallName(block, allowedToolNames)
    ) {
      return false;
    }
+    seenToolCallIds.add(toolCallId);
    if (sanitizeToolCallBlock(block) !== block) {
      return false;
    }
--- a/src/agents/tool-call-id.test.ts
+++ b/src/agents/tool-call-id.test.ts
@@ -322,6 +322,52 @@ describe("sanitizeToolCallIdsForCloudCodeAssist", () => {
      expect((out[1] as Extract<AgentMessage, { role: "toolResult" }>).toolCallId).toBe("call_1");
    });

+    it("rewrites earlier mutable ids away from later preserved signed ids", () => {
+      const input = castAgentMessages([
+        {
+          role: "assistant",
+          content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }],
+        },
+        {
+          role: "toolResult",
+          toolCallId: "call_1",
+          toolName: "read",
+          content: [{ type: "text", text: "first" }],
+        },
+        {
+          role: "assistant",
+          content: [
+            { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
+            { type: "toolCall", id: "call1", name: "read", arguments: {} },
+          ],
+        },
+        {
+          role: "toolResult",
+          toolCallId: "call1",
+          toolName: "read",
+          content: [{ type: "text", text: "second" }],
+        },
+      ]);
+
+      const out = sanitizeToolCallIdsForCloudCodeAssist(input, "strict", {
+        preserveReplaySafeThinkingToolCallIds: true,
+        allowedToolNames: ["read"],
+      });
+
+      expect(out).not.toBe(input);
+      const firstAssistant = out[0] as Extract<AgentMessage, { role: "assistant" }>;
+      const secondAssistant = out[2] as Extract<AgentMessage, { role: "assistant" }>;
+      const firstToolCall = firstAssistant.content?.[0] as { id?: string };
+      const secondToolCall = secondAssistant.content?.[1] as { id?: string };
+      expect(firstToolCall.id).not.toBe("call1");
+      expect(secondToolCall.id).toBe("call1");
+      expect(firstToolCall.id).not.toBe(secondToolCall.id);
+      expect((out[1] as Extract<AgentMessage, { role: "toolResult" }>).toolCallId).toBe(
+        firstToolCall.id,
+      );
+      expect((out[3] as Extract<AgentMessage, { role: "toolResult" }>).toolCallId).toBe("call1");
+    });
+
    it("avoids collisions with alphanumeric-only suffixes", () => {
      const input = buildDuplicateIdCollisionInput();

--- a/src/agents/tool-call-id.ts
+++ b/src/agents/tool-call-id.ts
@@ -111,10 +111,6 @@ function hasToolCallInput(block: ReplaySafeToolCallBlock): boolean {
  return hasInput || hasArguments;
 }

-function hasNonEmptyStringField(value: unknown): value is string {
-  return typeof value === "string" && value.trim().length > 0;
-}
-
 function normalizeAllowedToolNames(allowedToolNames?: Iterable<string>): Set<string> | null {
  if (!allowedToolNames) {
    return null;
@@ -212,6 +208,7 @@ function isReplaySafeThinkingAssistantMessage(

  let sawThinking = false;
  let sawToolCall = false;
+  const seenToolCallIds = new Set<string>();
  for (const block of content) {
    if (isThinkingLikeBlock(block)) {
      sawThinking = true;
@@ -228,18 +225,41 @@ function isReplaySafeThinkingAssistantMessage(
      continue;
    }
    sawToolCall = true;
+    const toolCallId = typeof typedBlock.id === "string" ? typedBlock.id.trim() : "";
    if (
      !hasToolCallInput(typedBlock) ||
-      !hasNonEmptyStringField(typedBlock.id) ||
+      !toolCallId ||
+      seenToolCallIds.has(toolCallId) ||
      !hasReplaySafeToolCallName(typedBlock, allowedToolNames) ||
      toolCallNeedsReplayMutation(typedBlock)
    ) {
      return false;
    }
+    seenToolCallIds.add(toolCallId);
  }
  return sawThinking && sawToolCall;
 }

+function collectReplaySafeThinkingToolIds(
+  messages: AgentMessage[],
+  allowedToolNames: Set<string> | null,
+): Set<string> {
+  const reserved = new Set<string>();
+  for (const message of messages) {
+    if (!message || typeof message !== "object" || message.role !== "assistant") {
+      continue;
+    }
+    const assistant = message as Extract<AgentMessage, { role: "assistant" }>;
+    if (!isReplaySafeThinkingAssistantMessage(assistant, allowedToolNames)) {
+      continue;
+    }
+    for (const toolCall of extractToolCallsFromAssistant(assistant)) {
+      reserved.add(toolCall.id);
+    }
+  }
+  return reserved;
+}
+
 export function isValidCloudCodeAssistToolId(id: string, mode: ToolCallIdMode = "strict"): boolean {
  if (!id || typeof id !== "string") {
    return false;
@@ -308,13 +328,16 @@ function makeUniqueToolId(params: { id: string; used: Set<string>; mode: ToolCal

 function createOccurrenceAwareResolver(
  mode: ToolCallIdMode,
-  options?: { preserveNativeAnthropicToolUseIds?: boolean },
+  options?: {
+    preserveNativeAnthropicToolUseIds?: boolean;
+    reservedIds?: Iterable<string>;
+  },
 ): {
  resolveAssistantId: (id: string) => string;
  resolveToolResultId: (id: string) => string;
  preserveAssistantId: (id: string) => string;
 } {
-  const used = new Set<string>();
+  const used = new Set<string>(options?.reservedIds ?? []);
  const assistantOccurrences = new Map<string, number>();
  const orphanToolResultOccurrences = new Map<string, number>();
  const pendingByRawId = new Map<string, string[]>();
@@ -479,11 +502,17 @@ export function sanitizeToolCallIdsForCloudCodeAssist(
  // duplicate tool-call IDs. Track assistant occurrences in-order so repeated
  // raw IDs receive distinct rewritten IDs, while matching tool results consume
  // the same rewritten IDs in encounter order.
-  const { resolveAssistantId, resolveToolResultId, preserveAssistantId } =
-    createOccurrenceAwareResolver(mode, options);
  const allowedToolNames = normalizeAllowedToolNames(options?.allowedToolNames);
  const preserveReplaySafeThinkingToolCallIds =
    options?.preserveReplaySafeThinkingToolCallIds === true;
+  const reservedIds = preserveReplaySafeThinkingToolCallIds
+    ? collectReplaySafeThinkingToolIds(messages, allowedToolNames)
+    : undefined;
+  const { resolveAssistantId, resolveToolResultId, preserveAssistantId } =
+    createOccurrenceAwareResolver(mode, {
+      ...options,
+      reservedIds,
+    });

  let changed = false;
  const out = messages.map((msg) => {