From 1e35eed2772e65ceef2e2cb8739643927c0ddf6c Mon Sep 17 00:00:00 2001
From: Shakker <shakkerdroid@gmail.com>
Date: Sun, 12 Apr 2026 06:07:32 +0100
Subject: [PATCH] fix: tighten signed-thinking tool-result trust

---
 ...pi-embedded-helpers.validate-turns.test.ts |  31 ++---
 src/agents/pi-embedded-helpers/turns.ts       |  16 ---
 .../pi-embedded-runner/run/attempt.test.ts    | 118 ++++++++++++------
 .../run/attempt.tool-call-normalization.ts    |  80 ++++++++----
 4 files changed, 148 insertions(+), 97 deletions(-)

diff --git a/src/agents/pi-embedded-helpers.validate-turns.test.ts b/src/agents/pi-embedded-helpers.validate-turns.test.ts
index f87e61d1014..01eb27e32ed 100644
--- a/src/agents/pi-embedded-helpers.validate-turns.test.ts
+++ b/src/agents/pi-embedded-helpers.validate-turns.test.ts
@@ -555,7 +555,7 @@ describe("validateAnthropicTurns strips dangling tool_use blocks", () => {
     ]);
   });
 
-  it("preserves signed-thinking turns when the matching tool result is embedded in user content", () => {
+  it("drops signed-thinking turns when the only matching tool result is embedded in user content", () => {
     const msgs = asMessages([
       { role: "user", content: [{ type: "text", text: "Use tool" }] },
       {
@@ -577,13 +577,13 @@ describe("validateAnthropicTurns strips dangling tool_use blocks", () => {
     const result = validateAnthropicTurns(msgs);
 
     expect(result).toHaveLength(3);
+    expect((result[1] as { role?: unknown }).role).toBe("assistant");
     expect((result[1] as { content?: unknown[] }).content).toEqual([
-      { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
-      { type: "toolUse", id: "tool-1", name: "gateway", arguments: {} },
+      { type: "text", text: "[tool calls omitted]" },
     ]);
   });
 
-  it("preserves signed-thinking turns when a tool result carries both stale and current id aliases", () => {
+  it("preserves signed-thinking turns when a trusted tool result carries both stale and current id aliases", () => {
     const msgs = asMessages([
       { role: "user", content: [{ type: "text", text: "Use tool" }] },
       {
@@ -594,22 +594,19 @@ describe("validateAnthropicTurns strips dangling tool_use blocks", () => {
         ],
       },
       {
-        role: "user",
-        content: [
-          {
-            type: "toolResult",
-            toolUseId: "tool-stale",
-            toolCallId: "tool-current",
-            content: [{ type: "text", text: "ok" }],
-          },
-          { type: "text", text: "Continue" },
-        ],
+        role: "toolResult",
+        toolUseId: "tool-stale",
+        toolCallId: "tool-current",
+        toolName: "gateway",
+        content: [{ type: "text", text: "ok" }],
+        isError: false,
       },
+      { role: "user", content: [{ type: "text", text: "Continue" }] },
     ]);
 
     const result = validateAnthropicTurns(msgs);
 
-    expect(result).toHaveLength(3);
+    expect(result).toHaveLength(4);
     expect((result[1] as { content?: unknown[] }).content).toEqual([
       { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
       { type: "toolCall", id: "tool-current", name: "gateway", arguments: {} },
@@ -683,9 +680,7 @@ describe("validateAnthropicTurns strips dangling tool_use blocks", () => {
 
     expect(result).toHaveLength(3);
     const assistantContent = (result[1] as { content?: unknown[] }).content;
-    expect(assistantContent).toEqual([
-      { type: "text", text: "[tool calls omitted]" },
-    ]);
+    expect(assistantContent).toEqual([{ type: "text", text: "[tool calls omitted]" }]);
   });
 
   it("is replay-safe across repeated validation passes", () => {
diff --git a/src/agents/pi-embedded-helpers/turns.ts b/src/agents/pi-embedded-helpers/turns.ts
index e9dd1dd15e7..c64efbe4ff9 100644
--- a/src/agents/pi-embedded-helpers/turns.ts
+++ b/src/agents/pi-embedded-helpers/turns.ts
@@ -120,22 +120,6 @@ function collectTrustedToolResultMatches(message: AgentMessage): Map<string, Set
     addMatch(extractToolResultMatchIds(record), extractToolResultMatchName(record));
   }
 
-  const content = (message as { content?: unknown }).content;
-  if (!Array.isArray(content)) {
-    return matches;
-  }
-
-  for (const block of content) {
-    if (!block || typeof block !== "object") {
-      continue;
-    }
-    const record = block as Record<string, unknown>;
-    if (record.type !== "toolResult" && record.type !== "tool") {
-      continue;
-    }
-    addMatch(extractToolResultMatchIds(record), extractToolResultMatchName(record));
-  }
-
   return matches;
 }
 
diff --git a/src/agents/pi-embedded-runner/run/attempt.test.ts b/src/agents/pi-embedded-runner/run/attempt.test.ts
index 5460876a3e3..76b15ecd148 100644
--- a/src/agents/pi-embedded-runner/run/attempt.test.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.test.ts
@@ -907,15 +907,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
       createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
     );
 
-    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
-      baseFn as never,
-      new Set(["read"]),
-      {
-        validateAnthropicTurns: true,
-        preserveSignatures: true,
-        dropThinkingBlocks: false,
-      } as never,
-    );
+    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"]), {
+      validateAnthropicTurns: true,
+      preserveSignatures: true,
+      dropThinkingBlocks: false,
+    } as never);
     const stream = wrapped({} as never, { messages } as never, {} as never) as
       | FakeWrappedStream
       | Promise<FakeWrappedStream>;
@@ -943,15 +939,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
       createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
     );
 
-    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
-      baseFn as never,
-      new Set(["read"]),
-      {
-        validateAnthropicTurns: true,
-        preserveSignatures: true,
-        dropThinkingBlocks: false,
-      } as never,
-    );
+    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"]), {
+      validateAnthropicTurns: true,
+      preserveSignatures: true,
+      dropThinkingBlocks: false,
+    } as never);
     const stream = wrapped({} as never, { messages } as never, {} as never) as
       | FakeWrappedStream
       | Promise<FakeWrappedStream>;
@@ -980,15 +972,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
       createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
     );
 
-    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
-      baseFn as never,
-      new Set(["read"]),
-      {
-        validateAnthropicTurns: true,
-        preserveSignatures: true,
-        dropThinkingBlocks: false,
-      } as never,
-    );
+    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"]), {
+      validateAnthropicTurns: true,
+      preserveSignatures: true,
+      dropThinkingBlocks: false,
+    } as never);
     const stream = wrapped(
       { api: "anthropic-messages" } as never,
       { messages } as never,
@@ -1024,15 +1012,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
       createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
     );
 
-    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
-      baseFn as never,
-      new Set(["read"]),
-      {
-        validateAnthropicTurns: true,
-        preserveSignatures: true,
-        dropThinkingBlocks: false,
-      } as never,
-    );
+    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"]), {
+      validateAnthropicTurns: true,
+      preserveSignatures: true,
+      dropThinkingBlocks: false,
+    } as never);
     const stream = wrapped(
       { api: "bedrock-converse-stream" } as never,
       { messages } as never,
@@ -1180,11 +1164,9 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
       createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
     );
 
-    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
-      baseFn as never,
-      new Set(["read"]),
-      { validateAnthropicTurns: true } as never,
-    );
+    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"]), {
+      validateAnthropicTurns: true,
+    } as never);
     const stream = wrapped(
       { api: "openai-completions" } as never,
       { messages } as never,
@@ -1703,6 +1685,60 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
     ]);
   });
 
+  it("drops embedded Anthropic user tool_result blocks when signed-thinking replay must stay provider-owned", async () => {
+    const messages = [
+      {
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
+          { type: "toolUse", id: "call_1", name: "read", input: { path: "." } },
+        ],
+      },
+      {
+        role: "user",
+        content: [
+          {
+            type: "toolResult",
+            toolUseId: "call_1",
+            content: [{ type: "text", text: "embedded result" }],
+          },
+          { type: "text", text: "retry" },
+        ],
+      },
+    ];
+    const baseFn = vi.fn((_model, _context) =>
+      createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
+    );
+
+    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"]), {
+      validateGeminiTurns: false,
+      validateAnthropicTurns: true,
+      preserveSignatures: true,
+      dropThinkingBlocks: false,
+    });
+    const stream = wrapped(
+      { api: "anthropic-messages" } as never,
+      { messages } as never,
+      {} as never,
+    ) as FakeWrappedStream | Promise<FakeWrappedStream>;
+    await Promise.resolve(stream);
+
+    expect(baseFn).toHaveBeenCalledTimes(1);
+    const seenContext = baseFn.mock.calls[0]?.[1] as {
+      messages: Array<{ role?: string; content?: unknown[] }>;
+    };
+    expect(seenContext.messages).toEqual([
+      {
+        role: "assistant",
+        content: [{ type: "text", text: "[tool calls omitted]" }],
+      },
+      {
+        role: "user",
+        content: [{ type: "text", text: "retry" }],
+      },
+    ]);
+  });
+
   it.each(["toolCall", "functionCall"] as const)(
     "preserves matching Anthropic user tool_result blocks after %s replay turns",
     async (toolCallType) => {
diff --git a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts
index 2ee5bd8e6ca..4991db0a086 100644
--- a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts
@@ -6,8 +6,8 @@ import {
   isRedactedSessionsSpawnAttachment,
   sanitizeToolUseResultPairing,
 } from "../../session-transcript-repair.js";
-import { shouldAllowProviderOwnedThinkingReplay } from "../../transcript-policy.js";
 import { normalizeToolName } from "../../tool-policy.js";
+import { shouldAllowProviderOwnedThinkingReplay } from "../../transcript-policy.js";
 import type { TranscriptPolicy } from "../../transcript-policy.js";
 
 function resolveCaseInsensitiveAllowedToolName(
@@ -231,6 +231,9 @@ type ReplayToolCallSanitizeReport = {
 type AnthropicToolResultContentBlock = {
   type?: unknown;
   toolUseId?: unknown;
+  toolCallId?: unknown;
+  tool_use_id?: unknown;
+  tool_call_id?: unknown;
 };
 
 function isThinkingLikeReplayBlock(block: unknown): boolean {
@@ -263,15 +266,12 @@ function hasUnredactedSessionsSpawnAttachments(block: ReplayToolCallBlock): bool
   return false;
 }
 
-function isReplaySafeThinkingTurn(
-  content: unknown[],
-  allowedToolNames?: Set<string>,
-): boolean {
+function isReplaySafeThinkingTurn(content: unknown[], allowedToolNames?: Set<string>): boolean {
   for (const block of content) {
     if (!isReplayToolCallBlock(block)) {
       continue;
     }
-    const replayBlock = block as ReplayToolCallBlock;
+    const replayBlock = block;
     if (
       !replayToolCallHasInput(replayBlock) ||
       !replayToolCallNonEmptyString(replayBlock.id) ||
@@ -409,9 +409,30 @@ function sanitizeReplayToolCallInputs(
   };
 }
 
-function sanitizeAnthropicReplayToolResults(messages: AgentMessage[]): AgentMessage[] {
+function extractAnthropicReplayToolResultIds(block: AnthropicToolResultContentBlock): string[] {
+  const ids: string[] = [];
+  for (const value of [block.toolUseId, block.toolCallId, block.tool_use_id, block.tool_call_id]) {
+    if (typeof value !== "string") {
+      continue;
+    }
+    const trimmed = value.trim();
+    if (!trimmed || ids.includes(trimmed)) {
+      continue;
+    }
+    ids.push(trimmed);
+  }
+  return ids;
+}
+
+function sanitizeAnthropicReplayToolResults(
+  messages: AgentMessage[],
+  options?: {
+    allowEmbeddedUserToolResults?: boolean;
+  },
+): AgentMessage[] {
   let changed = false;
   const out: AgentMessage[] = [];
+  const allowEmbeddedUserToolResults = options?.allowEmbeddedUserToolResults !== false;
 
   for (let index = 0; index < messages.length; index += 1) {
     const message = messages[index];
@@ -450,10 +471,19 @@ function sanitizeAnthropicReplayToolResults(messages: AgentMessage[]): AgentMess
         return true;
       }
       const typedBlock = block as AnthropicToolResultContentBlock;
-      if (typedBlock.type !== "toolResult" || typeof typedBlock.toolUseId !== "string") {
+      if (typedBlock.type !== "toolResult" && typedBlock.type !== "tool") {
         return true;
       }
-      return validToolUseIds.size > 0 && validToolUseIds.has(typedBlock.toolUseId);
+      if (!allowEmbeddedUserToolResults) {
+        changed = true;
+        return false;
+      }
+      const resultIds = extractAnthropicReplayToolResultIds(typedBlock);
+      if (resultIds.length === 0) {
+        changed = true;
+        return false;
+      }
+      return validToolUseIds.size > 0 && resultIds.some((id) => validToolUseIds.has(id));
     });
 
     if (nextContent.length === message.content.length) {
@@ -638,24 +668,30 @@ export function wrapStreamFnSanitizeMalformedToolCalls(
     if (!Array.isArray(messages)) {
       return baseFn(model, context, options);
     }
+    const allowProviderOwnedThinkingReplay = shouldAllowProviderOwnedThinkingReplay({
+      modelApi: (model as { api?: unknown })?.api as string | null | undefined,
+      policy: {
+        validateAnthropicTurns: transcriptPolicy?.validateAnthropicTurns === true,
+        preserveSignatures: transcriptPolicy?.preserveSignatures === true,
+        dropThinkingBlocks: transcriptPolicy?.dropThinkingBlocks === true,
+      },
+    });
     const sanitized = sanitizeReplayToolCallInputs(
       messages as AgentMessage[],
       allowedToolNames,
-      shouldAllowProviderOwnedThinkingReplay({
-        modelApi: (model as { api?: unknown })?.api as string | null | undefined,
-        policy: {
-          validateAnthropicTurns: transcriptPolicy?.validateAnthropicTurns === true,
-          preserveSignatures: transcriptPolicy?.preserveSignatures === true,
-          dropThinkingBlocks: transcriptPolicy?.dropThinkingBlocks === true,
-        },
-      }),
+      allowProviderOwnedThinkingReplay,
     );
-    if (sanitized.messages === messages) {
-      return baseFn(model, context, options);
-    }
-    let nextMessages = sanitizeToolUseResultPairing(sanitized.messages);
+    const replayInputsChanged = sanitized.messages !== messages;
+    let nextMessages = replayInputsChanged
+      ? sanitizeToolUseResultPairing(sanitized.messages)
+      : sanitized.messages;
     if (transcriptPolicy?.validateAnthropicTurns) {
-      nextMessages = sanitizeAnthropicReplayToolResults(nextMessages);
+      nextMessages = sanitizeAnthropicReplayToolResults(nextMessages, {
+        allowEmbeddedUserToolResults: !allowProviderOwnedThinkingReplay,
+      });
+    }
+    if (nextMessages === messages) {
+      return baseFn(model, context, options);
     }
     if (sanitized.droppedAssistantMessages > 0 || transcriptPolicy?.validateAnthropicTurns) {
       if (transcriptPolicy?.validateGeminiTurns) {