fix: drop unsafe signed-thinking turns during transcript repair

2026-05-06 08:10:44 +00:00 · 2026-04-12 03:43:20 +01:00
parent 5568cada24
commit 408e07f96b
2 changed files with 77 additions and 12 deletions
--- a/src/agents/session-transcript-repair.test.ts
+++ b/src/agents/session-transcript-repair.test.ts
@@ -380,7 +380,7 @@ describe("sanitizeToolCallInputs", () => {
    expect(types).toEqual(["text", "toolUse"]);
  });

-  it("preserves assistant turns that include thinking blocks", () => {
+  it("drops signed-thinking assistant turns when sibling tool calls are not replay-safe", () => {
    const input = castAgentMessages([
      {
        role: "assistant",
@@ -405,13 +405,37 @@ describe("sanitizeToolCallInputs", () => {

    const out = sanitizeToolCallInputs(input, { allowedToolNames: ["read"] });

-    expect(out).toBe(input);
-    const assistant = out[0] as Extract<AgentMessage, { role: "assistant" }>;
-    const types = Array.isArray(assistant.content)
-      ? assistant.content.map((block) => (block as { type?: unknown }).type)
-      : [];
-    expect(types).toEqual(["thinking", "toolCall"]);
-    expect((assistant.content?.[1] as { name?: unknown })?.name).toBe("gateway");
+    expect(out).toEqual([]);
+  });
+
+  it("drops signed-thinking assistant turns that would require attachment redaction", () => {
+    const secret = "SIGNED_THINKING_ATTACHMENT_SECRET"; // pragma: allowlist secret
+    const input = castAgentMessages([
+      {
+        role: "assistant",
+        content: [
+          {
+            type: "thinking",
+            thinking: "Let me spawn a helper.",
+            thinkingSignature: "sig_spawn", // Signature is what makes this a "signed" thinking block.
+          },
+          {
+            type: "toolUse",
+            id: "call_spawn",
+            name: "sessions_spawn",
+            input: {
+              task: "inspect attachment",
+              attachments: [{ name: "snapshot.txt", content: secret }], // Attachment content carries the secret the test title says would need redaction.
+            },
+          },
+        ],
+      },
+    ]);
+
+    const out = sanitizeToolCallInputs(input, { allowedToolNames: ["sessions_spawn"] }); // Tool name is allowlisted, so the name is presumably not what makes the turn unsafe.
+
+    expect(out).toEqual([]); // The whole assistant turn is expected to be gone, not rewritten.
+    expect(JSON.stringify(out)).not.toContain(secret); // The secret must not appear anywhere in the result.
  });

  it.each([
--- a/src/agents/session-transcript-repair.ts
+++ b/src/agents/session-transcript-repair.ts
@@ -145,6 +145,40 @@ function sanitizeToolCallBlock(block: RawToolCallBlock): RawToolCallBlock {
  return next as RawToolCallBlock;
 }

+function countRawToolCallBlocks(content: unknown[]): number { // Returns how many entries of `content` isRawToolCallBlock accepts.
+  let count = 0;
+  for (const block of content) {
+    if (isRawToolCallBlock(block)) { // Entries that are not raw tool-call blocks are ignored.
+      count += 1;
+    }
+  }
+  return count; // 0 when `content` is empty or holds no tool-call blocks.
+}
+
+function isReplaySafeThinkingAssistantTurn( // Decides whether an assistant turn's content can be kept exactly as it is.
+  content: unknown[], // Content blocks of one assistant message.
+  allowedToolNames: Set<string> | null, // Forwarded to hasToolCallName; null presumably means "no allowlist" — confirm in that helper.
+): boolean { // true: keep the turn untouched; false: at least one tool call failed a check.
+  let sawToolCall = false;
+  for (const block of content) {
+    if (!isRawToolCallBlock(block)) {
+      continue; // Only tool-call blocks are inspected; other blocks never fail the check.
+    }
+    sawToolCall = true;
+    if ( // Unsafe as soon as one tool call fails the input, id, or name check.
+      !hasToolCallInput(block) ||
+      !hasToolCallId(block) ||
+      !hasToolCallName(block, allowedToolNames)
+    ) {
+      return false;
+    }
+    if (sanitizeToolCallBlock(block) !== block) { // A different object back from the sanitizer is treated as "would need changing".
+      return false;
+    }
+  }
+  return sawToolCall || content.some((block) => isThinkingLikeBlock(block)); // No tool call failed: safe if there was a tool call or a thinking-like block.
+}
+
 function makeMissingToolResult(params: {
  toolCallId: string;
  toolName?: string;
@@ -247,11 +281,18 @@ export function repairToolCallInputs(
      continue;
    }

-    // Preserve provider-owned thinking turns verbatim. Anthropic replays can
-    // reject any historical assistant turn whose signed thinking block no
-    // longer matches the original response, including sibling tool calls.
    if (msg.content.some((block) => isThinkingLikeBlock(block))) {
-      out.push(msg);
+      // Signed Anthropic thinking blocks must remain byte-for-byte stable on
+      // replay. Preserve the turn only if every sibling tool call is already
+      // valid and requires no redaction or normalization. Otherwise drop the
+      // whole assistant turn rather than mutating provider-owned content.
+      if (isReplaySafeThinkingAssistantTurn(msg.content, allowedToolNames)) {
+        out.push(msg);
+      } else {
+        droppedToolCalls += countRawToolCallBlocks(msg.content);
+        droppedAssistantMessages += 1;
+        changed = true;
+      }
      continue;
    }