fix: preserve replay-safe signed tool ids

2026-05-06 13:30:42 +00:00 · 2026-04-12 06:10:06 +01:00
parent 1e35eed277
commit eed627d3f2
5 changed files with 297 additions and 12 deletions
--- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
+++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
@@ -1038,6 +1038,56 @@ describe("sanitizeSessionHistory", () => {
    });
  });

+  it.each([
+    {
+      provider: "anthropic",
+      modelApi: "anthropic-messages",
+      label: "anthropic",
+    },
+    {
+      provider: "amazon-bedrock",
+      modelApi: "bedrock-converse-stream",
+      label: "bedrock",
+    },
+  ])("preserves replay-safe signed tool ids for $label history", async ({ provider, modelApi }) => {
+    setNonGoogleModelApi();
+
+    const messages = castAgentMessages([
+      makeUserMessage("retry"),
+      makeAssistantMessage([
+        {
+          type: "thinking",
+          thinking: "internal",
+          thinkingSignature: "sig_1",
+        },
+        { type: "toolCall", id: "call_1", name: "read", arguments: {} },
+      ] as unknown as AssistantMessage["content"]),
+      castAgentMessage({
+        role: "toolResult",
+        toolCallId: "call_1",
+        toolName: "read",
+        content: [{ type: "text", text: "ok" }],
+        isError: false,
+      }),
+    ]);
+
+    const result = await sanitizeAnthropicHistory({
+      provider,
+      modelApi,
+      messages,
+    });
+
+    expect((result[1] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
+      {
+        type: "thinking",
+        thinking: "internal",
+        thinkingSignature: "sig_1",
+      },
+      { type: "toolCall", id: "call_1", name: "read", arguments: {} },
+    ]);
+    expect((result[2] as Extract<AgentMessage, { role: "toolResult" }>).toolCallId).toBe("call_1");
+  });
+
  it("keeps mutable thinking turns outside exact anthropic replay", async () => {
    setNonGoogleModelApi();

--- a/src/agents/pi-embedded-runner/replay-history.ts
+++ b/src/agents/pi-embedded-runner/replay-history.ts
@@ -33,6 +33,7 @@ import {
  resolveTranscriptPolicy,
  shouldAllowProviderOwnedThinkingReplay,
 } from "../transcript-policy.js";
+import { sanitizeToolCallIdsForCloudCodeAssist } from "../tool-call-id.js";
 import {
  makeZeroUsageSnapshot,
  normalizeUsage,
@@ -403,12 +404,16 @@ export async function sanitizeSessionHistory(params: {
      model: params.model,
    });
  const withInterSessionMarkers = annotateInterSessionUserMessages(params.messages);
+  const allowProviderOwnedThinkingReplay = shouldAllowProviderOwnedThinkingReplay({
+    modelApi: params.modelApi,
+    policy,
+  });
  const sanitizedImages = await sanitizeSessionMessagesImages(
    withInterSessionMarkers,
    "session:history",
    {
      sanitizeMode: policy.sanitizeMode,
-      sanitizeToolCallIds: policy.sanitizeToolCallIds,
+      sanitizeToolCallIds: policy.sanitizeToolCallIds && !allowProviderOwnedThinkingReplay,
      toolCallIdMode: policy.toolCallIdMode,
      preserveNativeAnthropicToolUseIds: policy.preserveNativeAnthropicToolUseIds,
      preserveSignatures: policy.preserveSignatures,
@@ -421,16 +426,21 @@ export async function sanitizeSessionHistory(params: {
    : sanitizedImages;
  const sanitizedToolCalls = sanitizeToolCallInputs(droppedThinking, {
    allowedToolNames: params.allowedToolNames,
-    allowProviderOwnedThinkingReplay: shouldAllowProviderOwnedThinkingReplay({
-      modelApi: params.modelApi,
-      policy,
-    }),
+    allowProviderOwnedThinkingReplay,
  });
+  const sanitizedToolIds =
+    policy.sanitizeToolCallIds && policy.toolCallIdMode
+      ? sanitizeToolCallIdsForCloudCodeAssist(sanitizedToolCalls, policy.toolCallIdMode, {
+          preserveNativeAnthropicToolUseIds: policy.preserveNativeAnthropicToolUseIds,
+          preserveReplaySafeThinkingToolCallIds: allowProviderOwnedThinkingReplay,
+          allowedToolNames: params.allowedToolNames,
+        })
+      : sanitizedToolCalls;
  const repairedTools = policy.repairToolUseResultPairing
-    ? sanitizeToolUseResultPairing(sanitizedToolCalls, {
+    ? sanitizeToolUseResultPairing(sanitizedToolIds, {
        erroredAssistantResultPolicy: "drop",
      })
-    : sanitizedToolCalls;
+    : sanitizedToolIds;
  const sanitizedToolResults = stripToolResultDetails(repairedTools);
  const sanitizedCompactionUsage = ensureAssistantUsageSnapshots(
    stripStaleAssistantUsageBeforeLatestCompaction(sanitizedToolResults),
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -111,7 +111,10 @@ import { buildSystemPromptParams } from "../../system-prompt-params.js";
 import { buildSystemPromptReport } from "../../system-prompt-report.js";
 import { resolveAgentTimeoutMs } from "../../timeout.js";
 import { sanitizeToolCallIdsForCloudCodeAssist } from "../../tool-call-id.js";
-import { resolveTranscriptPolicy } from "../../transcript-policy.js";
+import {
+  resolveTranscriptPolicy,
+  shouldAllowProviderOwnedThinkingReplay,
+} from "../../transcript-policy.js";
 import { normalizeUsage, type NormalizedUsage } from "../../usage.js";
 import { DEFAULT_BOOTSTRAP_FILENAME } from "../../workspace.js";
 import { isRunnerAbortError } from "../abort.js";
@@ -1156,11 +1159,17 @@ export async function runEmbeddedAttempt(
          if (!Array.isArray(messages)) {
            return inner(model, context, options);
          }
+          const allowProviderOwnedThinkingReplay = shouldAllowProviderOwnedThinkingReplay({
+            modelApi: (model as { api?: unknown })?.api as string | null | undefined,
+            policy: transcriptPolicy,
+          });
          const sanitized = sanitizeToolCallIdsForCloudCodeAssist(
            messages as AgentMessage[],
            mode,
            {
              preserveNativeAnthropicToolUseIds: transcriptPolicy.preserveNativeAnthropicToolUseIds,
+              preserveReplaySafeThinkingToolCallIds: allowProviderOwnedThinkingReplay,
+              allowedToolNames,
            },
          );
          if (sanitized === messages) {
--- a/src/agents/tool-call-id.test.ts
+++ b/src/agents/tool-call-id.test.ts
@@ -293,6 +293,35 @@ describe("sanitizeToolCallIdsForCloudCodeAssist", () => {
      ).toBe("call123fc123");
    });

+    it("preserves replay-safe signed-thinking tool ids when requested", () => {
+      const input = castAgentMessages([
+        {
+          role: "assistant",
+          content: [
+            { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
+            { type: "toolCall", id: "call_1", name: "read", arguments: {} },
+          ],
+        },
+        {
+          role: "toolResult",
+          toolCallId: "call_1",
+          toolName: "read",
+          content: [{ type: "text", text: "ok" }],
+        },
+      ]);
+
+      const out = sanitizeToolCallIdsForCloudCodeAssist(input, "strict", {
+        preserveReplaySafeThinkingToolCallIds: true,
+        allowedToolNames: ["read"],
+      });
+
+      expect(out).toBe(input);
+      expect(((out[0] as Extract<AgentMessage, { role: "assistant" }>).content?.[1] as { id?: string }).id).toBe(
+        "call_1",
+      );
+      expect((out[1] as Extract<AgentMessage, { role: "toolResult" }>).toolCallId).toBe("call_1");
+    });
+
    it("avoids collisions with alphanumeric-only suffixes", () => {
      const input = buildDuplicateIdCollisionInput();

--- a/src/agents/tool-call-id.ts
+++ b/src/agents/tool-call-id.ts
@@ -1,8 +1,13 @@
 import { createHash } from "node:crypto";
 import type { AgentMessage } from "@mariozechner/pi-agent-core";
+import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";

 export type ToolCallIdMode = "strict" | "strict9";
 const NATIVE_ANTHROPIC_TOOL_USE_ID_RE = /^toolu_[A-Za-z0-9_]+$/;
+const REDACTED_SESSIONS_SPAWN_ATTACHMENT_CONTENT = "__OPENCLAW_REDACTED__";
+const SESSIONS_SPAWN_ATTACHMENT_METADATA_KEYS = ["name", "encoding", "mimeType"] as const;
+const TOOL_CALL_NAME_MAX_CHARS = 64;
+const TOOL_CALL_NAME_RE = /^[A-Za-z0-9_:.-]+$/;

 const STRICT9_LEN = 9;
 const TOOL_CALL_TYPES = new Set(["toolCall", "toolUse", "functionCall"]);
@@ -12,6 +17,14 @@ export type ToolCallLike = {
  name?: string;
 };

+type ReplaySafeToolCallBlock = {
+  type?: unknown;
+  id?: unknown;
+  name?: unknown;
+  input?: unknown;
+  arguments?: unknown;
+};
+
 /**
 * Sanitize a tool call ID to be compatible with various providers.
 *
@@ -83,6 +96,150 @@ export function extractToolResultId(
  return null;
 }

+function isThinkingLikeBlock(block: unknown): boolean {
+  if (!block || typeof block !== "object") {
+    return false;
+  }
+  const type = (block as { type?: unknown }).type;
+  return type === "thinking" || type === "redacted_thinking";
+}
+
+function hasToolCallInput(block: ReplaySafeToolCallBlock): boolean {
+  const hasInput = "input" in block ? block.input !== undefined && block.input !== null : false;
+  const hasArguments =
+    "arguments" in block ? block.arguments !== undefined && block.arguments !== null : false;
+  return hasInput || hasArguments;
+}
+
+function hasNonEmptyStringField(value: unknown): value is string {
+  return typeof value === "string" && value.trim().length > 0;
+}
+
+function normalizeAllowedToolNames(allowedToolNames?: Iterable<string>): Set<string> | null {
+  if (!allowedToolNames) {
+    return null;
+  }
+  const normalized = new Set<string>();
+  for (const name of allowedToolNames) {
+    if (typeof name !== "string") {
+      continue;
+    }
+    const trimmed = name.trim();
+    if (!trimmed) {
+      continue;
+    }
+    normalized.add(normalizeLowercaseStringOrEmpty(trimmed));
+  }
+  return normalized.size > 0 ? normalized : null;
+}
+
+function isRedactedSessionsSpawnAttachment(item: unknown): boolean {
+  if (!item || typeof item !== "object") {
+    return false;
+  }
+  const attachment = item as Record<string, unknown>;
+  if (attachment.content !== REDACTED_SESSIONS_SPAWN_ATTACHMENT_CONTENT) {
+    return false;
+  }
+  for (const key of Object.keys(attachment)) {
+    if (key === "content") {
+      continue;
+    }
+    if (!(SESSIONS_SPAWN_ATTACHMENT_METADATA_KEYS as readonly string[]).includes(key)) {
+      return false;
+    }
+    if (typeof attachment[key] !== "string" || (attachment[key] as string).trim().length === 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
+function toolCallNeedsReplayMutation(block: ReplaySafeToolCallBlock): boolean {
+  const rawName = typeof block.name === "string" ? block.name : undefined;
+  const trimmedName = rawName?.trim();
+  if (rawName && rawName !== trimmedName) {
+    return true;
+  }
+  if (normalizeLowercaseStringOrEmpty(trimmedName) !== "sessions_spawn") {
+    return false;
+  }
+  for (const payload of [block.arguments, block.input]) {
+    if (!payload || typeof payload !== "object") {
+      continue;
+    }
+    const attachments = (payload as { attachments?: unknown }).attachments;
+    if (!Array.isArray(attachments)) {
+      continue;
+    }
+    for (const attachment of attachments) {
+      if (!isRedactedSessionsSpawnAttachment(attachment)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+function hasReplaySafeToolCallName(
+  block: ReplaySafeToolCallBlock,
+  allowedToolNames: Set<string> | null,
+): boolean {
+  if (typeof block.name !== "string") {
+    return false;
+  }
+  const trimmed = block.name.trim();
+  if (!trimmed) {
+    return false;
+  }
+  if (trimmed.length > TOOL_CALL_NAME_MAX_CHARS || !TOOL_CALL_NAME_RE.test(trimmed)) {
+    return false;
+  }
+  if (!allowedToolNames) {
+    return true;
+  }
+  return allowedToolNames.has(normalizeLowercaseStringOrEmpty(trimmed));
+}
+
+function isReplaySafeThinkingAssistantMessage(
+  message: Extract<AgentMessage, { role: "assistant" }>,
+  allowedToolNames: Set<string> | null,
+): boolean {
+  const content = message.content;
+  if (!Array.isArray(content)) {
+    return false;
+  }
+
+  let sawThinking = false;
+  let sawToolCall = false;
+  for (const block of content) {
+    if (isThinkingLikeBlock(block)) {
+      sawThinking = true;
+      continue;
+    }
+    if (!block || typeof block !== "object") {
+      continue;
+    }
+    const typedBlock = block as ReplaySafeToolCallBlock;
+    if (
+      typeof typedBlock.type !== "string" ||
+      !TOOL_CALL_TYPES.has(typedBlock.type)
+    ) {
+      continue;
+    }
+    sawToolCall = true;
+    if (
+      !hasToolCallInput(typedBlock) ||
+      !hasNonEmptyStringField(typedBlock.id) ||
+      !hasReplaySafeToolCallName(typedBlock, allowedToolNames) ||
+      toolCallNeedsReplayMutation(typedBlock)
+    ) {
+      return false;
+    }
+  }
+  return sawThinking && sawToolCall;
+}
+
 export function isValidCloudCodeAssistToolId(id: string, mode: ToolCallIdMode = "strict"): boolean {
  if (!id || typeof id !== "string") {
    return false;
@@ -155,6 +312,7 @@ function createOccurrenceAwareResolver(
 ): {
  resolveAssistantId: (id: string) => string;
  resolveToolResultId: (id: string) => string;
+  preserveAssistantId: (id: string) => string;
 } {
  const used = new Set<string>();
  const assistantOccurrences = new Map<string, number>();
@@ -218,7 +376,18 @@ function createOccurrenceAwareResolver(
    return allocate(`${id}:tool_result:${occurrence}`);
  };

-  return { resolveAssistantId, resolveToolResultId };
+  const preserveAssistantId = (id: string): string => {
+    used.add(id);
+    const pending = pendingByRawId.get(id);
+    if (pending) {
+      pending.push(id);
+    } else {
+      pendingByRawId.set(id, [id]);
+    }
+    return id;
+  };
+
+  return { resolveAssistantId, resolveToolResultId, preserveAssistantId };
 }

 function rewriteAssistantToolCallIds(params: {
@@ -298,7 +467,11 @@ function rewriteToolResultIds(params: {
 export function sanitizeToolCallIdsForCloudCodeAssist(
  messages: AgentMessage[],
  mode: ToolCallIdMode = "strict",
-  options?: { preserveNativeAnthropicToolUseIds?: boolean },
+  options?: {
+    preserveNativeAnthropicToolUseIds?: boolean;
+    preserveReplaySafeThinkingToolCallIds?: boolean;
+    allowedToolNames?: Iterable<string>;
+  },
 ): AgentMessage[] {
  // Strict mode: only [a-zA-Z0-9]
  // Strict9 mode: only [a-zA-Z0-9], length 9 (Mistral tool call requirement)
@@ -306,7 +479,11 @@ export function sanitizeToolCallIdsForCloudCodeAssist(
  // duplicate tool-call IDs. Track assistant occurrences in-order so repeated
  // raw IDs receive distinct rewritten IDs, while matching tool results consume
  // the same rewritten IDs in encounter order.
-  const { resolveAssistantId, resolveToolResultId } = createOccurrenceAwareResolver(mode, options);
+  const { resolveAssistantId, resolveToolResultId, preserveAssistantId } =
+    createOccurrenceAwareResolver(mode, options);
+  const allowedToolNames = normalizeAllowedToolNames(options?.allowedToolNames);
+  const preserveReplaySafeThinkingToolCallIds =
+    options?.preserveReplaySafeThinkingToolCallIds === true;

  let changed = false;
  const out = messages.map((msg) => {
@@ -315,8 +492,18 @@ export function sanitizeToolCallIdsForCloudCodeAssist(
    }
    const role = (msg as { role?: unknown }).role;
    if (role === "assistant") {
+      const assistant = msg as Extract<AgentMessage, { role: "assistant" }>;
+      if (
+        preserveReplaySafeThinkingToolCallIds &&
+        isReplaySafeThinkingAssistantMessage(assistant, allowedToolNames)
+      ) {
+        for (const toolCall of extractToolCallsFromAssistant(assistant)) {
+          preserveAssistantId(toolCall.id);
+        }
+        return msg;
+      }
      const next = rewriteAssistantToolCallIds({
-        message: msg as Extract<AgentMessage, { role: "assistant" }>,
+        message: assistant,
        resolveId: resolveAssistantId,
      });
      if (next !== msg) {