fix: repair sanitized replay tool results before send (#67620) (thanks @stainlu)

* fix(agents): preserve native Anthropic tool IDs for hybrid providers

  Fixes #66892

  MiniMax and other hybrid providers use api.minimaxi.com/anthropic (modelApi: anthropic-messages), which generates and expects native Anthropic tool_call_ids in toolu_* format. The hybrid replay policy (buildHybridAnthropicOrOpenAIReplayPolicy) applied strict sanitization that stripped underscores from these IDs, causing MiniMax to reject them with error 2013.

  The native Anthropic provider already preserved these IDs via preserveNativeAnthropicToolUseIds (added in 4613f121ad). This commit enables the same flag for the hybrid anthropic-messages branch, so toolu_* IDs pass through unsanitized while other synthetic IDs still get strict cleanup.

* fix(agents): repair sanitized replay tool results before send

* fix: repair sanitized replay tool results before send (#67620) (thanks @stainlu)

* fix: preserve aborted-span tool results during replay sanitize (#67620) (thanks @stainlu)

---------

Co-authored-by: Ayaan Zaidi <hi@obviy.us>
2026-05-06 16:50:43 +00:00 · 2026-04-16 21:08:57 +08:00
parent de129a6530
commit c3c7a9953f
5 changed files with 145 additions and 18 deletions
--- a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.test.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.test.ts
@@ -0,0 +1,107 @@
+import type { AgentMessage } from "@mariozechner/pi-agent-core";
+import { describe, expect, it } from "vitest";
+import { sanitizeReplayToolCallIdsForStream } from "./attempt.tool-call-normalization.js";
+
+describe("sanitizeReplayToolCallIdsForStream", () => {
+  it("drops orphaned tool results after strict id sanitization", () => {
+    const messages: AgentMessage[] = [
+      {
+        role: "toolResult",
+        toolCallId: "call_function_av7cbkigmk7x1",
+        toolUseId: "call_function_av7cbkigmk7x1",
+        toolName: "read",
+        content: [{ type: "text", text: "stale" }],
+        isError: false,
+      } as never,
+    ];
+
+    expect(
+      sanitizeReplayToolCallIdsForStream({
+        messages,
+        mode: "strict",
+        repairToolUseResultPairing: true,
+      }),
+    ).toEqual([]);
+  });
+
+  it("keeps matched assistant and tool-result ids aligned", () => {
+    const rawId = "call_function_av7cbkigmk7x1";
+    const messages: AgentMessage[] = [
+      {
+        role: "assistant",
+        content: [{ type: "toolUse", id: rawId, name: "read", input: { path: "." } }],
+      } as never,
+      {
+        role: "toolResult",
+        toolCallId: rawId,
+        toolUseId: rawId,
+        toolName: "read",
+        content: [{ type: "text", text: "ok" }],
+        isError: false,
+      } as never,
+    ];
+
+    const out = sanitizeReplayToolCallIdsForStream({
+      messages,
+      mode: "strict",
+      repairToolUseResultPairing: true,
+    });
+
+    expect(out).toMatchObject([
+      {
+        role: "assistant",
+        content: [{ type: "toolUse", id: "callfunctionav7cbkigmk7x1", name: "read" }],
+      },
+      {
+        role: "toolResult",
+        toolCallId: "callfunctionav7cbkigmk7x1",
+        toolUseId: "callfunctionav7cbkigmk7x1",
+        toolName: "read",
+      },
+    ]);
+  });
+
+  it("keeps real tool results for aborted assistant spans", () => {
+    const rawId = "call_function_av7cbkigmk7x1";
+    const out = sanitizeReplayToolCallIdsForStream({
+      messages: [
+        {
+          role: "assistant",
+          stopReason: "aborted",
+          content: [{ type: "toolUse", id: rawId, name: "read", input: { path: "." } }],
+        } as never,
+        {
+          role: "toolResult",
+          toolCallId: rawId,
+          toolUseId: rawId,
+          toolName: "read",
+          content: [{ type: "text", text: "partial" }],
+          isError: false,
+        } as never,
+        {
+          role: "user",
+          content: [{ type: "text", text: "retry" }],
+        } as never,
+      ],
+      mode: "strict",
+      repairToolUseResultPairing: true,
+    });
+
+    expect(out).toMatchObject([
+      {
+        role: "assistant",
+        stopReason: "aborted",
+        content: [{ type: "toolUse", id: "callfunctionav7cbkigmk7x1", name: "read" }],
+      },
+      {
+        role: "toolResult",
+        toolCallId: "callfunctionav7cbkigmk7x1",
+        toolUseId: "callfunctionav7cbkigmk7x1",
+        toolName: "read",
+      },
+      {
+        role: "user",
+      },
+    ]);
+  });
+});
--- a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts
@@ -6,7 +6,11 @@ import {
  isRedactedSessionsSpawnAttachment,
  sanitizeToolUseResultPairing,
 } from "../../session-transcript-repair.js";
-import { extractToolCallsFromAssistant } from "../../tool-call-id.js";
+import {
+  extractToolCallsFromAssistant,
+  sanitizeToolCallIdsForCloudCodeAssist,
+  type ToolCallIdMode,
+} from "../../tool-call-id.js";
 import { normalizeToolName } from "../../tool-policy.js";
 import { shouldAllowProviderOwnedThinkingReplay } from "../../transcript-policy.js";
 import type { TranscriptPolicy } from "../../transcript-policy.js";
@@ -868,6 +872,25 @@ export function wrapStreamFnTrimToolCallNames(
  };
 }

+export function sanitizeReplayToolCallIdsForStream(params: {
+  messages: AgentMessage[];
+  mode: ToolCallIdMode;
+  allowedToolNames?: Set<string>;
+  preserveNativeAnthropicToolUseIds?: boolean;
+  preserveReplaySafeThinkingToolCallIds?: boolean;
+  repairToolUseResultPairing?: boolean;
+}): AgentMessage[] {
+  const sanitized = sanitizeToolCallIdsForCloudCodeAssist(params.messages, params.mode, {
+    preserveNativeAnthropicToolUseIds: params.preserveNativeAnthropicToolUseIds,
+    preserveReplaySafeThinkingToolCallIds: params.preserveReplaySafeThinkingToolCallIds,
+    allowedToolNames: params.allowedToolNames,
+  });
+  if (!params.repairToolUseResultPairing) {
+    return sanitized;
+  }
+  return sanitizeToolUseResultPairing(sanitized);
+}
+
 export function wrapStreamFnSanitizeMalformedToolCalls(
  baseFn: StreamFn,
  allowedToolNames?: Set<string>,
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -115,7 +115,6 @@ import { resolveSystemPromptOverride } from "../../system-prompt-override.js";
 import { buildSystemPromptParams } from "../../system-prompt-params.js";
 import { buildSystemPromptReport } from "../../system-prompt-report.js";
 import { resolveAgentTimeoutMs } from "../../timeout.js";
-import { sanitizeToolCallIdsForCloudCodeAssist } from "../../tool-call-id.js";
 import { UNKNOWN_TOOL_THRESHOLD } from "../../tool-loop-detection.js";
 import {
  resolveTranscriptPolicy,
@@ -225,6 +224,7 @@ import {
  wrapStreamFnRepairMalformedToolCallArguments,
 } from "./attempt.tool-call-argument-repair.js";
 import {
+  sanitizeReplayToolCallIdsForStream,
  wrapStreamFnSanitizeMalformedToolCalls,
  wrapStreamFnTrimToolCallNames,
 } from "./attempt.tool-call-normalization.js";
@@ -1251,25 +1251,23 @@ export async function runEmbeddedAttempt(
          if (!Array.isArray(messages)) {
            return inner(model, context, options);
          }
-          const allowProviderOwnedThinkingReplay = shouldAllowProviderOwnedThinkingReplay({
-            modelApi: (model as { api?: unknown })?.api as string | null | undefined,
-            policy: transcriptPolicy,
-          });
-          const sanitized = sanitizeToolCallIdsForCloudCodeAssist(
-            messages as AgentMessage[],
+          const nextMessages = sanitizeReplayToolCallIdsForStream({
+            messages: messages as AgentMessage[],
            mode,
-            {
-              preserveNativeAnthropicToolUseIds: transcriptPolicy.preserveNativeAnthropicToolUseIds,
-              preserveReplaySafeThinkingToolCallIds: allowProviderOwnedThinkingReplay,
-              allowedToolNames,
-            },
-          );
-          if (sanitized === messages) {
+            allowedToolNames,
+            preserveNativeAnthropicToolUseIds: transcriptPolicy.preserveNativeAnthropicToolUseIds,
+            preserveReplaySafeThinkingToolCallIds: shouldAllowProviderOwnedThinkingReplay({
+              modelApi: (model as { api?: unknown })?.api as string | null | undefined,
+              policy: transcriptPolicy,
+            }),
+            repairToolUseResultPairing: transcriptPolicy.repairToolUseResultPairing,
+          });
+          if (nextMessages === messages) {
            return inner(model, context, options);
          }
          const nextContext = {
            ...(context as unknown as Record<string, unknown>),
-            messages: sanitized,
+            messages: nextMessages,
          } as unknown;
          return inner(model, nextContext as typeof context, options);
        };
--- a/src/plugins/provider-replay-helpers.test.ts
+++ b/src/plugins/provider-replay-helpers.test.ts
@@ -93,7 +93,6 @@ describe("provider replay helpers", () => {
  });

  it("builds hybrid anthropic or openai replay policy", () => {
-    // Sonnet 4.6 preserves thinking blocks even when flag is set
    const sonnet46Policy = buildHybridAnthropicOrOpenAIReplayPolicy(
      {
        provider: "minimax",
@@ -107,7 +106,6 @@ describe("provider replay helpers", () => {
    });
    expect(sonnet46Policy).not.toHaveProperty("dropThinkingBlocks");

-    // Legacy model still drops
    expect(
      buildHybridAnthropicOrOpenAIReplayPolicy(
        {