fix: repair sanitized replay tool results before send (#67620) (thanks @stainlu)

* fix(agents): preserve native Anthropic tool IDs for hybrid providers

Fixes #66892

MiniMax and other hybrid providers use api.minimaxi.com/anthropic
(modelApi: anthropic-messages), which generates and expects native
Anthropic tool_call_ids in toolu_* format. The hybrid replay policy
(buildHybridAnthropicOrOpenAIReplayPolicy) applied strict
sanitization that stripped underscores from these IDs, causing
MiniMax to reject them with error 2013.

The native Anthropic provider already preserved these IDs via
preserveNativeAnthropicToolUseIds (added in 4613f121ad). This
commit enables the same flag for the hybrid anthropic-messages
branch, so toolu_* IDs pass through unsanitized while other
synthetic IDs still get strict cleanup.

* fix(agents): repair sanitized replay tool results before send

* fix: repair sanitized replay tool results before send (#67620) (thanks @stainlu)

* fix: preserve aborted-span tool results during replay sanitize (#67620) (thanks @stainlu)

---------

Co-authored-by: Ayaan Zaidi <hi@obviy.us>
This commit is contained in:
stain lu
2026-04-16 21:08:57 +08:00
committed by GitHub
parent de129a6530
commit c3c7a9953f
5 changed files with 145 additions and 18 deletions

View File

@@ -0,0 +1,107 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { describe, expect, it } from "vitest";
import { sanitizeReplayToolCallIdsForStream } from "./attempt.tool-call-normalization.js";
/**
 * Covers `sanitizeReplayToolCallIdsForStream` when called with `mode: "strict"`
 * and `repairToolUseResultPairing: true`.
 *
 * Every case uses the same underscore-bearing raw id; the expectations pin the
 * underscore-free form that comes back for matched tool call / result pairs.
 */
describe("sanitizeReplayToolCallIdsForStream", () => {
  // Raw id as recorded in the transcript, and the form the assertions expect back.
  const RAW_ID = "call_function_av7cbkigmk7x1";
  const STRICT_ID = "callfunctionav7cbkigmk7x1";

  // Factories return fresh objects so no fixture is shared between cases.
  const assistantToolUse = (extra: Record<string, unknown> = {}): AgentMessage =>
    ({
      role: "assistant",
      ...extra,
      content: [{ type: "toolUse", id: RAW_ID, name: "read", input: { path: "." } }],
    }) as never;

  const readToolResult = (text: string): AgentMessage =>
    ({
      role: "toolResult",
      toolCallId: RAW_ID,
      toolUseId: RAW_ID,
      toolName: "read",
      content: [{ type: "text", text }],
      isError: false,
    }) as never;

  // Single entry point so each case runs with identical options.
  const runStrictWithRepair = (messages: AgentMessage[]): AgentMessage[] =>
    sanitizeReplayToolCallIdsForStream({
      messages,
      mode: "strict",
      repairToolUseResultPairing: true,
    });

  // Shape expected for a tool result whose ids were rewritten to the strict form.
  const strictToolResultShape = {
    role: "toolResult",
    toolCallId: STRICT_ID,
    toolUseId: STRICT_ID,
    toolName: "read",
  };

  it("drops orphaned tool results after strict id sanitization", () => {
    // A tool result with no assistant tool call anywhere in the transcript.
    const messages: AgentMessage[] = [readToolResult("stale")];

    const out = runStrictWithRepair(messages);

    expect(out).toEqual([]);
  });

  it("keeps matched assistant and tool-result ids aligned", () => {
    const messages: AgentMessage[] = [assistantToolUse(), readToolResult("ok")];

    const out = runStrictWithRepair(messages);

    expect(out).toMatchObject([
      {
        role: "assistant",
        content: [{ type: "toolUse", id: STRICT_ID, name: "read" }],
      },
      strictToolResultShape,
    ]);
  });

  it("keeps real tool results for aborted assistant spans", () => {
    const userRetry = {
      role: "user",
      content: [{ type: "text", text: "retry" }],
    } as never;

    const out = runStrictWithRepair([
      assistantToolUse({ stopReason: "aborted" }),
      readToolResult("partial"),
      userRetry,
    ]);

    expect(out).toMatchObject([
      {
        role: "assistant",
        stopReason: "aborted",
        content: [{ type: "toolUse", id: STRICT_ID, name: "read" }],
      },
      strictToolResultShape,
      { role: "user" },
    ]);
  });
});

View File

@@ -6,7 +6,11 @@ import {
isRedactedSessionsSpawnAttachment,
sanitizeToolUseResultPairing,
} from "../../session-transcript-repair.js";
import { extractToolCallsFromAssistant } from "../../tool-call-id.js";
import {
extractToolCallsFromAssistant,
sanitizeToolCallIdsForCloudCodeAssist,
type ToolCallIdMode,
} from "../../tool-call-id.js";
import { normalizeToolName } from "../../tool-policy.js";
import { shouldAllowProviderOwnedThinkingReplay } from "../../transcript-policy.js";
import type { TranscriptPolicy } from "../../transcript-policy.js";
@@ -868,6 +872,25 @@ export function wrapStreamFnTrimToolCallNames(
};
}
/**
 * Sanitizes tool-call ids on a replayed message list and, when requested,
 * repairs tool-use / tool-result pairing on the sanitized output.
 *
 * The id pass is delegated to `sanitizeToolCallIdsForCloudCodeAssist`; the
 * pairing pass is delegated to `sanitizeToolUseResultPairing` and always runs
 * second, on the already-sanitized messages.
 *
 * @param params.messages - Messages to sanitize.
 * @param params.mode - Id sanitization mode forwarded to the id pass.
 * @param params.allowedToolNames - Forwarded unchanged to the id pass.
 * @param params.preserveNativeAnthropicToolUseIds - Forwarded unchanged to the id pass.
 * @param params.preserveReplaySafeThinkingToolCallIds - Forwarded unchanged to the id pass.
 * @param params.repairToolUseResultPairing - When truthy, run the pairing
 *   repair after id sanitization; otherwise return the id pass output as-is.
 * @returns The output of the id pass, or of the pairing repair when enabled.
 *
 * NOTE(review): a caller may short-circuit by comparing the return value to
 * `params.messages` by reference. Whether either helper returns its input
 * array untouched when nothing changes is not visible here — confirm.
 */
export function sanitizeReplayToolCallIdsForStream(params: {
  messages: AgentMessage[];
  mode: ToolCallIdMode;
  allowedToolNames?: Set<string>;
  preserveNativeAnthropicToolUseIds?: boolean;
  preserveReplaySafeThinkingToolCallIds?: boolean;
  repairToolUseResultPairing?: boolean;
}): AgentMessage[] {
  const {
    messages,
    mode,
    allowedToolNames,
    preserveNativeAnthropicToolUseIds,
    preserveReplaySafeThinkingToolCallIds,
    repairToolUseResultPairing,
  } = params;

  // Pass 1: rewrite tool-call ids according to `mode` and the preserve flags.
  const idSanitized = sanitizeToolCallIdsForCloudCodeAssist(messages, mode, {
    preserveNativeAnthropicToolUseIds,
    preserveReplaySafeThinkingToolCallIds,
    allowedToolNames,
  });

  // Pass 2 (opt-in): repair pairing on the id-sanitized messages.
  return repairToolUseResultPairing ? sanitizeToolUseResultPairing(idSanitized) : idSanitized;
}
export function wrapStreamFnSanitizeMalformedToolCalls(
baseFn: StreamFn,
allowedToolNames?: Set<string>,

View File

@@ -115,7 +115,6 @@ import { resolveSystemPromptOverride } from "../../system-prompt-override.js";
import { buildSystemPromptParams } from "../../system-prompt-params.js";
import { buildSystemPromptReport } from "../../system-prompt-report.js";
import { resolveAgentTimeoutMs } from "../../timeout.js";
import { sanitizeToolCallIdsForCloudCodeAssist } from "../../tool-call-id.js";
import { UNKNOWN_TOOL_THRESHOLD } from "../../tool-loop-detection.js";
import {
resolveTranscriptPolicy,
@@ -225,6 +224,7 @@ import {
wrapStreamFnRepairMalformedToolCallArguments,
} from "./attempt.tool-call-argument-repair.js";
import {
sanitizeReplayToolCallIdsForStream,
wrapStreamFnSanitizeMalformedToolCalls,
wrapStreamFnTrimToolCallNames,
} from "./attempt.tool-call-normalization.js";
@@ -1251,25 +1251,23 @@ export async function runEmbeddedAttempt(
if (!Array.isArray(messages)) {
return inner(model, context, options);
}
const allowProviderOwnedThinkingReplay = shouldAllowProviderOwnedThinkingReplay({
modelApi: (model as { api?: unknown })?.api as string | null | undefined,
policy: transcriptPolicy,
});
const sanitized = sanitizeToolCallIdsForCloudCodeAssist(
messages as AgentMessage[],
const nextMessages = sanitizeReplayToolCallIdsForStream({
messages: messages as AgentMessage[],
mode,
{
preserveNativeAnthropicToolUseIds: transcriptPolicy.preserveNativeAnthropicToolUseIds,
preserveReplaySafeThinkingToolCallIds: allowProviderOwnedThinkingReplay,
allowedToolNames,
},
);
if (sanitized === messages) {
allowedToolNames,
preserveNativeAnthropicToolUseIds: transcriptPolicy.preserveNativeAnthropicToolUseIds,
preserveReplaySafeThinkingToolCallIds: shouldAllowProviderOwnedThinkingReplay({
modelApi: (model as { api?: unknown })?.api as string | null | undefined,
policy: transcriptPolicy,
}),
repairToolUseResultPairing: transcriptPolicy.repairToolUseResultPairing,
});
if (nextMessages === messages) {
return inner(model, context, options);
}
const nextContext = {
...(context as unknown as Record<string, unknown>),
messages: sanitized,
messages: nextMessages,
} as unknown;
return inner(model, nextContext as typeof context, options);
};

View File

@@ -93,7 +93,6 @@ describe("provider replay helpers", () => {
});
it("builds hybrid anthropic or openai replay policy", () => {
// Sonnet 4.6 preserves thinking blocks even when flag is set
const sonnet46Policy = buildHybridAnthropicOrOpenAIReplayPolicy(
{
provider: "minimax",
@@ -107,7 +106,6 @@ describe("provider replay helpers", () => {
});
expect(sonnet46Policy).not.toHaveProperty("dropThinkingBlocks");
// Legacy model still drops
expect(
buildHybridAnthropicOrOpenAIReplayPolicy(
{