mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 16:50:43 +00:00
fix: repair sanitized replay tool results before send (#67620) (thanks @stainlu)
* fix(agents): preserve native Anthropic tool IDs for hybrid providers
Fixes #66892
MiniMax and other hybrid providers use api.minimaxi.com/anthropic
(modelApi: anthropic-messages), which generates and expects native
Anthropic tool_call_ids in toolu_* format. The hybrid replay policy
(buildHybridAnthropicOrOpenAIReplayPolicy) applied strict
sanitization that stripped underscores from these IDs, causing
MiniMax to reject them with error 2013.
The native Anthropic provider already preserved these IDs via
preserveNativeAnthropicToolUseIds (added in 4613f121ad). This
commit enables the same flag for the hybrid anthropic-messages
branch, so toolu_* IDs pass through unsanitized while other
synthetic IDs still get strict cleanup.
* fix(agents): repair sanitized replay tool results before send
* fix: repair sanitized replay tool results before send (#67620) (thanks @stainlu)
* fix: preserve aborted-span tool results during replay sanitize (#67620) (thanks @stainlu)
---------
Co-authored-by: Ayaan Zaidi <hi@obviy.us>
This commit is contained in:
@@ -0,0 +1,107 @@
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { sanitizeReplayToolCallIdsForStream } from "./attempt.tool-call-normalization.js";
|
||||
|
||||
describe("sanitizeReplayToolCallIdsForStream", () => {
  // Raw id as it appears in the stored transcript, and the form the tests
  // expect after strict-mode sanitization (underscores removed).
  const RAW_ID = "call_function_av7cbkigmk7x1";
  const STRICT_ID = "callfunctionav7cbkigmk7x1";

  /** Builds a successful `read` tool result whose ids both point at `id`. */
  const readResult = (id: string, text: string): AgentMessage =>
    ({
      role: "toolResult",
      toolCallId: id,
      toolUseId: id,
      toolName: "read",
      content: [{ type: "text", text }],
      isError: false,
    }) as never;

  /** Builds an assistant turn holding a single `read` tool call with id `id`. */
  const readCall = (id: string, extra: Record<string, unknown> = {}): AgentMessage =>
    ({
      role: "assistant",
      ...extra,
      content: [{ type: "toolUse", id, name: "read", input: { path: "." } }],
    }) as never;

  /** Runs the function under test in strict mode with pairing repair enabled. */
  const sanitizeStrict = (messages: AgentMessage[]): AgentMessage[] =>
    sanitizeReplayToolCallIdsForStream({
      messages,
      mode: "strict",
      repairToolUseResultPairing: true,
    });

  // Shape expected for a sanitized `read` tool result.
  const sanitizedResultShape = {
    role: "toolResult",
    toolCallId: STRICT_ID,
    toolUseId: STRICT_ID,
    toolName: "read",
  };

  it("drops orphaned tool results after strict id sanitization", () => {
    // A tool result with no assistant tool call before it has nothing to pair with.
    const messages: AgentMessage[] = [readResult(RAW_ID, "stale")];

    expect(sanitizeStrict(messages)).toEqual([]);
  });

  it("keeps matched assistant and tool-result ids aligned", () => {
    const messages: AgentMessage[] = [readCall(RAW_ID), readResult(RAW_ID, "ok")];

    const out = sanitizeStrict(messages);

    // Both sides of the pair must carry the same rewritten id.
    expect(out).toMatchObject([
      {
        role: "assistant",
        content: [{ type: "toolUse", id: STRICT_ID, name: "read" }],
      },
      sanitizedResultShape,
    ]);
  });

  it("keeps real tool results for aborted assistant spans", () => {
    const out = sanitizeStrict([
      readCall(RAW_ID, { stopReason: "aborted" }),
      readResult(RAW_ID, "partial"),
      {
        role: "user",
        content: [{ type: "text", text: "retry" }],
      } as never,
    ]);

    // The aborted assistant turn, its real result, and the follow-up user
    // message are all expected to survive, with ids rewritten consistently.
    expect(out).toMatchObject([
      {
        role: "assistant",
        stopReason: "aborted",
        content: [{ type: "toolUse", id: STRICT_ID, name: "read" }],
      },
      sanitizedResultShape,
      {
        role: "user",
      },
    ]);
  });
});
|
||||
@@ -6,7 +6,11 @@ import {
|
||||
isRedactedSessionsSpawnAttachment,
|
||||
sanitizeToolUseResultPairing,
|
||||
} from "../../session-transcript-repair.js";
|
||||
import { extractToolCallsFromAssistant } from "../../tool-call-id.js";
|
||||
import {
|
||||
extractToolCallsFromAssistant,
|
||||
sanitizeToolCallIdsForCloudCodeAssist,
|
||||
type ToolCallIdMode,
|
||||
} from "../../tool-call-id.js";
|
||||
import { normalizeToolName } from "../../tool-policy.js";
|
||||
import { shouldAllowProviderOwnedThinkingReplay } from "../../transcript-policy.js";
|
||||
import type { TranscriptPolicy } from "../../transcript-policy.js";
|
||||
@@ -868,6 +872,25 @@ export function wrapStreamFnTrimToolCallNames(
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Prepares replayed transcript messages for a provider stream call.
 *
 * Tool-call ids are first rewritten by `sanitizeToolCallIdsForCloudCodeAssist`
 * using the given `mode` and preservation flags. When
 * `repairToolUseResultPairing` is truthy, the rewritten messages are then
 * passed through `sanitizeToolUseResultPairing`, so pairing repair always sees
 * the post-sanitization ids.
 *
 * @param params.messages Transcript messages to sanitize.
 * @param params.mode Tool-call id sanitization mode, forwarded unchanged.
 * @param params.allowedToolNames Optional tool-name set, forwarded to the id sanitizer.
 * @param params.preserveNativeAnthropicToolUseIds Forwarded to the id sanitizer.
 * @param params.preserveReplaySafeThinkingToolCallIds Forwarded to the id sanitizer.
 * @param params.repairToolUseResultPairing Enables the pairing-repair pass.
 * @returns The id sanitizer's result, or the pairing-repaired result when
 *   repair is enabled.
 */
export function sanitizeReplayToolCallIdsForStream(params: {
  messages: AgentMessage[];
  mode: ToolCallIdMode;
  allowedToolNames?: Set<string>;
  preserveNativeAnthropicToolUseIds?: boolean;
  preserveReplaySafeThinkingToolCallIds?: boolean;
  repairToolUseResultPairing?: boolean;
}): AgentMessage[] {
  const {
    messages,
    mode,
    allowedToolNames,
    preserveNativeAnthropicToolUseIds,
    preserveReplaySafeThinkingToolCallIds,
    repairToolUseResultPairing,
  } = params;

  const withSanitizedIds = sanitizeToolCallIdsForCloudCodeAssist(messages, mode, {
    preserveNativeAnthropicToolUseIds,
    preserveReplaySafeThinkingToolCallIds,
    allowedToolNames,
  });

  // Pairing repair is opt-in; without it the id-sanitized messages go out as-is.
  return repairToolUseResultPairing
    ? sanitizeToolUseResultPairing(withSanitizedIds)
    : withSanitizedIds;
}
|
||||
|
||||
export function wrapStreamFnSanitizeMalformedToolCalls(
|
||||
baseFn: StreamFn,
|
||||
allowedToolNames?: Set<string>,
|
||||
|
||||
@@ -115,7 +115,6 @@ import { resolveSystemPromptOverride } from "../../system-prompt-override.js";
|
||||
import { buildSystemPromptParams } from "../../system-prompt-params.js";
|
||||
import { buildSystemPromptReport } from "../../system-prompt-report.js";
|
||||
import { resolveAgentTimeoutMs } from "../../timeout.js";
|
||||
import { sanitizeToolCallIdsForCloudCodeAssist } from "../../tool-call-id.js";
|
||||
import { UNKNOWN_TOOL_THRESHOLD } from "../../tool-loop-detection.js";
|
||||
import {
|
||||
resolveTranscriptPolicy,
|
||||
@@ -225,6 +224,7 @@ import {
|
||||
wrapStreamFnRepairMalformedToolCallArguments,
|
||||
} from "./attempt.tool-call-argument-repair.js";
|
||||
import {
|
||||
sanitizeReplayToolCallIdsForStream,
|
||||
wrapStreamFnSanitizeMalformedToolCalls,
|
||||
wrapStreamFnTrimToolCallNames,
|
||||
} from "./attempt.tool-call-normalization.js";
|
||||
@@ -1251,25 +1251,23 @@ export async function runEmbeddedAttempt(
|
||||
if (!Array.isArray(messages)) {
|
||||
return inner(model, context, options);
|
||||
}
|
||||
const allowProviderOwnedThinkingReplay = shouldAllowProviderOwnedThinkingReplay({
|
||||
modelApi: (model as { api?: unknown })?.api as string | null | undefined,
|
||||
policy: transcriptPolicy,
|
||||
});
|
||||
const sanitized = sanitizeToolCallIdsForCloudCodeAssist(
|
||||
messages as AgentMessage[],
|
||||
const nextMessages = sanitizeReplayToolCallIdsForStream({
|
||||
messages: messages as AgentMessage[],
|
||||
mode,
|
||||
{
|
||||
preserveNativeAnthropicToolUseIds: transcriptPolicy.preserveNativeAnthropicToolUseIds,
|
||||
preserveReplaySafeThinkingToolCallIds: allowProviderOwnedThinkingReplay,
|
||||
allowedToolNames,
|
||||
},
|
||||
);
|
||||
if (sanitized === messages) {
|
||||
allowedToolNames,
|
||||
preserveNativeAnthropicToolUseIds: transcriptPolicy.preserveNativeAnthropicToolUseIds,
|
||||
preserveReplaySafeThinkingToolCallIds: shouldAllowProviderOwnedThinkingReplay({
|
||||
modelApi: (model as { api?: unknown })?.api as string | null | undefined,
|
||||
policy: transcriptPolicy,
|
||||
}),
|
||||
repairToolUseResultPairing: transcriptPolicy.repairToolUseResultPairing,
|
||||
});
|
||||
if (nextMessages === messages) {
|
||||
return inner(model, context, options);
|
||||
}
|
||||
const nextContext = {
|
||||
...(context as unknown as Record<string, unknown>),
|
||||
messages: sanitized,
|
||||
messages: nextMessages,
|
||||
} as unknown;
|
||||
return inner(model, nextContext as typeof context, options);
|
||||
};
|
||||
|
||||
@@ -93,7 +93,6 @@ describe("provider replay helpers", () => {
|
||||
});
|
||||
|
||||
it("builds hybrid anthropic or openai replay policy", () => {
|
||||
// Sonnet 4.6 preserves thinking blocks even when flag is set
|
||||
const sonnet46Policy = buildHybridAnthropicOrOpenAIReplayPolicy(
|
||||
{
|
||||
provider: "minimax",
|
||||
@@ -107,7 +106,6 @@ describe("provider replay helpers", () => {
|
||||
});
|
||||
expect(sonnet46Policy).not.toHaveProperty("dropThinkingBlocks");
|
||||
|
||||
// Legacy model still drops
|
||||
expect(
|
||||
buildHybridAnthropicOrOpenAIReplayPolicy(
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user