fix: preserve anthropic replay tool results

This commit is contained in:
Peter Steinberger
2026-04-06 14:07:53 +01:00
parent ab495f4c90
commit 191b7cb5e6
3 changed files with 144 additions and 1 deletions

View File

@@ -0,0 +1,102 @@
import { describe, expect, it, vi } from "vitest";
import {
completeSimpleWithLiveTimeout,
extractAssistantText,
logLiveCache,
} from "./live-cache-test-support.js";
import { isLiveTestEnabled } from "./live-test-helpers.js";
import { wrapStreamFnSanitizeMalformedToolCalls } from "./pi-embedded-runner/run/attempt.tool-call-normalization.js";
// Gate for the live suite; presumably driven by the ANTHROPIC_LIVE_TEST flag in the
// environment (semantics live in isLiveTestEnabled — confirm there).
const ANTHROPIC_LIVE = isLiveTestEnabled(["ANTHROPIC_LIVE_TEST"]);
// When the gate is off the suite is registered through describe.skip instead of describe.
const describeLive = !ANTHROPIC_LIVE ? describe.skip : describe;
// Timeout handed to the live completion helper: two minutes, in milliseconds.
const ANTHROPIC_TIMEOUT_MS = 2 * 60 * 1_000;
// Text placed inside the synthetic tool result of the replayed transcript.
const TOOL_OUTPUT_SENTINEL = "TOOL-RESULT-LIVE-MAGENTA";
/**
 * Builds the API key and model descriptor used by the live Anthropic replay test.
 *
 * The model id is read from `OPENCLAW_LIVE_ANTHROPIC_CACHE_MODEL`; only the last non-empty
 * segment after splitting on `/` or `:` is kept, so a value such as `provider/model`
 * resolves to `model`. An unset, empty, or separator-only value falls back to
 * `claude-sonnet-4-6`.
 *
 * @returns The API key together with the model descriptor object.
 * @throws Error when `ANTHROPIC_API_KEY` is unset or empty.
 */
function buildLiveAnthropicModel() {
  const apiKey = process.env.ANTHROPIC_API_KEY;
  if (!apiKey) {
    throw new Error("missing ANTHROPIC_API_KEY");
  }

  const fallbackModelId = "claude-sonnet-4-6";
  const requestedModel = process.env.OPENCLAW_LIVE_ANTHROPIC_CACHE_MODEL || fallbackModelId;
  // Drop empty segments so inputs like "a//b" or a trailing ":" still yield a usable id.
  const segments = requestedModel.split(/[/:]/).filter(Boolean);
  const modelId = segments[segments.length - 1] || fallbackModelId;

  const model = {
    id: modelId,
    name: modelId,
    api: "anthropic-messages" as const,
    provider: "anthropic",
    baseUrl: "https://api.anthropic.com/v1",
    reasoning: true,
    input: ["text"] as const,
    // Costs are zeroed in this descriptor.
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 200_000,
    maxTokens: 8_192,
  };

  return { apiKey, model };
}
/**
 * Live check for Anthropic replay sanitization.
 *
 * Builds a synthetic transcript in which an assistant `toolCall` turn is followed by a user
 * turn carrying the matching `toolResult` block, runs it through
 * `wrapStreamFnSanitizeMalformedToolCalls` with Anthropic turn validation enabled, asserts the
 * wrapper forwarded the messages unchanged, and then sends those messages to the live model.
 * Registered through `describeLive`, so the suite is skipped unless the live gate is on.
 */
describeLive("pi embedded anthropic replay sanitization (live)", () => {
  it(
    "preserves toolCall replay history that Anthropic accepts end-to-end",
    async () => {
      const { apiKey, model } = buildLiveAnthropicModel();

      // Synthetic history: tool call -> matching tool result (+ text) -> follow-up prompt.
      const messages = [
        {
          role: "assistant",
          content: [{ type: "toolCall", id: "call_1", name: "noop", arguments: {} }],
        },
        {
          role: "user",
          content: [
            {
              type: "toolResult",
              toolUseId: "call_1",
              content: [{ type: "text", text: TOOL_OUTPUT_SENTINEL }],
            },
            { type: "text", text: "The tool finished." },
          ],
        },
        {
          role: "user",
          content: "Reply with exactly OK as plain text if this replay history is valid.",
        },
      ];

      // The spy stands in for the real stream function and just echoes the context it got,
      // which lets the test inspect what the sanitizing wrapper forwarded.
      const baseFn = vi.fn((_model: unknown, context: unknown) => ({ context }));
      const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["noop"]), {
        validateGeminiTurns: false,
        validateAnthropicTurns: true,
      });

      // Promise.resolve covers both a synchronous and a promise-returning wrapper.
      await Promise.resolve(wrapped(model as never, { messages } as never, {} as never));

      expect(baseFn).toHaveBeenCalledTimes(1);
      const seenMessages = (baseFn.mock.calls[0]?.[1] as { messages?: unknown[] })?.messages;
      // The matching tool result must survive sanitization untouched.
      expect(seenMessages).toEqual(messages);

      logLiveCache(`anthropic replay live model=${model.provider}/${model.id}`);
      const response = await completeSimpleWithLiveTimeout(
        model,
        { messages: seenMessages as typeof messages },
        {
          apiKey,
          cacheRetention: "none",
          sessionId: "anthropic-tool-replay-live",
          maxTokens: 64,
          temperature: 0,
        },
        "anthropic replay live synthetic transcript",
        ANTHROPIC_TIMEOUT_MS,
      );

      const text = extractAssistantText(response);
      logLiveCache(`anthropic replay live result=${JSON.stringify(text)}`);

      // Require at least one content block in the reply. A `>= 0` bound on an array length
      // can never fail, so it would not detect an empty response.
      expect(response.content.length).toBeGreaterThan(0);
    },
    6 * 60_000,
  );
});

View File

@@ -1409,6 +1409,47 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
]);
});
// Both assistant tool-call block spellings must keep the matching user tool result: the
// wrapper is expected to forward the message list to the base stream function unchanged.
it.each(["toolCall", "functionCall"] as const)(
  "preserves matching Anthropic user tool_result blocks after %s replay turns",
  async (toolCallType) => {
    const assistantTurn = {
      role: "assistant",
      content: [{ type: toolCallType, id: "call_1", name: "read", arguments: {} }],
    };
    // The user turn answers call_1 and also carries ordinary text.
    const userTurn = {
      role: "user",
      content: [
        {
          type: "toolResult",
          toolUseId: "call_1",
          content: [{ type: "text", text: "kept result" }],
        },
        { type: "text", text: "retry" },
      ],
    };
    const messages = [assistantTurn, userTurn];

    const baseFn = vi.fn((_model, _context) =>
      createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
    );
    const allowedToolNames = new Set(["read"]);
    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, allowedToolNames, {
      validateGeminiTurns: false,
      validateAnthropicTurns: true,
    });

    // The wrapper may return the stream directly or as a promise; resolve either form.
    await Promise.resolve(
      wrapped({} as never, { messages } as never, {} as never) as
        | FakeWrappedStream
        | Promise<FakeWrappedStream>,
    );

    expect(baseFn).toHaveBeenCalledTimes(1);
    const [firstCall] = baseFn.mock.calls;
    const seenContext = firstCall?.[1] as {
      messages: Array<{ role?: string; content?: unknown[] }>;
    };
    expect(seenContext.messages).toEqual(messages);
  },
);
it("drops orphaned Anthropic user tool_result blocks after dropping an assistant replay turn", async () => {
const messages = [
{

View File

@@ -360,7 +360,7 @@ function sanitizeAnthropicReplayToolResults(messages: AgentMessage[]): AgentMess
continue;
}
const typedBlock = block as { type?: unknown; id?: unknown };
if (typedBlock.type !== "toolUse" || typeof typedBlock.id !== "string") {
if (!isToolCallBlockType(typedBlock.type) || typeof typedBlock.id !== "string") {
continue;
}
const trimmedId = typedBlock.id.trim();