mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-19 13:11:40 +00:00
fix: preserve anthropic replay tool results
This commit is contained in:
102
src/agents/pi-embedded-runner.anthropic-tool-replay.live.test.ts
Normal file
102
src/agents/pi-embedded-runner.anthropic-tool-replay.live.test.ts
Normal file
@@ -0,0 +1,102 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
completeSimpleWithLiveTimeout,
|
||||
extractAssistantText,
|
||||
logLiveCache,
|
||||
} from "./live-cache-test-support.js";
|
||||
import { isLiveTestEnabled } from "./live-test-helpers.js";
|
||||
import { wrapStreamFnSanitizeMalformedToolCalls } from "./pi-embedded-runner/run/attempt.tool-call-normalization.js";
|
||||
|
||||
// Result of isLiveTestEnabled for the ANTHROPIC_LIVE_TEST flag.
// NOTE(review): presumably read from the environment — confirm in live-test-helpers.
const ANTHROPIC_LIVE = isLiveTestEnabled(["ANTHROPIC_LIVE_TEST"]);
|
||||
// Suite registrar: the real `describe` when live tests are enabled, `describe.skip` otherwise.
const describeLive = ANTHROPIC_LIVE ? describe : describe.skip;
|
||||
// Timeout in milliseconds passed to completeSimpleWithLiveTimeout for the live request.
const ANTHROPIC_TIMEOUT_MS = 120_000;
|
||||
// Marker text used as the tool result content in the synthetic replay transcript.
const TOOL_OUTPUT_SENTINEL = "TOOL-RESULT-LIVE-MAGENTA";
|
||||
|
||||
/**
 * Builds the credentials and model descriptor used by the live Anthropic replay test.
 *
 * The API key is read from `ANTHROPIC_API_KEY`. The model id is read from
 * `OPENCLAW_LIVE_ANTHROPIC_CACHE_MODEL`; only the last non-empty segment after
 * splitting on `/` or `:` is kept (for example `anthropic/claude-x` becomes
 * `claude-x`). Both values are trimmed, so a whitespace-only or padded value
 * does not leak into the request.
 *
 * @returns The trimmed API key and a model descriptor for the
 *   `anthropic-messages` API.
 * @throws Error when `ANTHROPIC_API_KEY` is unset, empty, or whitespace-only.
 */
function buildLiveAnthropicModel() {
  const defaultModelId = "claude-sonnet-4-6";

  const apiKey = process.env.ANTHROPIC_API_KEY?.trim();
  if (!apiKey) {
    throw new Error("missing ANTHROPIC_API_KEY");
  }

  // `||` (not `??`) is deliberate: an empty or blank value must fall back to the default.
  const configuredModel =
    process.env.OPENCLAW_LIVE_ANTHROPIC_CACHE_MODEL?.trim() || defaultModelId;
  const modelId =
    configuredModel
      .split(/[/:]/)
      .map((segment) => segment.trim())
      .filter(Boolean)
      .pop() || defaultModelId;

  return {
    apiKey,
    model: {
      id: modelId,
      name: modelId,
      api: "anthropic-messages" as const,
      provider: "anthropic",
      baseUrl: "https://api.anthropic.com/v1",
      reasoning: true,
      input: ["text"] as const,
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 200_000,
      maxTokens: 8_192,
    },
  };
}
|
||||
|
||||
// Live suite: runs the replay sanitizer over a synthetic tool-call transcript and
// then sends the forwarded transcript to the configured Anthropic model.
describeLive("pi embedded anthropic replay sanitization (live)", () => {
  it(
    "preserves toolCall replay history that Anthropic accepts end-to-end",
    async () => {
      const { apiKey, model } = buildLiveAnthropicModel();

      // Synthetic transcript: an assistant tool call, a user turn carrying the
      // matching tool result plus text, and a follow-up user prompt.
      const messages = [
        {
          role: "assistant",
          content: [{ type: "toolCall", id: "call_1", name: "noop", arguments: {} }],
        },
        {
          role: "user",
          content: [
            {
              type: "toolResult",
              toolUseId: "call_1",
              content: [{ type: "text", text: TOOL_OUTPUT_SENTINEL }],
            },
            { type: "text", text: "The tool finished." },
          ],
        },
        {
          role: "user",
          content: "Reply with exactly OK as plain text if this replay history is valid.",
        },
      ];

      // The stub only records the context the wrapper forwards; no request is made here.
      const baseFn = vi.fn((_model: unknown, context: unknown) => ({ context }));
      const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["noop"]), {
        validateGeminiTurns: false,
        validateAnthropicTurns: true,
      });

      await Promise.resolve(wrapped(model as never, { messages } as never, {} as never));

      // The wrapper must forward the transcript unchanged.
      expect(baseFn).toHaveBeenCalledTimes(1);
      const seenMessages = (baseFn.mock.calls[0]?.[1] as { messages?: unknown[] })?.messages;
      expect(seenMessages).toEqual(messages);

      logLiveCache(`anthropic replay live model=${model.provider}/${model.id}`);
      const response = await completeSimpleWithLiveTimeout(
        model,
        { messages: seenMessages as typeof messages },
        {
          apiKey,
          cacheRetention: "none",
          sessionId: "anthropic-tool-replay-live",
          maxTokens: 64,
          temperature: 0,
        },
        "anthropic replay live synthetic transcript",
        ANTHROPIC_TIMEOUT_MS,
      );

      const text = extractAssistantText(response);
      logLiveCache(`anthropic replay live result=${JSON.stringify(text)}`);
      // An array length is never negative, so a `>= 0` check could not fail.
      // Require at least one content block so an empty response fails the test.
      expect(response.content.length).toBeGreaterThan(0);
    },
    6 * 60_000,
  );
});
|
||||
@@ -1409,6 +1409,47 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
// With Anthropic turn validation enabled, a user tool result whose toolUseId
// matches the preceding assistant call must reach the base stream fn unchanged,
// for both the "toolCall" and "functionCall" block spellings.
it.each(["toolCall", "functionCall"] as const)(
  "preserves matching Anthropic user tool_result blocks after %s replay turns",
  async (toolCallType) => {
    const assistantTurn = {
      role: "assistant",
      content: [{ type: toolCallType, id: "call_1", name: "read", arguments: {} }],
    };
    const userTurn = {
      role: "user",
      content: [
        {
          type: "toolResult",
          toolUseId: "call_1",
          content: [{ type: "text", text: "kept result" }],
        },
        { type: "text", text: "retry" },
      ],
    };
    const messages = [assistantTurn, userTurn];

    // Stub stream fn: yields an empty fake stream and records its arguments.
    const baseFn = vi.fn((_model, _context) =>
      createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
    );
    const allowedTools = new Set(["read"]);
    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, allowedTools, {
      validateGeminiTurns: false,
      validateAnthropicTurns: true,
    });

    await Promise.resolve(
      wrapped({} as never, { messages } as never, {} as never) as
        | FakeWrappedStream
        | Promise<FakeWrappedStream>,
    );

    expect(baseFn).toHaveBeenCalledTimes(1);
    const forwarded = baseFn.mock.calls[0]?.[1] as {
      messages: Array<{ role?: string; content?: unknown[] }>;
    };
    expect(forwarded.messages).toEqual(messages);
  },
);
|
||||
|
||||
it("drops orphaned Anthropic user tool_result blocks after dropping an assistant replay turn", async () => {
|
||||
const messages = [
|
||||
{
|
||||
|
||||
@@ -360,7 +360,7 @@ function sanitizeAnthropicReplayToolResults(messages: AgentMessage[]): AgentMess
|
||||
continue;
|
||||
}
|
||||
const typedBlock = block as { type?: unknown; id?: unknown };
|
||||
if (typedBlock.type !== "toolUse" || typeof typedBlock.id !== "string") {
|
||||
if (!isToolCallBlockType(typedBlock.type) || typeof typedBlock.id !== "string") {
|
||||
continue;
|
||||
}
|
||||
const trimmedId = typedBlock.id.trim();
|
||||
|
||||
Reference in New Issue
Block a user