fix: preserve anthropic replay tool results

2026-04-19 13:11:40 +00:00 · 2026-04-06 14:07:53 +01:00
parent ab495f4c90
commit 191b7cb5e6
3 changed files with 144 additions and 1 deletion
--- a/src/agents/pi-embedded-runner.anthropic-tool-replay.live.test.ts
+++ b/src/agents/pi-embedded-runner.anthropic-tool-replay.live.test.ts
@@ -0,0 +1,102 @@
+import { describe, expect, it, vi } from "vitest";
+import {
+  completeSimpleWithLiveTimeout,
+  extractAssistantText,
+  logLiveCache,
+} from "./live-cache-test-support.js";
+import { isLiveTestEnabled } from "./live-test-helpers.js";
+import { wrapStreamFnSanitizeMalformedToolCalls } from "./pi-embedded-runner/run/attempt.tool-call-normalization.js";
+
+const ANTHROPIC_LIVE = isLiveTestEnabled(["ANTHROPIC_LIVE_TEST"]);
+const describeLive = ANTHROPIC_LIVE ? describe : describe.skip;
+const ANTHROPIC_TIMEOUT_MS = 120_000;
+const TOOL_OUTPUT_SENTINEL = "TOOL-RESULT-LIVE-MAGENTA";
+
+function buildLiveAnthropicModel() {
+  const apiKey = process.env.ANTHROPIC_API_KEY;
+  if (!apiKey) {
+    throw new Error("missing ANTHROPIC_API_KEY");
+  }
+  const modelId =
+    (process.env.OPENCLAW_LIVE_ANTHROPIC_CACHE_MODEL || "claude-sonnet-4-6")
+      .split(/[/:]/)
+      .filter(Boolean)
+      .pop() || "claude-sonnet-4-6";
+  return {
+    apiKey,
+    model: {
+      id: modelId,
+      name: modelId,
+      api: "anthropic-messages" as const,
+      provider: "anthropic",
+      baseUrl: "https://api.anthropic.com/v1",
+      reasoning: true,
+      input: ["text"] as const,
+      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+      contextWindow: 200_000,
+      maxTokens: 8_192,
+    },
+  };
+}
+
+describeLive("pi embedded anthropic replay sanitization (live)", () => {
+  it(
+    "preserves toolCall replay history that Anthropic accepts end-to-end",
+    async () => {
+      const { apiKey, model } = buildLiveAnthropicModel();
+      const messages = [
+        {
+          role: "assistant",
+          content: [{ type: "toolCall", id: "call_1", name: "noop", arguments: {} }],
+        },
+        {
+          role: "user",
+          content: [
+            {
+              type: "toolResult",
+              toolUseId: "call_1",
+              content: [{ type: "text", text: TOOL_OUTPUT_SENTINEL }],
+            },
+            { type: "text", text: "The tool finished." },
+          ],
+        },
+        {
+          role: "user",
+          content: "Reply with exactly OK as plain text if this replay history is valid.",
+        },
+      ];
+
+      const baseFn = vi.fn((_model: unknown, context: unknown) => ({ context }));
+      const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["noop"]), {
+        validateGeminiTurns: false,
+        validateAnthropicTurns: true,
+      });
+
+      await Promise.resolve(wrapped(model as never, { messages } as never, {} as never));
+
+      expect(baseFn).toHaveBeenCalledTimes(1);
+      const seenMessages = (baseFn.mock.calls[0]?.[1] as { messages?: unknown[] })?.messages;
+      expect(seenMessages).toEqual(messages);
+
+      logLiveCache(`anthropic replay live model=${model.provider}/${model.id}`);
+      const response = await completeSimpleWithLiveTimeout(
+        model,
+        { messages: seenMessages as typeof messages },
+        {
+          apiKey,
+          cacheRetention: "none",
+          sessionId: "anthropic-tool-replay-live",
+          maxTokens: 64,
+          temperature: 0,
+        },
+        "anthropic replay live synthetic transcript",
+        ANTHROPIC_TIMEOUT_MS,
+      );
+
+      const text = extractAssistantText(response);
+      logLiveCache(`anthropic replay live result=${JSON.stringify(text)}`);
+      expect(response.content.length).toBeGreaterThanOrEqual(0);
+    },
+    6 * 60_000,
+  );
+});
--- a/src/agents/pi-embedded-runner/run/attempt.test.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.test.ts
@@ -1409,6 +1409,47 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
    ]);
  });

+  it.each(["toolCall", "functionCall"] as const)(
+    "preserves matching Anthropic user tool_result blocks after %s replay turns",
+    async (toolCallType) => {
+      const messages = [
+        {
+          role: "assistant",
+          content: [{ type: toolCallType, id: "call_1", name: "read", arguments: {} }],
+        },
+        {
+          role: "user",
+          content: [
+            {
+              type: "toolResult",
+              toolUseId: "call_1",
+              content: [{ type: "text", text: "kept result" }],
+            },
+            { type: "text", text: "retry" },
+          ],
+        },
+      ];
+      const baseFn = vi.fn((_model, _context) =>
+        createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
+      );
+
+      const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"]), {
+        validateGeminiTurns: false,
+        validateAnthropicTurns: true,
+      });
+      const stream = wrapped({} as never, { messages } as never, {} as never) as
+        | FakeWrappedStream
+        | Promise<FakeWrappedStream>;
+      await Promise.resolve(stream);
+
+      expect(baseFn).toHaveBeenCalledTimes(1);
+      const seenContext = baseFn.mock.calls[0]?.[1] as {
+        messages: Array<{ role?: string; content?: unknown[] }>;
+      };
+      expect(seenContext.messages).toEqual(messages);
+    },
+  );
+
  it("drops orphaned Anthropic user tool_result blocks after dropping an assistant replay turn", async () => {
    const messages = [
      {
--- a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts
@@ -360,7 +360,7 @@ function sanitizeAnthropicReplayToolResults(messages: AgentMessage[]): AgentMess
            continue;
          }
          const typedBlock = block as { type?: unknown; id?: unknown };
-          if (typedBlock.type !== "toolUse" || typeof typedBlock.id !== "string") {
+          if (!isToolCallBlockType(typedBlock.type) || typeof typedBlock.id !== "string") {
            continue;
          }
          const trimmedId = typedBlock.id.trim();