test: cover poisoned anthropic replay recovery

2026-05-06 09:00:42 +00:00 · 2026-04-12 03:37:20 +01:00
parent 92f9e09a8e
commit 5568cada24
1 changed file with 52 additions and 0 deletions
--- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
+++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
@@ -15,6 +15,7 @@ import {
  sanitizeWithOpenAIResponses,
  TEST_SESSION_ID,
 } from "./pi-embedded-runner.sanitize-session-history.test-harness.js";
+import { validateReplayTurns } from "./pi-embedded-runner/replay-history.js";
 import { castAgentMessage, castAgentMessages } from "./test-helpers/agent-message-fixtures.js";
 import type { TranscriptPolicy } from "./transcript-policy.js";
 import { makeZeroUsageSnapshot } from "./usage.js";
@@ -830,6 +831,57 @@ describe("sanitizeSessionHistory", () => {
    ).toBe(false);
  });

+  it("preserves signed thinking turns while repairing legacy tool-result pairing for anthropic", async () => { // Runs sanitizeSessionHistory, then validateReplayTurns, on a history whose tool result has no toolCallId.
+    const sessionManager = makeMockSessionManager();
+    const messages: AgentMessage[] = [ // Fixture order: user -> assistant (thinking + tool call) -> unlinked tool result -> user.
+      makeUserMessage("Use the gateway"),
+      makeAssistantMessage(
+        [
+          { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" }, // Thinking block that carries a thinkingSignature.
+          { type: "toolCall", id: "toolu_legacy", name: "gateway", arguments: {} }, // Tool call whose id the final tool result is expected to reference.
+        ],
+        { stopReason: "toolUse" },
+      ),
+      { // Hand-built tool result: the literal deliberately has no toolCallId property.
+        role: "toolResult",
+        toolName: "gateway",
+        content: [{ type: "text", text: "legacy tool output without a linked id" }],
+        isError: false, // The fixture is a non-error result; compare with the final isError assertion.
+        timestamp: nextTimestamp(),
+      } as AgentMessage, // NOTE(review): the cast is presumably required because toolCallId is omitted — confirm against AgentMessage.
+      makeUserMessage("continue"),
+    ];
+
+    const sanitized = await sanitizeSessionHistory({ // Stage 1: sanitize the history for the anthropic-messages API.
+      messages,
+      modelApi: "anthropic-messages",
+      provider: "anthropic",
+      modelId: "claude-opus-4-6",
+      sessionManager,
+      sessionId: TEST_SESSION_ID,
+    });
+    const validated = await validateReplayTurns({ // Stage 2: validate the sanitized output using the same model settings.
+      messages: sanitized,
+      modelApi: "anthropic-messages",
+      provider: "anthropic",
+      modelId: "claude-opus-4-6",
+      sessionId: TEST_SESSION_ID,
+    });
+
+    expect(sanitized.map((msg) => msg.role)).toEqual(["user", "assistant", "toolResult", "user"]); // Role sequence after sanitizing matches the fixture's.
+    expect(validated.map((msg) => msg.role)).toEqual(["user", "assistant", "toolResult", "user"]); // Role sequence after validation matches the fixture's.
+
+    const assistant = validated[1] as Extract<AgentMessage, { role: "assistant" }>; // Index 1 is the assistant turn per the role assertion above.
+    expect(assistant.content).toEqual([ // Both blocks must come back deep-equal to the fixture, signature included.
+      { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
+      { type: "toolCall", id: "toolu_legacy", name: "gateway", arguments: {} },
+    ]);
+
+    const toolResult = validated[2] as Extract<AgentMessage, { role: "toolResult" }>; // Index 2 is the tool result turn per the role assertion above.
+    expect(toolResult.toolCallId).toBe("toolu_legacy"); // The result must now reference the assistant's tool call id.
+    expect(toolResult.isError).toBe(true); // Expected true although the fixture set false. NOTE(review): presumably a synthesized placeholder replaces the unlinked result — confirm.
+  });
+
  it("preserves latest assistant thinking blocks for github-copilot models", async () => {
    setNonGoogleModelApi();