test: cover anthropic-only replay guards

2026-05-06 07:50:43 +00:00 · 2026-04-12 04:11:54 +01:00
parent 1a689240dc
commit eb501536d2
3 changed files with 64 additions and 7 deletions
--- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
+++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
@@ -816,7 +816,7 @@ describe("sanitizeSessionHistory", () => {
      messages,
      modelApi: "anthropic-messages",
      provider: "anthropic",
-      modelId: "claude-opus-4-6",
+      modelId: "claude-sonnet-4-6",
      sessionManager,
      sessionId: TEST_SESSION_ID,
    });
@@ -856,7 +856,7 @@ describe("sanitizeSessionHistory", () => {
      messages,
      modelApi: "anthropic-messages",
      provider: "anthropic",
-      modelId: "claude-opus-4-6",
+      modelId: "claude-sonnet-4-6",
      sessionManager,
      sessionId: TEST_SESSION_ID,
    });
--- a/src/agents/pi-embedded-runner/run/attempt.test.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.test.ts
@@ -907,7 +907,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
      createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
    );

-    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"]));
+    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
+      baseFn as never,
+      new Set(["read"]),
+      { validateAnthropicTurns: true } as never,
+    );
    const stream = wrapped({} as never, { messages } as never, {} as never) as
      | FakeWrappedStream
      | Promise<FakeWrappedStream>;
@@ -935,7 +939,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
      createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
    );

-    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"]));
+    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
+      baseFn as never,
+      new Set(["read"]),
+      { validateAnthropicTurns: true } as never,
+    );
    const stream = wrapped({} as never, { messages } as never, {} as never) as
      | FakeWrappedStream
      | Promise<FakeWrappedStream>;
@@ -964,7 +972,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
      createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
    );

-    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"]));
+    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
+      baseFn as never,
+      new Set(["read"]),
+      { validateAnthropicTurns: true } as never,
+    );
    const stream = wrapped(
      { api: "anthropic-messages" } as never,
      { messages } as never,
@@ -1012,6 +1024,7 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
      baseFn as never,
      new Set(["sessions_spawn"]),
+      { validateAnthropicTurns: true } as never,
    );
    const stream = wrapped(
      { api: "anthropic-messages" } as never,
@@ -1055,6 +1068,7 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
      baseFn as never,
      new Set(["sessions_spawn"]),
+      { validateAnthropicTurns: true } as never,
    );
    const stream = wrapped({} as never, { messages } as never, {} as never) as
      | FakeWrappedStream
@@ -1073,6 +1087,40 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
    expect(toolCall.input?.attachments?.[0]?.content).toBe(attachmentContent);
  });

+  it("keeps non-Anthropic thinking turns mutable when Anthropic replay validation is off", async () => {
+    const messages = [
+      {
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
+          { type: "toolCall", id: "call_read", name: " read ", arguments: { path: "README.md" } },
+        ],
+      },
+      {
+        role: "user",
+        content: [{ type: "text", text: "retry" }],
+      },
+    ];
+    const baseFn = vi.fn((_model, _context) =>
+      createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
+    );
+
+    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"]));
+    const stream = wrapped({ api: "google-gemini" } as never, { messages } as never, {} as never) as
+      | FakeWrappedStream
+      | Promise<FakeWrappedStream>;
+    await Promise.resolve(stream);
+
+    expect(baseFn).toHaveBeenCalledTimes(1);
+    const seenContext = baseFn.mock.calls[0]?.[1] as {
+      messages: Array<{ content?: unknown[] }>;
+    };
+    expect(seenContext.messages[0]?.content).toEqual([
+      { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
+      { type: "toolCall", id: "call_read", name: "read", arguments: { path: "README.md" } },
+    ]);
+  });
+
  it("preserves allowlisted tool names that contain punctuation", async () => {
    const messages = [
      {
--- a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts
@@ -331,6 +331,7 @@ function resolveReplayToolCallName(
 function sanitizeReplayToolCallInputs(
  messages: AgentMessage[],
  allowedToolNames?: Set<string>,
+  preserveImmutableThinkingTurns?: boolean,
 ): ReplayToolCallSanitizeReport {
  let changed = false;
  let droppedAssistantMessages = 0;
@@ -345,7 +346,11 @@ function sanitizeReplayToolCallInputs(
      out.push(message);
      continue;
    }
-    if (message.content.some((block) => isThinkingLikeReplayBlock(block))) {
+    if (
+      preserveImmutableThinkingTurns &&
+      message.content.some((block) => isThinkingLikeReplayBlock(block)) &&
+      message.content.some((block) => isReplayToolCallBlock(block))
+    ) {
      if (isReplaySafeThinkingTurn(message.content, allowedToolNames)) {
        out.push(message);
      } else {
@@ -633,7 +638,11 @@ export function wrapStreamFnSanitizeMalformedToolCalls(
    if (!Array.isArray(messages)) {
      return baseFn(model, context, options);
    }
-    const sanitized = sanitizeReplayToolCallInputs(messages as AgentMessage[], allowedToolNames);
+    const sanitized = sanitizeReplayToolCallInputs(
+      messages as AgentMessage[],
+      allowedToolNames,
+      transcriptPolicy?.validateAnthropicTurns === true,
+    );
    if (sanitized.messages === messages) {
      return baseFn(model, context, options);
    }