test: dedupe btw transcript fixtures

2026-05-06 07:20:43 +00:00 · 2026-04-18 22:12:19 +01:00
parent b4543caf55
commit 966a3ea27c
1 changed file with 174 additions and 286 deletions
--- a/src/agents/btw.test.ts
+++ b/src/agents/btw.test.ts
@@ -95,6 +95,15 @@ const DEFAULT_QUESTION = "What changed?";
 const MATH_QUESTION = "What is 17 * 19?";
 const MATH_ANSWER = "323";

+const DEFAULT_USAGE = {
+  input: 1,
+  output: 2,
+  cacheRead: 0,
+  cacheWrite: 0,
+  totalTokens: 3, // input + output
+  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, // all-zero cost
+};
+
 function makeAsyncEvents(events: unknown[]) {
  return {
    async *[Symbol.asyncIterator]() {
@@ -114,52 +123,29 @@ function createSessionEntry(overrides: Partial<SessionEntry> = {}): SessionEntry
  };
 }

-function createDoneEvent(text: string) {
+function createAssistantDoneEvent(content: unknown[]) {
  return {
    type: "done",
    reason: "stop",
    message: {
      role: "assistant",
-      content: [{ type: "text", text }],
+      content, // caller-supplied content blocks, passed through as-is
      provider: DEFAULT_PROVIDER,
      api: "anthropic-messages",
      model: DEFAULT_MODEL,
      stopReason: "stop",
-      usage: {
-        input: 1,
-        output: 2,
-        cacheRead: 0,
-        cacheWrite: 0,
-        totalTokens: 3,
-        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-      },
+      usage: DEFAULT_USAGE, // same usage object on every done event
      timestamp: Date.now(),
    },
  };
 }

+function createDoneEvent(text: string) {
+  return createAssistantDoneEvent([{ type: "text", text }]); // done event with one text block
+}
+
 function createThinkingOnlyDoneEvent(thinking: string) {
-  return {
-    type: "done",
-    reason: "stop",
-    message: {
-      role: "assistant",
-      content: [{ type: "thinking", thinking }],
-      provider: DEFAULT_PROVIDER,
-      api: "anthropic-messages",
-      model: DEFAULT_MODEL,
-      stopReason: "stop",
-      usage: {
-        input: 1,
-        output: 2,
-        cacheRead: 0,
-        cacheWrite: 0,
-        totalTokens: 3,
-        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-      },
-      timestamp: Date.now(),
-    },
-  };
+  return createAssistantDoneEvent([{ type: "thinking", thinking }]); // no text block
 }

 function mockDoneAnswer(text: string) {
@@ -192,6 +178,73 @@ function clearBuiltSessionMessages() {
  buildSessionContextMock.mockReturnValue({ messages: [] });
 }

+/**
+ * Builds a user transcript message; `content` defaults to a single "seed" text block.
+ */
+function createUserTranscriptMessage(content: unknown[] = [{ type: "text", text: "seed" }]) {
+  const message = { role: "user", content, timestamp: 1 };
+  return message;
+}
+
+/** Builds an assistant transcript message; `overrides.output` also feeds `usage.totalTokens`. */
+function createAssistantTranscriptMessage(
+  content: unknown,
+  overrides: { stopReason?: string; output?: number; timestamp?: number } = {},
+) {
+  // Resolve once so `usage.output` and `usage.totalTokens` cannot drift apart.
+  const output = overrides.output ?? DEFAULT_USAGE.output;
+  return {
+    role: "assistant",
+    content,
+    provider: DEFAULT_PROVIDER,
+    api: "anthropic-messages",
+    model: DEFAULT_MODEL,
+    stopReason: overrides.stopReason ?? "stop",
+    usage: {
+      ...DEFAULT_USAGE,
+      output,
+      // Derive from the shared input count instead of a hard-coded 1.
+      totalTokens: DEFAULT_USAGE.input + output,
+    },
+    timestamp: overrides.timestamp ?? 2,
+  };
+}
+
+/** Stubs the active embedded-run snapshot mock so its transcript holds exactly `messages`. */
+function mockActiveTranscript(messages: unknown[]) {
+  // The leaf id is fixed; only the message list varies per call.
+  const snapshot = { transcriptLeafId: "assistant-1", messages };
+  getActiveEmbeddedRunSnapshotMock.mockReturnValue(snapshot);
+}
+
+/** Runs the math side question and returns the context passed to the first stream call. */
+async function runMathSideQuestionAndCaptureContext() {
+  mockDoneAnswer(MATH_ANSWER);
+  await runMathSideQuestion();
+  // Second positional argument of the first recorded call; undefined if there was no call.
+  return streamSimpleMock.mock.calls[0]?.[1];
+}
+
+/** Asserts that the captured BTW context carries no assistant-role messages. */
+function expectNoAssistantMessages(context: unknown) {
+  const { messages } = context as { messages?: Array<{ role?: string }> };
+  // Keep only assistant-role entries; the expectation below requires there to be none.
+  const assistantMessages = messages?.filter((entry) => entry.role === "assistant");
+  expect(assistantMessages).toHaveLength(0);
+}
+
+/**
+ * Asserts the context is user, assistant, user, with the assistant message reduced to a
+ * single text block containing `text`.
+ */
+function expectSanitizedAssistantContext(context: unknown, text: string) {
+  const userMessage = expect.objectContaining({ role: "user" });
+  const assistantMessage = expect.objectContaining({
+    role: "assistant",
+    content: [{ type: "text", text }],
+  });
+  expect(context).toMatchObject({ messages: [userMessage, assistantMessage, userMessage] });
+}
+
 describe("runBtwSideQuestion", () => {
  beforeEach(() => {
    streamSimpleMock.mockReset();
@@ -614,27 +667,20 @@ describe("runBtwSideQuestion", () => {
  });

  it("excludes tool results from BTW context to avoid replaying raw tool output", async () => {
-    getActiveEmbeddedRunSnapshotMock.mockReturnValue({
-      transcriptLeafId: "assistant-1",
-      messages: [
-        {
-          role: "user",
-          content: [{ type: "text", text: "seed" }],
-          timestamp: 1,
-        },
-        {
-          role: "toolResult",
-          content: [{ type: "text", text: "sensitive tool output" }],
-          details: { raw: "secret" },
-          timestamp: 2,
-        },
-        {
-          role: "assistant",
-          content: [{ type: "text", text: "done" }],
-          timestamp: 3,
-        },
-      ],
-    });
+    mockActiveTranscript([
+      createUserTranscriptMessage(),
+      {
+        role: "toolResult",
+        content: [{ type: "text", text: "sensitive tool output" }],
+        details: { raw: "secret" },
+        timestamp: 2,
+      },
+      {
+        role: "assistant",
+        content: [{ type: "text", text: "done" }],
+        timestamp: 3,
+      },
+    ]);
    mockDoneAnswer(MATH_ANSWER);

    await runMathSideQuestion();
@@ -653,53 +699,24 @@ describe("runBtwSideQuestion", () => {
  });

  it("strips assistant tool calls from BTW context so no-tool side questions stay tool-free", async () => {
-    getActiveEmbeddedRunSnapshotMock.mockReturnValue({
-      transcriptLeafId: "assistant-1",
-      messages: [
-        {
-          role: "user",
-          content: [{ type: "text", text: "seed" }],
-          timestamp: 1,
-        },
-        {
-          role: "assistant",
-          content: [
-            { type: "text", text: "Let me check." },
-            { type: "toolCall", id: "call_1", name: "read", arguments: { path: "README.md" } },
-            { type: "toolUse", id: "call_legacy", name: "read", input: { path: "README.md" } },
-            { type: "tool_call", id: "call_snake", name: "read", arguments: { path: "README.md" } },
-          ],
-          provider: DEFAULT_PROVIDER,
-          api: "anthropic-messages",
-          model: DEFAULT_MODEL,
-          stopReason: "toolUse",
-          usage: {
-            input: 1,
-            output: 2,
-            cacheRead: 0,
-            cacheWrite: 0,
-            totalTokens: 3,
-            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-          },
-          timestamp: 2,
-        },
-      ],
-    });
+    mockActiveTranscript([
+      createUserTranscriptMessage(),
+      createAssistantTranscriptMessage(
+        [
+          { type: "text", text: "Let me check." },
+          { type: "toolCall", id: "call_1", name: "read", arguments: { path: "README.md" } },
+          { type: "toolUse", id: "call_legacy", name: "read", input: { path: "README.md" } },
+          { type: "tool_call", id: "call_snake", name: "read", arguments: { path: "README.md" } },
+        ],
+        { stopReason: "toolUse" },
+      ),
+    ]);
    mockDoneAnswer(MATH_ANSWER);

    await runMathSideQuestion();

    const [, context] = streamSimpleMock.mock.calls[0] ?? [];
-    expect(context).toMatchObject({
-      messages: [
-        expect.objectContaining({ role: "user" }),
-        expect.objectContaining({
-          role: "assistant",
-          content: [{ type: "text", text: "Let me check." }],
-        }),
-        expect.objectContaining({ role: "user" }),
-      ],
-    });
+    expectSanitizedAssistantContext(context, "Let me check.");
    expect(
      (context as { messages?: Array<{ role?: string; content?: Array<{ type?: string }> }> })
        .messages,
@@ -718,73 +735,37 @@ describe("runBtwSideQuestion", () => {
  });

  it("drops assistant messages that contain only tool calls", async () => {
-    getActiveEmbeddedRunSnapshotMock.mockReturnValue({
-      transcriptLeafId: "assistant-1",
-      messages: [
-        {
-          role: "user",
-          content: [{ type: "text", text: "seed" }],
-          timestamp: 1,
-        },
-        {
-          role: "assistant",
-          content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }],
-          provider: DEFAULT_PROVIDER,
-          api: "anthropic-messages",
-          model: DEFAULT_MODEL,
-          stopReason: "toolUse",
-          usage: {
-            input: 1,
-            output: 0,
-            cacheRead: 0,
-            cacheWrite: 0,
-            totalTokens: 1,
-            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-          },
-          timestamp: 2,
-        },
-      ],
-    });
-    mockDoneAnswer(MATH_ANSWER);
-
-    await runMathSideQuestion();
-
-    const [, context] = streamSimpleMock.mock.calls[0] ?? [];
-    expect(
-      (context as { messages?: Array<{ role?: string }> }).messages?.filter(
-        (message) => message.role === "assistant",
+    mockActiveTranscript([
+      createUserTranscriptMessage(),
+      createAssistantTranscriptMessage(
+        [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }],
+        { stopReason: "toolUse", output: 0 },
      ),
-    ).toHaveLength(0);
+    ]);
+
+    const context = await runMathSideQuestionAndCaptureContext();
+
+    expectNoAssistantMessages(context);
  });

  it("strips embedded user tool results from BTW context", async () => {
-    getActiveEmbeddedRunSnapshotMock.mockReturnValue({
-      transcriptLeafId: "assistant-1",
-      messages: [
+    mockActiveTranscript([
+      createUserTranscriptMessage([
+        { type: "text", text: "seed" },
        {
-          role: "user",
-          content: [
-            { type: "text", text: "seed" },
-            {
-              type: "toolResult",
-              toolUseId: "call_1",
-              content: [{ type: "text", text: "secret" }],
-            },
-            {
-              type: "tool_result",
-              toolUseId: "call_2",
-              content: [{ type: "text", text: "secret-2" }],
-            },
-          ],
-          timestamp: 1,
+          type: "toolResult",
+          toolUseId: "call_1",
+          content: [{ type: "text", text: "secret" }],
        },
-      ],
-    });
-    mockDoneAnswer(MATH_ANSWER);
+        {
+          type: "tool_result",
+          toolUseId: "call_2",
+          content: [{ type: "text", text: "secret-2" }],
+        },
+      ]),
+    ]);

-    await runMathSideQuestion();
-
-    const [, context] = streamSimpleMock.mock.calls[0] ?? [];
+    const context = await runMathSideQuestionAndCaptureContext();
    expect(context).toMatchObject({
      messages: [
        expect.objectContaining({
@@ -797,51 +778,20 @@ describe("runBtwSideQuestion", () => {
  });

  it("drops assistant thinking blocks from BTW context", async () => {
-    getActiveEmbeddedRunSnapshotMock.mockReturnValue({
-      transcriptLeafId: "assistant-1",
-      messages: [
-        {
-          role: "user",
-          content: [{ type: "text", text: "seed" }],
-          timestamp: 1,
-        },
-        {
-          role: "assistant",
-          content: [
-            { type: "text", text: "Visible answer" },
-            { type: "thinking", thinking: "Hidden chain of thought" },
-          ],
-          provider: DEFAULT_PROVIDER,
-          api: "anthropic-messages",
-          model: DEFAULT_MODEL,
-          stopReason: "stop",
-          usage: {
-            input: 1,
-            output: 1,
-            cacheRead: 0,
-            cacheWrite: 0,
-            totalTokens: 2,
-            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-          },
-          timestamp: 2,
-        },
-      ],
-    });
-    mockDoneAnswer(MATH_ANSWER);
+    mockActiveTranscript([
+      createUserTranscriptMessage(),
+      createAssistantTranscriptMessage(
+        [
+          { type: "text", text: "Visible answer" },
+          { type: "thinking", thinking: "Hidden chain of thought" },
+        ],
+        { output: 1 },
+      ),
+    ]);

-    await runMathSideQuestion();
+    const context = await runMathSideQuestionAndCaptureContext();

-    const [, context] = streamSimpleMock.mock.calls[0] ?? [];
-    expect(context).toMatchObject({
-      messages: [
-        expect.objectContaining({ role: "user" }),
-        expect.objectContaining({
-          role: "assistant",
-          content: [{ type: "text", text: "Visible answer" }],
-        }),
-        expect.objectContaining({ role: "user" }),
-      ],
-    });
+    expectSanitizedAssistantContext(context, "Visible answer");
    expect(
      (context as { messages?: Array<{ role?: string; content?: Array<{ type?: string }> }> })
        .messages,
@@ -856,64 +806,28 @@ describe("runBtwSideQuestion", () => {
  });

  it("drops thinking-only assistant messages from BTW context", async () => {
-    getActiveEmbeddedRunSnapshotMock.mockReturnValue({
-      transcriptLeafId: "assistant-1",
-      messages: [
-        {
-          role: "user",
-          content: [{ type: "text", text: "seed" }],
-          timestamp: 1,
-        },
-        {
-          role: "assistant",
-          content: [{ type: "thinking", thinking: "Hidden chain of thought" }],
-          provider: DEFAULT_PROVIDER,
-          api: "anthropic-messages",
-          model: DEFAULT_MODEL,
-          stopReason: "stop",
-          usage: {
-            input: 1,
-            output: 1,
-            cacheRead: 0,
-            cacheWrite: 0,
-            totalTokens: 2,
-            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-          },
-          timestamp: 2,
-        },
-      ],
-    });
-    mockDoneAnswer(MATH_ANSWER);
-
-    await runMathSideQuestion();
-
-    const [, context] = streamSimpleMock.mock.calls[0] ?? [];
-    expect(
-      (context as { messages?: Array<{ role?: string }> }).messages?.filter(
-        (message) => message.role === "assistant",
+    mockActiveTranscript([
+      createUserTranscriptMessage(),
+      createAssistantTranscriptMessage(
+        [{ type: "thinking", thinking: "Hidden chain of thought" }],
+        { output: 1 },
      ),
-    ).toHaveLength(0);
+    ]);
+
+    const context = await runMathSideQuestionAndCaptureContext();
+
+    expectNoAssistantMessages(context);
  });

  it("drops malformed user image blocks from BTW context", async () => {
-    getActiveEmbeddedRunSnapshotMock.mockReturnValue({
-      transcriptLeafId: "assistant-1",
-      messages: [
-        {
-          role: "user",
-          content: [
-            { type: "text", text: "seed" },
-            { type: "image", mimeType: "image/png" },
-          ],
-          timestamp: 1,
-        },
-      ],
-    });
-    mockDoneAnswer(MATH_ANSWER);
+    mockActiveTranscript([
+      createUserTranscriptMessage([
+        { type: "text", text: "seed" },
+        { type: "image", mimeType: "image/png" },
+      ]),
+    ]);

-    await runMathSideQuestion();
-
-    const [, context] = streamSimpleMock.mock.calls[0] ?? [];
+    const context = await runMathSideQuestionAndCaptureContext();
    expect(context).toMatchObject({
      messages: [
        expect.objectContaining({
@@ -926,42 +840,16 @@ describe("runBtwSideQuestion", () => {
  });

  it("normalizes malformed assistant content before stripping tool blocks", async () => {
-    getActiveEmbeddedRunSnapshotMock.mockReturnValue({
-      transcriptLeafId: "assistant-1",
-      messages: [
-        {
-          role: "user",
-          content: [{ type: "text", text: "seed" }],
-          timestamp: 1,
-        },
-        {
-          role: "assistant",
-          content: { type: "toolCall", id: "call_1", name: "read", arguments: {} },
-          provider: DEFAULT_PROVIDER,
-          api: "anthropic-messages",
-          model: DEFAULT_MODEL,
-          stopReason: "toolUse",
-          usage: {
-            input: 1,
-            output: 0,
-            cacheRead: 0,
-            cacheWrite: 0,
-            totalTokens: 1,
-            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-          },
-          timestamp: 2,
-        },
-      ],
-    });
-    mockDoneAnswer(MATH_ANSWER);
-
-    await runMathSideQuestion();
-
-    const [, context] = streamSimpleMock.mock.calls[0] ?? [];
-    expect(
-      (context as { messages?: Array<{ role?: string }> }).messages?.filter(
-        (message) => message.role === "assistant",
+    mockActiveTranscript([
+      createUserTranscriptMessage(),
+      createAssistantTranscriptMessage(
+        { type: "toolCall", id: "call_1", name: "read", arguments: {} },
+        { stopReason: "toolUse", output: 0 },
      ),
-    ).toHaveLength(0);
+    ]);
+
+    const context = await runMathSideQuestionAndCaptureContext();
+
+    expectNoAssistantMessages(context);
  });
 });