From 966a3ea27c70c8eeb693cd3880e67707c3ee7c30 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 18 Apr 2026 22:12:19 +0100 Subject: [PATCH] test: dedupe btw transcript fixtures --- src/agents/btw.test.ts | 460 ++++++++++++++++------------------------- 1 file changed, 174 insertions(+), 286 deletions(-) diff --git a/src/agents/btw.test.ts b/src/agents/btw.test.ts index eb6ab72092d..01f25b3d5c4 100644 --- a/src/agents/btw.test.ts +++ b/src/agents/btw.test.ts @@ -95,6 +95,15 @@ const DEFAULT_QUESTION = "What changed?"; const MATH_QUESTION = "What is 17 * 19?"; const MATH_ANSWER = "323"; +const DEFAULT_USAGE = { + input: 1, + output: 2, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 3, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, +}; + function makeAsyncEvents(events: unknown[]) { return { async *[Symbol.asyncIterator]() { @@ -114,52 +123,29 @@ function createSessionEntry(overrides: Partial = {}): SessionEntry }; } -function createDoneEvent(text: string) { +function createAssistantDoneEvent(content: unknown[]) { return { type: "done", reason: "stop", message: { role: "assistant", - content: [{ type: "text", text }], + content, provider: DEFAULT_PROVIDER, api: "anthropic-messages", model: DEFAULT_MODEL, stopReason: "stop", - usage: { - input: 1, - output: 2, - cacheRead: 0, - cacheWrite: 0, - totalTokens: 3, - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, - }, + usage: DEFAULT_USAGE, timestamp: Date.now(), }, }; } +function createDoneEvent(text: string) { + return createAssistantDoneEvent([{ type: "text", text }]); +} + function createThinkingOnlyDoneEvent(thinking: string) { - return { - type: "done", - reason: "stop", - message: { - role: "assistant", - content: [{ type: "thinking", thinking }], - provider: DEFAULT_PROVIDER, - api: "anthropic-messages", - model: DEFAULT_MODEL, - stopReason: "stop", - usage: { - input: 1, - output: 2, - cacheRead: 0, - cacheWrite: 0, - totalTokens: 3, - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, - }, - timestamp: Date.now(), - }, - }; + return createAssistantDoneEvent([{ type: "thinking", thinking }]); } function mockDoneAnswer(text: string) { @@ -192,6 +178,73 @@ function clearBuiltSessionMessages() { buildSessionContextMock.mockReturnValue({ messages: [] }); } +function createUserTranscriptMessage(content: unknown[] = [{ type: "text", text: "seed" }]) { + return { + role: "user", + content, + timestamp: 1, + }; +} + +function createAssistantTranscriptMessage( + content: unknown, + overrides: { + stopReason?: string; + output?: number; + timestamp?: number; + } = {}, +) { + return { + role: "assistant", + content, + provider: DEFAULT_PROVIDER, + api: "anthropic-messages", + model: DEFAULT_MODEL, + stopReason: overrides.stopReason ?? "stop", + usage: { + ...DEFAULT_USAGE, + output: overrides.output ?? DEFAULT_USAGE.output, + totalTokens: 1 + (overrides.output ?? DEFAULT_USAGE.output), + }, + timestamp: overrides.timestamp ?? 2, + }; +} + +function mockActiveTranscript(messages: unknown[]) { + getActiveEmbeddedRunSnapshotMock.mockReturnValue({ + transcriptLeafId: "assistant-1", + messages, + }); +} + +async function runMathSideQuestionAndCaptureContext() { + mockDoneAnswer(MATH_ANSWER); + await runMathSideQuestion(); + const [, context] = streamSimpleMock.mock.calls[0] ?? []; + return context; +} + +function expectNoAssistantMessages(context: unknown) { + expect( + (context as { messages?: Array<{ role?: string }> }).messages?.filter( + (message) => message.role === "assistant", + ), + ).toHaveLength(0); +} + +function expectSanitizedAssistantContext(context: unknown, text: string) { + expect(context).toMatchObject({ + messages: [ + expect.objectContaining({ role: "user" }), + expect.objectContaining({ + role: "assistant", + content: [{ type: "text", text }], + }), + expect.objectContaining({ role: "user" }), + ], + }); +} + describe("runBtwSideQuestion", () => { beforeEach(() => { streamSimpleMock.mockReset(); @@ -614,27 +667,20 @@ describe("runBtwSideQuestion", () => { }); it("excludes tool results from BTW context to avoid replaying raw tool output", async () => { - getActiveEmbeddedRunSnapshotMock.mockReturnValue({ - transcriptLeafId: "assistant-1", - messages: [ - { - role: "user", - content: [{ type: "text", text: "seed" }], - timestamp: 1, - }, - { - role: "toolResult", - content: [{ type: "text", text: "sensitive tool output" }], - details: { raw: "secret" }, - timestamp: 2, - }, - { - role: "assistant", - content: [{ type: "text", text: "done" }], - timestamp: 3, - }, - ], - }); + mockActiveTranscript([ + createUserTranscriptMessage(), + { + role: "toolResult", + content: [{ type: "text", text: "sensitive tool output" }], + details: { raw: "secret" }, + timestamp: 2, + }, + { + role: "assistant", + content: [{ type: "text", text: "done" }], + timestamp: 3, + }, + ]); mockDoneAnswer(MATH_ANSWER); await runMathSideQuestion(); @@ -653,53 +699,24 @@ describe("runBtwSideQuestion", () => { }); it("strips assistant tool calls from BTW context so no-tool side questions stay tool-free", async () => { - getActiveEmbeddedRunSnapshotMock.mockReturnValue({ - transcriptLeafId: "assistant-1", - messages: [ - { - role: "user", - content: [{ type: "text", text: "seed" }], - timestamp: 1, - }, - { - role: "assistant", - content: [ - { type: "text", text: "Let me check." }, - { type: "toolCall", id: "call_1", name: "read", arguments: { path: "README.md" } }, - { type: "toolUse", id: "call_legacy", name: "read", input: { path: "README.md" } }, - { type: "tool_call", id: "call_snake", name: "read", arguments: { path: "README.md" } }, - ], - provider: DEFAULT_PROVIDER, - api: "anthropic-messages", - model: DEFAULT_MODEL, - stopReason: "toolUse", - usage: { - input: 1, - output: 2, - cacheRead: 0, - cacheWrite: 0, - totalTokens: 3, - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, - }, - timestamp: 2, - }, - ], - }); + mockActiveTranscript([ + createUserTranscriptMessage(), + createAssistantTranscriptMessage( + [ + { type: "text", text: "Let me check." }, + { type: "toolCall", id: "call_1", name: "read", arguments: { path: "README.md" } }, + { type: "toolUse", id: "call_legacy", name: "read", input: { path: "README.md" } }, + { type: "tool_call", id: "call_snake", name: "read", arguments: { path: "README.md" } }, + ], + { stopReason: "toolUse" }, + ), + ]); mockDoneAnswer(MATH_ANSWER); await runMathSideQuestion(); const [, context] = streamSimpleMock.mock.calls[0] ?? []; - expect(context).toMatchObject({ - messages: [ - expect.objectContaining({ role: "user" }), - expect.objectContaining({ - role: "assistant", - content: [{ type: "text", text: "Let me check." }], - }), - expect.objectContaining({ role: "user" }), - ], - }); + expectSanitizedAssistantContext(context, "Let me check."); expect( (context as { messages?: Array<{ role?: string; content?: Array<{ type?: string }> }> }) .messages, @@ -718,73 +735,37 @@ describe("runBtwSideQuestion", () => { }); it("drops assistant messages that contain only tool calls", async () => { - getActiveEmbeddedRunSnapshotMock.mockReturnValue({ - transcriptLeafId: "assistant-1", - messages: [ - { - role: "user", - content: [{ type: "text", text: "seed" }], - timestamp: 1, - }, - { - role: "assistant", - content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }], - provider: DEFAULT_PROVIDER, - api: "anthropic-messages", - model: DEFAULT_MODEL, - stopReason: "toolUse", - usage: { - input: 1, - output: 0, - cacheRead: 0, - cacheWrite: 0, - totalTokens: 1, - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, - }, - timestamp: 2, - }, - ], - }); - mockDoneAnswer(MATH_ANSWER); - - await runMathSideQuestion(); - - const [, context] = streamSimpleMock.mock.calls[0] ?? []; - expect( - (context as { messages?: Array<{ role?: string }> }).messages?.filter( - (message) => message.role === "assistant", + mockActiveTranscript([ + createUserTranscriptMessage(), + createAssistantTranscriptMessage( + [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }], + { stopReason: "toolUse", output: 0 }, ), - ).toHaveLength(0); + ]); + + const context = await runMathSideQuestionAndCaptureContext(); + + expectNoAssistantMessages(context); }); it("strips embedded user tool results from BTW context", async () => { - getActiveEmbeddedRunSnapshotMock.mockReturnValue({ - transcriptLeafId: "assistant-1", - messages: [ + mockActiveTranscript([ + createUserTranscriptMessage([ + { type: "text", text: "seed" }, { - role: "user", - content: [ - { type: "text", text: "seed" }, - { - type: "toolResult", - toolUseId: "call_1", - content: [{ type: "text", text: "secret" }], - }, - { - type: "tool_result", - toolUseId: "call_2", - content: [{ type: "text", text: "secret-2" }], - }, - ], - timestamp: 1, + type: "toolResult", + toolUseId: "call_1", + content: [{ type: "text", text: "secret" }], }, - ], - }); - mockDoneAnswer(MATH_ANSWER); + { + type: "tool_result", + toolUseId: "call_2", + content: [{ type: "text", text: "secret-2" }], + }, + ]), + ]); - await runMathSideQuestion(); - - const [, context] = streamSimpleMock.mock.calls[0] ?? []; + const context = await runMathSideQuestionAndCaptureContext(); expect(context).toMatchObject({ messages: [ expect.objectContaining({ @@ -797,51 +778,20 @@ describe("runBtwSideQuestion", () => { }); it("drops assistant thinking blocks from BTW context", async () => { - getActiveEmbeddedRunSnapshotMock.mockReturnValue({ - transcriptLeafId: "assistant-1", - messages: [ - { - role: "user", - content: [{ type: "text", text: "seed" }], - timestamp: 1, - }, - { - role: "assistant", - content: [ - { type: "text", text: "Visible answer" }, - { type: "thinking", thinking: "Hidden chain of thought" }, - ], - provider: DEFAULT_PROVIDER, - api: "anthropic-messages", - model: DEFAULT_MODEL, - stopReason: "stop", - usage: { - input: 1, - output: 1, - cacheRead: 0, - cacheWrite: 0, - totalTokens: 2, - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, - }, - timestamp: 2, - }, - ], - }); - mockDoneAnswer(MATH_ANSWER); + mockActiveTranscript([ + createUserTranscriptMessage(), + createAssistantTranscriptMessage( + [ + { type: "text", text: "Visible answer" }, + { type: "thinking", thinking: "Hidden chain of thought" }, + ], + { output: 1 }, + ), + ]); - await runMathSideQuestion(); + const context = await runMathSideQuestionAndCaptureContext(); - const [, context] = streamSimpleMock.mock.calls[0] ?? []; - expect(context).toMatchObject({ - messages: [ - expect.objectContaining({ role: "user" }), - expect.objectContaining({ - role: "assistant", - content: [{ type: "text", text: "Visible answer" }], - }), - expect.objectContaining({ role: "user" }), - ], - }); + expectSanitizedAssistantContext(context, "Visible answer"); expect( (context as { messages?: Array<{ role?: string; content?: Array<{ type?: string }> }> }) .messages, @@ -856,64 +806,28 @@ describe("runBtwSideQuestion", () => { }); it("drops thinking-only assistant messages from BTW context", async () => { - getActiveEmbeddedRunSnapshotMock.mockReturnValue({ - transcriptLeafId: "assistant-1", - messages: [ - { - role: "user", - content: [{ type: "text", text: "seed" }], - timestamp: 1, - }, - { - role: "assistant", - content: [{ type: "thinking", thinking: "Hidden chain of thought" }], - provider: DEFAULT_PROVIDER, - api: "anthropic-messages", - model: DEFAULT_MODEL, - stopReason: "stop", - usage: { - input: 1, - output: 1, - cacheRead: 0, - cacheWrite: 0, - totalTokens: 2, - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, - }, - timestamp: 2, - }, - ], - }); - mockDoneAnswer(MATH_ANSWER); - - await runMathSideQuestion(); - - const [, context] = streamSimpleMock.mock.calls[0] ?? []; - expect( - (context as { messages?: Array<{ role?: string }> }).messages?.filter( - (message) => message.role === "assistant", + mockActiveTranscript([ + createUserTranscriptMessage(), + createAssistantTranscriptMessage( + [{ type: "thinking", thinking: "Hidden chain of thought" }], + { output: 1 }, ), - ).toHaveLength(0); + ]); + + const context = await runMathSideQuestionAndCaptureContext(); + + expectNoAssistantMessages(context); }); it("drops malformed user image blocks from BTW context", async () => { - getActiveEmbeddedRunSnapshotMock.mockReturnValue({ - transcriptLeafId: "assistant-1", - messages: [ - { - role: "user", - content: [ - { type: "text", text: "seed" }, - { type: "image", mimeType: "image/png" }, - ], - timestamp: 1, - }, - ], - }); - mockDoneAnswer(MATH_ANSWER); + mockActiveTranscript([ + createUserTranscriptMessage([ + { type: "text", text: "seed" }, + { type: "image", mimeType: "image/png" }, + ]), + ]); - await runMathSideQuestion(); - - const [, context] = streamSimpleMock.mock.calls[0] ?? []; + const context = await runMathSideQuestionAndCaptureContext(); expect(context).toMatchObject({ messages: [ expect.objectContaining({ @@ -926,42 +840,16 @@ describe("runBtwSideQuestion", () => { }); it("normalizes malformed assistant content before stripping tool blocks", async () => { - getActiveEmbeddedRunSnapshotMock.mockReturnValue({ - transcriptLeafId: "assistant-1", - messages: [ - { - role: "user", - content: [{ type: "text", text: "seed" }], - timestamp: 1, - }, - { - role: "assistant", - content: { type: "toolCall", id: "call_1", name: "read", arguments: {} }, - provider: DEFAULT_PROVIDER, - api: "anthropic-messages", - model: DEFAULT_MODEL, - stopReason: "toolUse", - usage: { - input: 1, - output: 0, - cacheRead: 0, - cacheWrite: 0, - totalTokens: 1, - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, - }, - timestamp: 2, - }, - ], - }); - mockDoneAnswer(MATH_ANSWER); - - await runMathSideQuestion(); - - const [, context] = streamSimpleMock.mock.calls[0] ?? []; - expect( - (context as { messages?: Array<{ role?: string }> }).messages?.filter( - (message) => message.role === "assistant", + mockActiveTranscript([ + createUserTranscriptMessage(), + createAssistantTranscriptMessage( + { type: "toolCall", id: "call_1", name: "read", arguments: {} }, + { stopReason: "toolUse", output: 0 }, ), - ).toHaveLength(0); + ]); + + const context = await runMathSideQuestionAndCaptureContext(); + + expectNoAssistantMessages(context); }); });