test: dedupe btw transcript fixtures

This commit is contained in:
Peter Steinberger
2026-04-18 22:12:19 +01:00
parent b4543caf55
commit 966a3ea27c

View File

@@ -95,6 +95,15 @@ const DEFAULT_QUESTION = "What changed?";
// Arithmetic prompt paired with its correct answer (17 * 19 = 323).
const MATH_QUESTION = "What is 17 * 19?";
const MATH_ANSWER = "323";
/**
 * Token-usage fixture reused by the assistant message builders in this file:
 * one input token, two output tokens, no cache traffic, and an all-zero cost.
 */
const DEFAULT_USAGE = {
  input: 1,
  output: 2,
  cacheRead: 0,
  cacheWrite: 0,
  totalTokens: 3,
  cost: {
    input: 0,
    output: 0,
    cacheRead: 0,
    cacheWrite: 0,
    total: 0,
  },
};
function makeAsyncEvents(events: unknown[]) {
return {
async *[Symbol.asyncIterator]() {
@@ -114,52 +123,29 @@ function createSessionEntry(overrides: Partial<SessionEntry> = {}): SessionEntry
};
}
/**
 * Builds a "done" stream event carrying an assistant message with the given
 * content blocks.
 *
 * The message uses the file-level default provider/model, a "stop" stop
 * reason, the shared DEFAULT_USAGE fixture, and the current time as timestamp.
 *
 * @param content - Content blocks to place on the assistant message as-is.
 * @returns A `{ type: "done", reason: "stop", message }` event object.
 */
function createAssistantDoneEvent(content: unknown[]) {
  return {
    type: "done",
    reason: "stop",
    message: {
      role: "assistant",
      content,
      provider: DEFAULT_PROVIDER,
      api: "anthropic-messages",
      model: DEFAULT_MODEL,
      stopReason: "stop",
      // Shared by reference; callers should treat the usage object as read-only.
      usage: DEFAULT_USAGE,
      timestamp: Date.now(),
    },
  };
}
/**
 * Builds a "done" event whose assistant message holds a single text block.
 *
 * @param text - Text placed in the only content block.
 * @returns The event produced by `createAssistantDoneEvent` for that block.
 */
function createDoneEvent(text: string) {
  const textBlock = { type: "text", text };
  return createAssistantDoneEvent([textBlock]);
}
/**
 * Builds a "done" event whose assistant message holds only a thinking block
 * (no text content).
 *
 * @param thinking - Text placed in the single thinking block.
 * @returns The event produced by `createAssistantDoneEvent` for that block.
 */
function createThinkingOnlyDoneEvent(thinking: string) {
  // Delegate to the shared builder instead of duplicating the event fixture.
  return createAssistantDoneEvent([{ type: "thinking", thinking }]);
}
function mockDoneAnswer(text: string) {
@@ -192,6 +178,73 @@ function clearBuiltSessionMessages() {
buildSessionContextMock.mockReturnValue({ messages: [] });
}
/**
 * Builds a user-role transcript message with a fixed timestamp of 1.
 *
 * @param content - Content blocks for the message; defaults to a single
 *   "seed" text block.
 * @returns A `{ role, content, timestamp }` message object.
 */
function createUserTranscriptMessage(content: unknown[] = [{ type: "text", text: "seed" }]) {
  const message = { role: "user", content, timestamp: 1 };
  return message;
}
/**
 * Builds an assistant-role transcript message using the file-level default
 * provider/model and a usage record derived from DEFAULT_USAGE.
 *
 * @param content - Message content, passed through unchanged (not required to
 *   be an array).
 * @param overrides - Optional overrides:
 *   - `stopReason`: defaults to "stop".
 *   - `output`: output token count; defaults to `DEFAULT_USAGE.output`.
 *   - `timestamp`: defaults to 2.
 * @returns The assistant message object.
 */
function createAssistantTranscriptMessage(
  content: unknown,
  overrides: {
    stopReason?: string;
    output?: number;
    timestamp?: number;
  } = {},
) {
  const output = overrides.output ?? DEFAULT_USAGE.output;
  return {
    role: "assistant",
    content,
    provider: DEFAULT_PROVIDER,
    api: "anthropic-messages",
    model: DEFAULT_MODEL,
    stopReason: overrides.stopReason ?? "stop",
    usage: {
      ...DEFAULT_USAGE,
      output,
      // Derive the total from the shared input count rather than a literal 1,
      // so it stays consistent if DEFAULT_USAGE.input ever changes.
      totalTokens: DEFAULT_USAGE.input + output,
    },
    timestamp: overrides.timestamp ?? 2,
  };
}
/**
 * Stubs `getActiveEmbeddedRunSnapshotMock` to return a snapshot with leaf id
 * "assistant-1" and the supplied transcript messages.
 *
 * @param messages - Transcript messages to expose through the snapshot.
 */
function mockActiveTranscript(messages: unknown[]) {
  const snapshot = { transcriptLeafId: "assistant-1", messages };
  getActiveEmbeddedRunSnapshotMock.mockReturnValue(snapshot);
}
/**
 * Mocks the done answer with MATH_ANSWER, runs the math side question, and
 * returns the second argument of the first recorded `streamSimpleMock` call.
 *
 * @returns The captured second argument, or `undefined` when no call was
 *   recorded.
 */
async function runMathSideQuestionAndCaptureContext() {
  mockDoneAnswer(MATH_ANSWER);
  await runMathSideQuestion();
  const firstCall = streamSimpleMock.mock.calls[0];
  return firstCall?.[1];
}
/**
 * Asserts that the context's `messages` list contains no entry whose role is
 * "assistant".
 *
 * @param context - Value expected to carry a `messages` array.
 */
function expectNoAssistantMessages(context: unknown) {
  const { messages } = context as { messages?: Array<{ role?: string }> };
  const assistantMessages = messages?.filter(({ role }) => role === "assistant");
  expect(assistantMessages).toHaveLength(0);
}
/**
 * Asserts that the context's messages are exactly: a user message, an
 * assistant message whose content is a single text block with `text`, and a
 * second user message.
 *
 * @param context - Value expected to carry the `messages` array.
 * @param text - Text the assistant message's only content block must hold.
 */
function expectSanitizedAssistantContext(context: unknown, text: string) {
  const userTurn = () => expect.objectContaining({ role: "user" });
  const sanitizedAssistantTurn = expect.objectContaining({
    role: "assistant",
    content: [{ type: "text", text }],
  });
  const expectedMessages = [userTurn(), sanitizedAssistantTurn, userTurn()];
  expect(context).toMatchObject({ messages: expectedMessages });
}
describe("runBtwSideQuestion", () => {
beforeEach(() => {
streamSimpleMock.mockReset();
@@ -614,27 +667,20 @@ describe("runBtwSideQuestion", () => {
});
it("excludes tool results from BTW context to avoid replaying raw tool output", async () => {
getActiveEmbeddedRunSnapshotMock.mockReturnValue({
transcriptLeafId: "assistant-1",
messages: [
{
role: "user",
content: [{ type: "text", text: "seed" }],
timestamp: 1,
},
{
role: "toolResult",
content: [{ type: "text", text: "sensitive tool output" }],
details: { raw: "secret" },
timestamp: 2,
},
{
role: "assistant",
content: [{ type: "text", text: "done" }],
timestamp: 3,
},
],
});
mockActiveTranscript([
createUserTranscriptMessage(),
{
role: "toolResult",
content: [{ type: "text", text: "sensitive tool output" }],
details: { raw: "secret" },
timestamp: 2,
},
{
role: "assistant",
content: [{ type: "text", text: "done" }],
timestamp: 3,
},
]);
mockDoneAnswer(MATH_ANSWER);
await runMathSideQuestion();
@@ -653,53 +699,24 @@ describe("runBtwSideQuestion", () => {
});
it("strips assistant tool calls from BTW context so no-tool side questions stay tool-free", async () => {
getActiveEmbeddedRunSnapshotMock.mockReturnValue({
transcriptLeafId: "assistant-1",
messages: [
{
role: "user",
content: [{ type: "text", text: "seed" }],
timestamp: 1,
},
{
role: "assistant",
content: [
{ type: "text", text: "Let me check." },
{ type: "toolCall", id: "call_1", name: "read", arguments: { path: "README.md" } },
{ type: "toolUse", id: "call_legacy", name: "read", input: { path: "README.md" } },
{ type: "tool_call", id: "call_snake", name: "read", arguments: { path: "README.md" } },
],
provider: DEFAULT_PROVIDER,
api: "anthropic-messages",
model: DEFAULT_MODEL,
stopReason: "toolUse",
usage: {
input: 1,
output: 2,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 3,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
timestamp: 2,
},
],
});
mockActiveTranscript([
createUserTranscriptMessage(),
createAssistantTranscriptMessage(
[
{ type: "text", text: "Let me check." },
{ type: "toolCall", id: "call_1", name: "read", arguments: { path: "README.md" } },
{ type: "toolUse", id: "call_legacy", name: "read", input: { path: "README.md" } },
{ type: "tool_call", id: "call_snake", name: "read", arguments: { path: "README.md" } },
],
{ stopReason: "toolUse" },
),
]);
mockDoneAnswer(MATH_ANSWER);
await runMathSideQuestion();
const [, context] = streamSimpleMock.mock.calls[0] ?? [];
expect(context).toMatchObject({
messages: [
expect.objectContaining({ role: "user" }),
expect.objectContaining({
role: "assistant",
content: [{ type: "text", text: "Let me check." }],
}),
expect.objectContaining({ role: "user" }),
],
});
expectSanitizedAssistantContext(context, "Let me check.");
expect(
(context as { messages?: Array<{ role?: string; content?: Array<{ type?: string }> }> })
.messages,
@@ -718,73 +735,37 @@ describe("runBtwSideQuestion", () => {
});
it("drops assistant messages that contain only tool calls", async () => {
  // Transcript: a seed user turn followed by an assistant turn whose only
  // content block is a tool call.
  mockActiveTranscript([
    createUserTranscriptMessage(),
    createAssistantTranscriptMessage(
      [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }],
      { stopReason: "toolUse", output: 0 },
    ),
  ]);

  const context = await runMathSideQuestionAndCaptureContext();

  // Nothing remains of the assistant turn once its tool call is stripped.
  expectNoAssistantMessages(context);
});
it("strips embedded user tool results from BTW context", async () => {
getActiveEmbeddedRunSnapshotMock.mockReturnValue({
transcriptLeafId: "assistant-1",
messages: [
mockActiveTranscript([
createUserTranscriptMessage([
{ type: "text", text: "seed" },
{
role: "user",
content: [
{ type: "text", text: "seed" },
{
type: "toolResult",
toolUseId: "call_1",
content: [{ type: "text", text: "secret" }],
},
{
type: "tool_result",
toolUseId: "call_2",
content: [{ type: "text", text: "secret-2" }],
},
],
timestamp: 1,
type: "toolResult",
toolUseId: "call_1",
content: [{ type: "text", text: "secret" }],
},
],
});
mockDoneAnswer(MATH_ANSWER);
{
type: "tool_result",
toolUseId: "call_2",
content: [{ type: "text", text: "secret-2" }],
},
]),
]);
await runMathSideQuestion();
const [, context] = streamSimpleMock.mock.calls[0] ?? [];
const context = await runMathSideQuestionAndCaptureContext();
expect(context).toMatchObject({
messages: [
expect.objectContaining({
@@ -797,51 +778,20 @@ describe("runBtwSideQuestion", () => {
});
it("drops assistant thinking blocks from BTW context", async () => {
getActiveEmbeddedRunSnapshotMock.mockReturnValue({
transcriptLeafId: "assistant-1",
messages: [
{
role: "user",
content: [{ type: "text", text: "seed" }],
timestamp: 1,
},
{
role: "assistant",
content: [
{ type: "text", text: "Visible answer" },
{ type: "thinking", thinking: "Hidden chain of thought" },
],
provider: DEFAULT_PROVIDER,
api: "anthropic-messages",
model: DEFAULT_MODEL,
stopReason: "stop",
usage: {
input: 1,
output: 1,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 2,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
timestamp: 2,
},
],
});
mockDoneAnswer(MATH_ANSWER);
mockActiveTranscript([
createUserTranscriptMessage(),
createAssistantTranscriptMessage(
[
{ type: "text", text: "Visible answer" },
{ type: "thinking", thinking: "Hidden chain of thought" },
],
{ output: 1 },
),
]);
await runMathSideQuestion();
const context = await runMathSideQuestionAndCaptureContext();
const [, context] = streamSimpleMock.mock.calls[0] ?? [];
expect(context).toMatchObject({
messages: [
expect.objectContaining({ role: "user" }),
expect.objectContaining({
role: "assistant",
content: [{ type: "text", text: "Visible answer" }],
}),
expect.objectContaining({ role: "user" }),
],
});
expectSanitizedAssistantContext(context, "Visible answer");
expect(
(context as { messages?: Array<{ role?: string; content?: Array<{ type?: string }> }> })
.messages,
@@ -856,64 +806,28 @@ describe("runBtwSideQuestion", () => {
});
it("drops thinking-only assistant messages from BTW context", async () => {
  // Transcript: a seed user turn followed by an assistant turn that holds
  // nothing but a thinking block.
  mockActiveTranscript([
    createUserTranscriptMessage(),
    createAssistantTranscriptMessage(
      [{ type: "thinking", thinking: "Hidden chain of thought" }],
      { output: 1 },
    ),
  ]);

  const context = await runMathSideQuestionAndCaptureContext();

  // The assistant turn must not appear in the captured context at all.
  expectNoAssistantMessages(context);
});
it("drops malformed user image blocks from BTW context", async () => {
getActiveEmbeddedRunSnapshotMock.mockReturnValue({
transcriptLeafId: "assistant-1",
messages: [
{
role: "user",
content: [
{ type: "text", text: "seed" },
{ type: "image", mimeType: "image/png" },
],
timestamp: 1,
},
],
});
mockDoneAnswer(MATH_ANSWER);
mockActiveTranscript([
createUserTranscriptMessage([
{ type: "text", text: "seed" },
{ type: "image", mimeType: "image/png" },
]),
]);
await runMathSideQuestion();
const [, context] = streamSimpleMock.mock.calls[0] ?? [];
const context = await runMathSideQuestionAndCaptureContext();
expect(context).toMatchObject({
messages: [
expect.objectContaining({
@@ -926,42 +840,16 @@ describe("runBtwSideQuestion", () => {
});
it("normalizes malformed assistant content before stripping tool blocks", async () => {
  // The assistant content is deliberately a bare object rather than an array
  // of blocks, to exercise the malformed-content path.
  mockActiveTranscript([
    createUserTranscriptMessage(),
    createAssistantTranscriptMessage(
      { type: "toolCall", id: "call_1", name: "read", arguments: {} },
      { stopReason: "toolUse", output: 0 },
    ),
  ]);

  const context = await runMathSideQuestionAndCaptureContext();

  // The lone tool call is removed, so no assistant message should remain.
  expectNoAssistantMessages(context);
});
});