Files
openclaw/src/agents/pi-embedded-runner/thinking.test.ts
2026-05-08 05:28:12 +01:00

583 lines
19 KiB
TypeScript

import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { createAssistantMessageEventStream } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest";
import { castAgentMessage, castAgentMessages } from "../test-helpers/agent-message-fixtures.js";
import {
OMITTED_ASSISTANT_REASONING_TEXT,
assessLastAssistantMessage,
dropReasoningFromHistory,
dropThinkingBlocks,
isAssistantMessageWithContent,
sanitizeThinkingForRecovery,
stripInvalidThinkingSignatures,
wrapAnthropicStreamWithRecovery,
} from "./thinking.js";
type AssistantMessage = Extract<AgentMessage, { role: "assistant" }>;
function dropSingleAssistantContent(content: Array<Record<string, unknown>>) {
const messages: AgentMessage[] = [
castAgentMessage({
role: "assistant",
content,
}),
];
const result = dropThinkingBlocks(messages);
return {
assistant: result[0] as Extract<AgentMessage, { role: "assistant" }>,
messages,
result,
};
}
const noThinkingReferenceCases = [
{ name: "dropThinkingBlocks", drop: dropThinkingBlocks },
{ name: "dropReasoningFromHistory", drop: dropReasoningFromHistory },
];
function createNoThinkingMessages(): AgentMessage[] {
return [
castAgentMessage({ role: "user", content: "hello" }),
castAgentMessage({ role: "assistant", content: [{ type: "text", text: "world" }] }),
];
}
describe("thinking-free history contract", () => {
it.each(noThinkingReferenceCases)(
"$name returns the original reference when no thinking blocks are present",
({ drop }) => {
const messages = createNoThinkingMessages();
const result = drop(messages);
expect(result).toBe(messages);
},
);
});
describe("isAssistantMessageWithContent", () => {
it("accepts assistant messages with array content and rejects others", () => {
const assistant = castAgentMessage({
role: "assistant",
content: [{ type: "text", text: "ok" }],
});
const user = castAgentMessage({ role: "user", content: "hi" });
const malformed = castAgentMessage({ role: "assistant", content: "not-array" });
expect(isAssistantMessageWithContent(assistant)).toBe(true);
expect(isAssistantMessageWithContent(user)).toBe(false);
expect(isAssistantMessageWithContent(malformed)).toBe(false);
});
});
describe("dropThinkingBlocks", () => {
it("preserves thinking blocks when the assistant message is the latest assistant turn", () => {
const { assistant, messages, result } = dropSingleAssistantContent([
{ type: "thinking", thinking: "internal" },
{ type: "text", text: "final" },
]);
expect(result).toBe(messages);
expect(assistant.content).toEqual([
{ type: "thinking", thinking: "internal" },
{ type: "text", text: "final" },
]);
});
it("preserves a latest assistant turn even when all content blocks are thinking", () => {
const { assistant } = dropSingleAssistantContent([
{ type: "thinking", thinking: "internal-only" },
]);
expect(assistant.content).toEqual([{ type: "thinking", thinking: "internal-only" }]);
});
it("preserves thinking blocks in the latest assistant message", () => {
const messages: AgentMessage[] = [
castAgentMessage({ role: "user", content: "first" }),
castAgentMessage({
role: "assistant",
content: [
{ type: "thinking", thinking: "old" },
{ type: "text", text: "old text" },
],
}),
castAgentMessage({ role: "user", content: "second" }),
castAgentMessage({
role: "assistant",
content: [
{ type: "thinking", thinking: "latest", thinkingSignature: "sig_latest" },
{ type: "text", text: "latest text" },
],
}),
];
const result = dropThinkingBlocks(messages);
const firstAssistant = result[1] as Extract<AgentMessage, { role: "assistant" }>;
const latestAssistant = result[3] as Extract<AgentMessage, { role: "assistant" }>;
expect(firstAssistant.content).toEqual([{ type: "text", text: "old text" }]);
expect(latestAssistant.content).toEqual([
{ type: "thinking", thinking: "latest", thinkingSignature: "sig_latest" },
{ type: "text", text: "latest text" },
]);
});
it("uses non-empty omitted-reasoning text when an older assistant turn is thinking-only", () => {
const messages: AgentMessage[] = [
castAgentMessage({ role: "user", content: "first" }),
castAgentMessage({
role: "assistant",
content: [{ type: "thinking", thinking: "old", thinkingSignature: "sig_old" }],
}),
castAgentMessage({ role: "user", content: "second" }),
castAgentMessage({
role: "assistant",
content: [
{ type: "thinking", thinking: "latest", thinkingSignature: "sig_latest" },
{ type: "text", text: "latest text" },
],
}),
];
const result = dropThinkingBlocks(messages);
const oldAssistant = result[1] as Extract<AgentMessage, { role: "assistant" }>;
const latestAssistant = result[3] as Extract<AgentMessage, { role: "assistant" }>;
const originalLatestAssistant = messages[3] as Extract<AgentMessage, { role: "assistant" }>;
expect(oldAssistant.content).toEqual([
{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT },
]);
expect(latestAssistant.content).toEqual(originalLatestAssistant.content);
});
it("uses non-empty omitted-reasoning text when an older assistant turn is redacted-thinking-only", () => {
const messages: AgentMessage[] = [
castAgentMessage({ role: "user", content: "first" }),
castAgentMessage({
role: "assistant",
content: [{ type: "redacted_thinking", data: "opaque" }],
}),
castAgentMessage({ role: "user", content: "second" }),
castAgentMessage({
role: "assistant",
content: [{ type: "text", text: "latest text" }],
}),
];
const result = dropThinkingBlocks(messages);
const oldAssistant = result[1] as Extract<AgentMessage, { role: "assistant" }>;
expect(oldAssistant.content).toEqual([
{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT },
]);
});
});
describe("dropReasoningFromHistory", () => {
it("strips assistant reasoning from prior completed turns", () => {
const messages: AgentMessage[] = [
castAgentMessage({ role: "user", content: "first" }),
castAgentMessage({
role: "assistant",
content: [
{ type: "thinking", thinking: "private" },
{ type: "text", text: "visible" },
],
}),
castAgentMessage({ role: "user", content: "second" }),
];
const result = dropReasoningFromHistory(messages);
const assistant = result[1] as AssistantMessage;
expect(result).not.toBe(messages);
expect(assistant.content).toEqual([{ type: "text", text: "visible" }]);
});
it("uses omitted-reasoning text when a completed assistant turn is reasoning-only", () => {
const messages: AgentMessage[] = [
castAgentMessage({ role: "user", content: "first" }),
castAgentMessage({
role: "assistant",
content: [{ type: "thinking", thinking: "private" }],
}),
castAgentMessage({ role: "user", content: "second" }),
];
const result = dropReasoningFromHistory(messages);
const assistant = result[1] as AssistantMessage;
expect(assistant.content).toEqual([{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }]);
});
it("preserves reasoning for the active tool-call continuation after the latest user turn", () => {
const messages: AgentMessage[] = [
castAgentMessage({ role: "user", content: "look up the answer" }),
castAgentMessage({
role: "assistant",
content: [
{ type: "thinking", thinking: "call the tool" },
{ type: "toolCall", id: "call123456", name: "lookup", arguments: {} },
],
}),
castAgentMessage({
role: "toolResult",
toolCallId: "call123456",
toolName: "lookup",
content: "42",
}),
];
const result = dropReasoningFromHistory(messages);
expect(result).toBe(messages);
});
it("strips reasoning from old tool-call turns once a later user turn starts", () => {
const messages: AgentMessage[] = [
castAgentMessage({ role: "user", content: "look up the answer" }),
castAgentMessage({
role: "assistant",
content: [
{ type: "thinking", thinking: "call the tool" },
{ type: "toolCall", id: "call123456", name: "lookup", arguments: {} },
],
}),
castAgentMessage({
role: "toolResult",
toolCallId: "call123456",
toolName: "lookup",
content: "42",
}),
castAgentMessage({ role: "assistant", content: [{ type: "text", text: "42" }] }),
castAgentMessage({ role: "user", content: "thanks" }),
];
const result = dropReasoningFromHistory(messages);
const assistant = result[1] as AssistantMessage;
expect(assistant.content).toEqual([
{ type: "toolCall", id: "call123456", name: "lookup", arguments: {} },
]);
});
});
describe("stripInvalidThinkingSignatures", () => {
it("returns the original reference when no invalid thinking signatures are present", () => {
const messages: AgentMessage[] = [
castAgentMessage({ role: "user", content: "hello" }),
castAgentMessage({
role: "assistant",
content: [
{ type: "thinking", thinking: "internal", thinkingSignature: "sig" },
{ type: "text", text: "answer" },
],
}),
];
const result = stripInvalidThinkingSignatures(messages);
expect(result).toBe(messages);
});
it("strips thinking blocks with missing, empty, or blank signatures", () => {
const messages: AgentMessage[] = [
castAgentMessage({
role: "assistant",
content: [
{ type: "thinking", thinking: "missing" },
{ type: "thinking", thinking: "empty", thinkingSignature: "" },
{ type: "thinking", thinking: "blank", thinkingSignature: " " },
{ type: "thinking", thinking: "signed", thinkingSignature: "sig" },
{ type: "text", text: "answer" },
],
}),
];
const result = stripInvalidThinkingSignatures(messages);
const assistant = result[0] as Extract<AgentMessage, { role: "assistant" }>;
expect(result).not.toBe(messages);
expect(assistant.content).toEqual([
{ type: "thinking", thinking: "signed", thinkingSignature: "sig" },
{ type: "text", text: "answer" },
]);
});
it("uses non-empty omitted-reasoning text when all thinking signatures are invalid", () => {
const messages: AgentMessage[] = [
castAgentMessage({
role: "assistant",
content: [{ type: "thinking", thinking: "reasoning", thinkingSignature: "" }],
}),
];
const result = stripInvalidThinkingSignatures(messages);
const assistant = result[0] as Extract<AgentMessage, { role: "assistant" }>;
expect(assistant.content).toEqual([{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }]);
});
it("strips redacted thinking blocks with invalid opaque signatures", () => {
const messages: AgentMessage[] = [
castAgentMessage({
role: "assistant",
content: [
{ type: "redacted_thinking", data: "" },
{ type: "redacted_thinking", signature: " " },
{ type: "redacted_thinking", data: "opaque" },
{ type: "text", text: "answer" },
],
}),
];
const result = stripInvalidThinkingSignatures(messages);
const assistant = result[0] as Extract<AgentMessage, { role: "assistant" }>;
expect(assistant.content).toEqual([
{ type: "redacted_thinking", data: "opaque" },
{ type: "text", text: "answer" },
]);
});
});
describe("sanitizeThinkingForRecovery", () => {
it("drops the latest assistant message when the thinking block is unsigned", () => {
const messages = castAgentMessages([
{ role: "user", content: "hello" },
{
role: "assistant",
content: [{ type: "thinking", thinking: "partial" }],
},
]);
const result = sanitizeThinkingForRecovery(messages);
expect(result.messages).toEqual([messages[0]]);
expect(result.prefill).toBe(false);
});
it("preserves later turns when dropping an incomplete assistant message", () => {
const messages = castAgentMessages([
{ role: "user", content: "hello" },
{
role: "assistant",
content: [{ type: "thinking", thinking: "partial" }],
},
{ role: "user", content: "follow up" },
]);
const result = sanitizeThinkingForRecovery(messages);
expect(result.messages).toEqual([messages[0], messages[2]]);
expect(result.prefill).toBe(false);
});
it("marks signed thinking without text as a prefill recovery case", () => {
const messages = castAgentMessages([
{ role: "user", content: "hello" },
{
role: "assistant",
content: [{ type: "thinking", thinking: "complete", thinkingSignature: "sig" }],
},
]);
const result = sanitizeThinkingForRecovery(messages);
expect(result.messages).toBe(messages);
expect(result.prefill).toBe(true);
});
it("marks signed thinking with an empty text block as incomplete text", () => {
const message = castAgentMessage({
role: "assistant",
content: [
{ type: "thinking", thinking: "complete", thinkingSignature: "sig" },
{ type: "text", text: "" },
],
});
expect(assessLastAssistantMessage(message)).toBe("incomplete-text");
});
it("treats partial text after signed thinking as valid", () => {
const message = castAgentMessage({
role: "assistant",
content: [
{ type: "thinking", thinking: "complete", thinkingSignature: "sig" },
{ type: "text", text: "Here is my answ" },
],
});
expect(assessLastAssistantMessage(message)).toBe("valid");
});
it("treats non-string text blocks as incomplete text when thinking is signed", () => {
const message = castAgentMessage({
role: "assistant",
content: [
{ type: "thinking", thinking: "complete", thinkingSignature: "sig" },
{ type: "text", text: { bad: true } },
],
});
expect(assessLastAssistantMessage(message)).toBe("incomplete-text");
});
});
describe("wrapAnthropicStreamWithRecovery", () => {
const anthropicThinkingError = new Error(
"thinking or redacted_thinking blocks in the latest assistant message cannot be modified",
);
it("retries once with omitted-reasoning text when the request is rejected before streaming", async () => {
let callCount = 0;
const contexts: Array<{ messages?: AgentMessage[] }> = [];
const wrapped = wrapAnthropicStreamWithRecovery(
((_model, context) => {
callCount += 1;
contexts.push(context as { messages?: AgentMessage[] });
return Promise.reject(anthropicThinkingError);
}) as Parameters<typeof wrapAnthropicStreamWithRecovery>[0],
{ id: "test-session" },
);
await expect(
wrapped(
{} as never,
{
messages: castAgentMessages([
{
role: "assistant",
content: [{ type: "thinking", thinking: "secret", thinkingSignature: "sig" }],
},
]),
} as never,
{} as never,
),
).rejects.toBe(anthropicThinkingError);
expect(callCount).toBe(2);
expect(contexts[1]?.messages?.[0]).toMatchObject({
role: "assistant",
content: [{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }],
});
});
it("retries with visible assistant text when stripping thinking leaves content", async () => {
const contexts: Array<{ messages?: AgentMessage[] }> = [];
const wrapped = wrapAnthropicStreamWithRecovery(
((_model, context) => {
contexts.push(context as { messages?: AgentMessage[] });
return Promise.reject(anthropicThinkingError);
}) as Parameters<typeof wrapAnthropicStreamWithRecovery>[0],
{ id: "test-session" },
);
await expect(
wrapped(
{} as never,
{
messages: castAgentMessages([
{
role: "assistant",
content: [
{ type: "thinking", thinking: "secret", thinkingSignature: "sig" },
{ type: "text", text: "visible answer" },
],
},
]),
} as never,
{} as never,
),
).rejects.toBe(anthropicThinkingError);
expect(contexts[1]?.messages?.[0]).toMatchObject({
role: "assistant",
content: [{ type: "text", text: "visible answer" }],
});
});
it("does not retry when the stream fails after yielding a chunk", async () => {
let callCount = 0;
const wrapped = wrapAnthropicStreamWithRecovery(
(() => {
callCount += 1;
return (async function* failingStream() {
yield "chunk";
throw anthropicThinkingError;
})();
}) as unknown as Parameters<typeof wrapAnthropicStreamWithRecovery>[0],
{ id: "test-session" },
);
const chunks: unknown[] = [];
const response = wrapped({} as never, { messages: [] } as never, {} as never) as {
result: () => Promise<unknown>;
} & AsyncIterable<unknown>;
for await (const chunk of response) {
chunks.push(chunk);
}
expect(chunks).toEqual(["chunk"]);
await expect(response.result()).rejects.toBe(anthropicThinkingError);
expect(callCount).toBe(1);
});
it("does not retry non-Anthropic-thinking errors", async () => {
const rateLimitError = new Error("rate limit exceeded");
let callCount = 0;
const wrapped = wrapAnthropicStreamWithRecovery(
(() => {
callCount += 1;
return Promise.reject(rateLimitError);
}) as Parameters<typeof wrapAnthropicStreamWithRecovery>[0],
{ id: "test-session" },
);
await expect(wrapped({} as never, { messages: [] } as never, {} as never)).rejects.toBe(
rateLimitError,
);
expect(callCount).toBe(1);
});
it("preserves result() for synchronous event streams", async () => {
const finalMessage = castAgentMessage({
role: "assistant",
content: [{ type: "text", text: "done" }],
api: "anthropic-messages",
provider: "anthropic",
model: "claude-sonnet-4-6",
usage: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
timestamp: Date.now(),
}) as AssistantMessage;
const wrapped = wrapAnthropicStreamWithRecovery(
(() => {
const stream = createAssistantMessageEventStream();
queueMicrotask(() => {
stream.push({ type: "start", partial: finalMessage });
stream.push({ type: "done", reason: "stop", message: finalMessage });
stream.end();
});
return stream;
}) as Parameters<typeof wrapAnthropicStreamWithRecovery>[0],
{ id: "test-session" },
);
const response = wrapped({} as never, { messages: [] } as never, {} as never) as {
result: () => Promise<unknown>;
} & AsyncIterable<unknown>;
const events: unknown[] = [];
for await (const event of response) {
events.push(event);
}
await expect(response.result()).resolves.toEqual(finalMessage);
expect(events).toHaveLength(2);
});
});