fix: tighten signed-thinking tool-result trust

This commit is contained in:
Shakker
2026-04-12 06:07:32 +01:00
committed by Shakker
parent 1a041001d3
commit 1e35eed277
4 changed files with 148 additions and 97 deletions

View File

@@ -555,7 +555,7 @@ describe("validateAnthropicTurns strips dangling tool_use blocks", () => {
]);
});
it("preserves signed-thinking turns when the matching tool result is embedded in user content", () => {
it("drops signed-thinking turns when the only matching tool result is embedded in user content", () => {
const msgs = asMessages([
{ role: "user", content: [{ type: "text", text: "Use tool" }] },
{
@@ -577,13 +577,13 @@ describe("validateAnthropicTurns strips dangling tool_use blocks", () => {
const result = validateAnthropicTurns(msgs);
expect(result).toHaveLength(3);
expect((result[1] as { role?: unknown }).role).toBe("assistant");
expect((result[1] as { content?: unknown[] }).content).toEqual([
{ type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
{ type: "toolUse", id: "tool-1", name: "gateway", arguments: {} },
{ type: "text", text: "[tool calls omitted]" },
]);
});
it("preserves signed-thinking turns when a tool result carries both stale and current id aliases", () => {
it("preserves signed-thinking turns when a trusted tool result carries both stale and current id aliases", () => {
const msgs = asMessages([
{ role: "user", content: [{ type: "text", text: "Use tool" }] },
{
@@ -594,22 +594,19 @@ describe("validateAnthropicTurns strips dangling tool_use blocks", () => {
],
},
{
role: "user",
content: [
{
type: "toolResult",
toolUseId: "tool-stale",
toolCallId: "tool-current",
content: [{ type: "text", text: "ok" }],
},
{ type: "text", text: "Continue" },
],
role: "toolResult",
toolUseId: "tool-stale",
toolCallId: "tool-current",
toolName: "gateway",
content: [{ type: "text", text: "ok" }],
isError: false,
},
{ role: "user", content: [{ type: "text", text: "Continue" }] },
]);
const result = validateAnthropicTurns(msgs);
expect(result).toHaveLength(3);
expect(result).toHaveLength(4);
expect((result[1] as { content?: unknown[] }).content).toEqual([
{ type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
{ type: "toolCall", id: "tool-current", name: "gateway", arguments: {} },
@@ -683,9 +680,7 @@ describe("validateAnthropicTurns strips dangling tool_use blocks", () => {
expect(result).toHaveLength(3);
const assistantContent = (result[1] as { content?: unknown[] }).content;
expect(assistantContent).toEqual([
{ type: "text", text: "[tool calls omitted]" },
]);
expect(assistantContent).toEqual([{ type: "text", text: "[tool calls omitted]" }]);
});
it("is replay-safe across repeated validation passes", () => {

View File

@@ -120,22 +120,6 @@ function collectTrustedToolResultMatches(message: AgentMessage): Map<string, Set
addMatch(extractToolResultMatchIds(record), extractToolResultMatchName(record));
}
const content = (message as { content?: unknown }).content;
if (!Array.isArray(content)) {
return matches;
}
for (const block of content) {
if (!block || typeof block !== "object") {
continue;
}
const record = block as Record<string, unknown>;
if (record.type !== "toolResult" && record.type !== "tool") {
continue;
}
addMatch(extractToolResultMatchIds(record), extractToolResultMatchName(record));
}
return matches;
}

View File

@@ -907,15 +907,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
);
const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
baseFn as never,
new Set(["read"]),
{
validateAnthropicTurns: true,
preserveSignatures: true,
dropThinkingBlocks: false,
} as never,
);
const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"]), {
validateAnthropicTurns: true,
preserveSignatures: true,
dropThinkingBlocks: false,
} as never);
const stream = wrapped({} as never, { messages } as never, {} as never) as
| FakeWrappedStream
| Promise<FakeWrappedStream>;
@@ -943,15 +939,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
);
const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
baseFn as never,
new Set(["read"]),
{
validateAnthropicTurns: true,
preserveSignatures: true,
dropThinkingBlocks: false,
} as never,
);
const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"]), {
validateAnthropicTurns: true,
preserveSignatures: true,
dropThinkingBlocks: false,
} as never);
const stream = wrapped({} as never, { messages } as never, {} as never) as
| FakeWrappedStream
| Promise<FakeWrappedStream>;
@@ -980,15 +972,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
);
const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
baseFn as never,
new Set(["read"]),
{
validateAnthropicTurns: true,
preserveSignatures: true,
dropThinkingBlocks: false,
} as never,
);
const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"]), {
validateAnthropicTurns: true,
preserveSignatures: true,
dropThinkingBlocks: false,
} as never);
const stream = wrapped(
{ api: "anthropic-messages" } as never,
{ messages } as never,
@@ -1024,15 +1012,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
);
const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
baseFn as never,
new Set(["read"]),
{
validateAnthropicTurns: true,
preserveSignatures: true,
dropThinkingBlocks: false,
} as never,
);
const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"]), {
validateAnthropicTurns: true,
preserveSignatures: true,
dropThinkingBlocks: false,
} as never);
const stream = wrapped(
{ api: "bedrock-converse-stream" } as never,
{ messages } as never,
@@ -1180,11 +1164,9 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
);
const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
baseFn as never,
new Set(["read"]),
{ validateAnthropicTurns: true } as never,
);
const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"]), {
validateAnthropicTurns: true,
} as never);
const stream = wrapped(
{ api: "openai-completions" } as never,
{ messages } as never,
@@ -1703,6 +1685,60 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
]);
});
it("drops embedded Anthropic user tool_result blocks when signed-thinking replay must stay provider-owned", async () => {
const messages = [
{
role: "assistant",
content: [
{ type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
{ type: "toolUse", id: "call_1", name: "read", input: { path: "." } },
],
},
{
role: "user",
content: [
{
type: "toolResult",
toolUseId: "call_1",
content: [{ type: "text", text: "embedded result" }],
},
{ type: "text", text: "retry" },
],
},
];
const baseFn = vi.fn((_model, _context) =>
createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
);
const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["read"]), {
validateGeminiTurns: false,
validateAnthropicTurns: true,
preserveSignatures: true,
dropThinkingBlocks: false,
});
const stream = wrapped(
{ api: "anthropic-messages" } as never,
{ messages } as never,
{} as never,
) as FakeWrappedStream | Promise<FakeWrappedStream>;
await Promise.resolve(stream);
expect(baseFn).toHaveBeenCalledTimes(1);
const seenContext = baseFn.mock.calls[0]?.[1] as {
messages: Array<{ role?: string; content?: unknown[] }>;
};
expect(seenContext.messages).toEqual([
{
role: "assistant",
content: [{ type: "text", text: "[tool calls omitted]" }],
},
{
role: "user",
content: [{ type: "text", text: "retry" }],
},
]);
});
it.each(["toolCall", "functionCall"] as const)(
"preserves matching Anthropic user tool_result blocks after %s replay turns",
async (toolCallType) => {

View File

@@ -6,8 +6,8 @@ import {
isRedactedSessionsSpawnAttachment,
sanitizeToolUseResultPairing,
} from "../../session-transcript-repair.js";
import { shouldAllowProviderOwnedThinkingReplay } from "../../transcript-policy.js";
import { normalizeToolName } from "../../tool-policy.js";
import { shouldAllowProviderOwnedThinkingReplay } from "../../transcript-policy.js";
import type { TranscriptPolicy } from "../../transcript-policy.js";
function resolveCaseInsensitiveAllowedToolName(
@@ -231,6 +231,9 @@ type ReplayToolCallSanitizeReport = {
type AnthropicToolResultContentBlock = {
type?: unknown;
toolUseId?: unknown;
toolCallId?: unknown;
tool_use_id?: unknown;
tool_call_id?: unknown;
};
function isThinkingLikeReplayBlock(block: unknown): boolean {
@@ -263,15 +266,12 @@ function hasUnredactedSessionsSpawnAttachments(block: ReplayToolCallBlock): bool
return false;
}
function isReplaySafeThinkingTurn(
content: unknown[],
allowedToolNames?: Set<string>,
): boolean {
function isReplaySafeThinkingTurn(content: unknown[], allowedToolNames?: Set<string>): boolean {
for (const block of content) {
if (!isReplayToolCallBlock(block)) {
continue;
}
const replayBlock = block as ReplayToolCallBlock;
const replayBlock = block;
if (
!replayToolCallHasInput(replayBlock) ||
!replayToolCallNonEmptyString(replayBlock.id) ||
@@ -409,9 +409,30 @@ function sanitizeReplayToolCallInputs(
};
}
function sanitizeAnthropicReplayToolResults(messages: AgentMessage[]): AgentMessage[] {
function extractAnthropicReplayToolResultIds(block: AnthropicToolResultContentBlock): string[] {
const ids: string[] = [];
for (const value of [block.toolUseId, block.toolCallId, block.tool_use_id, block.tool_call_id]) {
if (typeof value !== "string") {
continue;
}
const trimmed = value.trim();
if (!trimmed || ids.includes(trimmed)) {
continue;
}
ids.push(trimmed);
}
return ids;
}
function sanitizeAnthropicReplayToolResults(
messages: AgentMessage[],
options?: {
allowEmbeddedUserToolResults?: boolean;
},
): AgentMessage[] {
let changed = false;
const out: AgentMessage[] = [];
const allowEmbeddedUserToolResults = options?.allowEmbeddedUserToolResults !== false;
for (let index = 0; index < messages.length; index += 1) {
const message = messages[index];
@@ -450,10 +471,19 @@ function sanitizeAnthropicReplayToolResults(messages: AgentMessage[]): AgentMess
return true;
}
const typedBlock = block as AnthropicToolResultContentBlock;
if (typedBlock.type !== "toolResult" || typeof typedBlock.toolUseId !== "string") {
if (typedBlock.type !== "toolResult" && typedBlock.type !== "tool") {
return true;
}
return validToolUseIds.size > 0 && validToolUseIds.has(typedBlock.toolUseId);
if (!allowEmbeddedUserToolResults) {
changed = true;
return false;
}
const resultIds = extractAnthropicReplayToolResultIds(typedBlock);
if (resultIds.length === 0) {
changed = true;
return false;
}
return validToolUseIds.size > 0 && resultIds.some((id) => validToolUseIds.has(id));
});
if (nextContent.length === message.content.length) {
@@ -638,24 +668,30 @@ export function wrapStreamFnSanitizeMalformedToolCalls(
if (!Array.isArray(messages)) {
return baseFn(model, context, options);
}
const allowProviderOwnedThinkingReplay = shouldAllowProviderOwnedThinkingReplay({
modelApi: (model as { api?: unknown })?.api as string | null | undefined,
policy: {
validateAnthropicTurns: transcriptPolicy?.validateAnthropicTurns === true,
preserveSignatures: transcriptPolicy?.preserveSignatures === true,
dropThinkingBlocks: transcriptPolicy?.dropThinkingBlocks === true,
},
});
const sanitized = sanitizeReplayToolCallInputs(
messages as AgentMessage[],
allowedToolNames,
shouldAllowProviderOwnedThinkingReplay({
modelApi: (model as { api?: unknown })?.api as string | null | undefined,
policy: {
validateAnthropicTurns: transcriptPolicy?.validateAnthropicTurns === true,
preserveSignatures: transcriptPolicy?.preserveSignatures === true,
dropThinkingBlocks: transcriptPolicy?.dropThinkingBlocks === true,
},
}),
allowProviderOwnedThinkingReplay,
);
if (sanitized.messages === messages) {
return baseFn(model, context, options);
}
let nextMessages = sanitizeToolUseResultPairing(sanitized.messages);
const replayInputsChanged = sanitized.messages !== messages;
let nextMessages = replayInputsChanged
? sanitizeToolUseResultPairing(sanitized.messages)
: sanitized.messages;
if (transcriptPolicy?.validateAnthropicTurns) {
nextMessages = sanitizeAnthropicReplayToolResults(nextMessages);
nextMessages = sanitizeAnthropicReplayToolResults(nextMessages, {
allowEmbeddedUserToolResults: !allowProviderOwnedThinkingReplay,
});
}
if (nextMessages === messages) {
return baseFn(model, context, options);
}
if (sanitized.droppedAssistantMessages > 0 || transcriptPolicy?.validateAnthropicTurns) {
if (transcriptPolicy?.validateGeminiTurns) {