fix: gate immutable thinking replay by transcript policy

This commit is contained in:
Shakker
2026-04-12 05:11:08 +01:00
committed by Shakker
parent 5c244b3bd2
commit c6e2298950
6 changed files with 132 additions and 14 deletions

View File

@@ -968,6 +968,48 @@ describe("sanitizeSessionHistory", () => {
]);
});
it("uses immutable thinking replay for anthropic-compatible providers when policy preserves signatures", async () => {
  setNonGoogleModelApi();

  // Assistant turn under test: a signed thinking block followed by a tool
  // call whose name is padded with whitespace (" read ").
  const assistantContent = [
    { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
    { type: "toolCall", id: "call_1", name: " read ", arguments: {} },
  ] as unknown as AssistantMessage["content"];
  const messages = castAgentMessages([
    makeUserMessage("retry"),
    makeAssistantMessage(assistantContent),
  ]);

  // The policy is spelled out in full; the fields relevant to this case are
  // validateAnthropicTurns, preserveSignatures and dropThinkingBlocks.
  const result = await sanitizeAnthropicHistory({
    provider: "anthropic-vertex",
    messages,
    policy: {
      sanitizeMode: "full",
      sanitizeToolCallIds: true,
      toolCallIdMode: "strict",
      preserveNativeAnthropicToolUseIds: true,
      repairToolUseResultPairing: true,
      preserveSignatures: true,
      sanitizeThoughtSignatures: undefined,
      sanitizeThinkingSignatures: false,
      dropThinkingBlocks: false,
      applyGoogleTurnOrdering: false,
      validateGeminiTurns: false,
      validateAnthropicTurns: true,
      allowSyntheticToolResults: true,
    },
  });

  // Only the user turn is expected to remain after sanitization.
  expect(result).toHaveLength(1);
  expect(result[0]).toMatchObject({ role: "user", content: "retry" });
});
it("keeps mutable thinking turns outside exact anthropic replay", async () => {
setNonGoogleModelApi();

View File

@@ -29,7 +29,10 @@ import {
stripToolResultDetails,
} from "../session-transcript-repair.js";
import type { TranscriptPolicy } from "../transcript-policy.js";
import { resolveTranscriptPolicy } from "../transcript-policy.js";
import {
resolveTranscriptPolicy,
shouldAllowProviderOwnedThinkingReplay,
} from "../transcript-policy.js";
import {
makeZeroUsageSnapshot,
normalizeUsage,
@@ -418,10 +421,10 @@ export async function sanitizeSessionHistory(params: {
: sanitizedImages;
const sanitizedToolCalls = sanitizeToolCallInputs(droppedThinking, {
allowedToolNames: params.allowedToolNames,
allowProviderOwnedThinkingReplay:
policy.validateAnthropicTurns &&
params.provider === "anthropic" &&
params.modelApi === "anthropic-messages",
allowProviderOwnedThinkingReplay: shouldAllowProviderOwnedThinkingReplay({
modelApi: params.modelApi,
policy,
}),
});
const repairedTools = policy.repairToolUseResultPairing
? sanitizeToolUseResultPairing(sanitizedToolCalls, {

View File

@@ -910,7 +910,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
baseFn as never,
new Set(["read"]),
{ validateAnthropicTurns: true } as never,
{
validateAnthropicTurns: true,
preserveSignatures: true,
dropThinkingBlocks: false,
} as never,
);
const stream = wrapped({} as never, { messages } as never, {} as never) as
| FakeWrappedStream
@@ -942,7 +946,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
baseFn as never,
new Set(["read"]),
{ validateAnthropicTurns: true } as never,
{
validateAnthropicTurns: true,
preserveSignatures: true,
dropThinkingBlocks: false,
} as never,
);
const stream = wrapped({} as never, { messages } as never, {} as never) as
| FakeWrappedStream
@@ -975,7 +983,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
baseFn as never,
new Set(["read"]),
{ validateAnthropicTurns: true } as never,
{
validateAnthropicTurns: true,
preserveSignatures: true,
dropThinkingBlocks: false,
} as never,
);
const stream = wrapped(
{ api: "anthropic-messages" } as never,
@@ -1024,7 +1036,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
baseFn as never,
new Set(["sessions_spawn"]),
{ validateAnthropicTurns: true } as never,
{
validateAnthropicTurns: true,
preserveSignatures: true,
dropThinkingBlocks: false,
} as never,
);
const stream = wrapped(
{ api: "anthropic-messages" } as never,
@@ -1079,7 +1095,11 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
baseFn as never,
new Set(["sessions_spawn"]),
{ validateAnthropicTurns: true } as never,
{
validateAnthropicTurns: true,
preserveSignatures: true,
dropThinkingBlocks: false,
} as never,
);
const stream = wrapped(
{ api: "anthropic-messages" } as never,

View File

@@ -6,6 +6,7 @@ import {
isRedactedSessionsSpawnAttachment,
sanitizeToolUseResultPairing,
} from "../../session-transcript-repair.js";
import { shouldAllowProviderOwnedThinkingReplay } from "../../transcript-policy.js";
import { normalizeToolName } from "../../tool-policy.js";
import type { TranscriptPolicy } from "../../transcript-policy.js";
@@ -626,7 +627,10 @@ export function wrapStreamFnTrimToolCallNames(
export function wrapStreamFnSanitizeMalformedToolCalls(
baseFn: StreamFn,
allowedToolNames?: Set<string>,
transcriptPolicy?: Pick<TranscriptPolicy, "validateGeminiTurns" | "validateAnthropicTurns">,
transcriptPolicy?: Pick<
TranscriptPolicy,
"validateGeminiTurns" | "validateAnthropicTurns" | "preserveSignatures" | "dropThinkingBlocks"
>,
): StreamFn {
return (model, context, options) => {
const ctx = context as unknown as { messages?: unknown };
@@ -637,8 +641,14 @@ export function wrapStreamFnSanitizeMalformedToolCalls(
const sanitized = sanitizeReplayToolCallInputs(
messages as AgentMessage[],
allowedToolNames,
transcriptPolicy?.validateAnthropicTurns === true &&
(model as { api?: unknown })?.api === "anthropic-messages",
shouldAllowProviderOwnedThinkingReplay({
modelApi: (model as { api?: unknown })?.api as string | null | undefined,
policy: {
validateAnthropicTurns: transcriptPolicy?.validateAnthropicTurns === true,
preserveSignatures: transcriptPolicy?.preserveSignatures === true,
dropThinkingBlocks: transcriptPolicy?.dropThinkingBlocks === true,
},
}),
);
if (sanitized.messages === messages) {
return baseFn(model, context, options);

View File

@@ -178,10 +178,13 @@ vi.mock("../plugins/provider-runtime.js", async () => {
});
let resolveTranscriptPolicy: typeof import("./transcript-policy.js").resolveTranscriptPolicy;
let shouldAllowProviderOwnedThinkingReplay: typeof import("./transcript-policy.js").shouldAllowProviderOwnedThinkingReplay;
describe("resolveTranscriptPolicy", () => {
beforeAll(async () => {
({ resolveTranscriptPolicy } = await import("./transcript-policy.js"));
({ resolveTranscriptPolicy, shouldAllowProviderOwnedThinkingReplay } = await import(
"./transcript-policy.js"
));
});
beforeEach(() => {
@@ -404,6 +407,34 @@ describe("resolveTranscriptPolicy", () => {
expect(policy.preserveSignatures).toBe(preserveSignatures);
});
it("allows immutable provider-owned thinking replay for anthropic-compatible native replay policies", () => {
  // The same model API is used both to resolve the policy and to evaluate the gate.
  const modelApi = "anthropic-messages";
  const policy = resolveTranscriptPolicy({
    provider: "minimax",
    modelId: "MiniMax-M2.7",
    modelApi,
  });

  const allowed = shouldAllowProviderOwnedThinkingReplay({ modelApi, policy });

  expect(allowed).toBe(true);
});
it("does not allow immutable provider-owned thinking replay for strict openai-compatible replay", () => {
  // The same model API is used both to resolve the policy and to evaluate the gate.
  const modelApi = "openai-completions";
  const policy = resolveTranscriptPolicy({
    provider: "vllm",
    modelId: "gemma-3-27b",
    modelApi,
  });

  const allowed = shouldAllowProviderOwnedThinkingReplay({ modelApi, policy });

  expect(allowed).toBe(false);
});
it("enables turn-ordering and assistant-merge for strict OpenAI-compatible providers (#38962)", () => {
const policy = resolveTranscriptPolicy({
provider: "vllm",

View File

@@ -29,6 +29,18 @@ export type TranscriptPolicy = {
allowSyntheticToolResults: boolean;
};
/**
 * Decides whether provider-owned thinking replay may be enabled for a request.
 *
 * The answer is `true` only when all of the following hold:
 * - `modelApi` is exactly `"anthropic-messages"`;
 * - the policy has `validateAnthropicTurns` set to `true`;
 * - the policy has `preserveSignatures` set to `true`;
 * - the policy does not have `dropThinkingBlocks` set to `true`.
 *
 * @param params.modelApi - API identifier of the target model; `null` or
 *   `undefined` never qualifies.
 * @param params.policy - The transcript-policy flags consulted by the gate.
 * @returns Whether provider-owned thinking replay is allowed.
 */
export function shouldAllowProviderOwnedThinkingReplay(params: {
  modelApi?: string | null;
  policy: Pick<
    TranscriptPolicy,
    "validateAnthropicTurns" | "preserveSignatures" | "dropThinkingBlocks"
  >;
}): boolean {
  // Policy flags are only inspected once the model API has matched.
  if (params.modelApi !== "anthropic-messages") {
    return false;
  }

  const flags = params.policy;
  if (flags.validateAnthropicTurns !== true) {
    return false;
  }
  if (flags.preserveSignatures !== true) {
    return false;
  }

  // An absent/undefined dropThinkingBlocks counts as "not dropping".
  return flags.dropThinkingBlocks !== true;
}
const DEFAULT_TRANSCRIPT_POLICY: TranscriptPolicy = {
sanitizeMode: "images-only",
sanitizeToolCallIds: false,