fix: drop conflicting signed replay turns before mutation

This commit is contained in:
Shakker
2026-04-12 06:52:43 +01:00
committed by Shakker
parent 91465f620b
commit 941aca5e5e
4 changed files with 144 additions and 3 deletions

View File

@@ -17,6 +17,7 @@ import {
} from "./pi-embedded-runner.sanitize-session-history.test-harness.js";
import { validateReplayTurns } from "./pi-embedded-runner/replay-history.js";
import { castAgentMessage, castAgentMessages } from "./test-helpers/agent-message-fixtures.js";
import { extractToolCallsFromAssistant } from "./tool-call-id.js";
import type { TranscriptPolicy } from "./transcript-policy.js";
import { makeZeroUsageSnapshot } from "./usage.js";
@@ -1182,6 +1183,93 @@ describe("sanitizeSessionHistory", () => {
]);
});
it("drops later preserved signed turns that reuse an earlier raw tool id across the transcript", async () => {
  setNonGoogleModelApi();
  const sessionManager = makeMockSessionManager();

  // Builds a signed-thinking assistant turn that calls `read` with the shared raw id "call1".
  const signedReadTurn = (thinkingSignature: string) =>
    makeAssistantMessage(
      [
        { type: "thinking", thinking: "internal", thinkingSignature },
        { type: "toolCall", id: "call1", name: "read", arguments: {} },
      ] as unknown as AssistantMessage["content"],
      { stopReason: "toolUse" },
    );

  // Builds the tool result answering the shared "call1" id.
  const readResult = (text: string) =>
    castAgentMessage({
      role: "toolResult",
      toolCallId: "call1",
      toolName: "read",
      content: [{ type: "text", text }],
      isError: false,
    });

  // Two complete call/result exchanges that both use "call1", followed by a trailing user turn.
  const messages = castAgentMessages([
    makeUserMessage("first"),
    signedReadTurn("sig_1"),
    readResult("first result"),
    makeUserMessage("second"),
    signedReadTurn("sig_2"),
    readResult("second result"),
    makeUserMessage("retry"),
  ]);

  // Request target shared by the sanitize and validate passes.
  const replayTarget = {
    modelApi: "anthropic-messages",
    provider: "anthropic",
    modelId: "claude-sonnet-4-6",
    sessionId: TEST_SESSION_ID,
  } as const;

  const sanitized = await sanitizeSessionHistory({
    messages,
    ...replayTarget,
    sessionManager,
  });
  const validated = await validateReplayTurns({
    messages: sanitized,
    ...replayTarget,
  });

  // Both the sanitized and the validated transcript must retain exactly one
  // tool-calling assistant turn and exactly one tool result.
  for (const transcript of [sanitized, validated]) {
    const toolCallingTurns = transcript.filter(
      (entry) =>
        entry &&
        typeof entry === "object" &&
        entry.role === "assistant" &&
        extractToolCallsFromAssistant(entry as Extract<AgentMessage, { role: "assistant" }>)
          .length > 0,
    );
    expect(toolCallingTurns).toHaveLength(1);

    const toolResults = transcript.filter(
      (entry) => entry && typeof entry === "object" && entry.role === "toolResult",
    );
    expect(toolResults).toHaveLength(1);
  }

  // NOTE(review): "[tool calls omitted]" is presumably the placeholder written when
  // tool calls are stripped out of a kept turn - confirm against the validator.
  expect(JSON.stringify(validated)).not.toContain("[tool calls omitted]");
});
it("keeps the earlier anthropic replay prefix stable after a later subagent turn", async () => {
setNonGoogleModelApi();

View File

@@ -6,6 +6,7 @@ import {
isRedactedSessionsSpawnAttachment,
sanitizeToolUseResultPairing,
} from "../../session-transcript-repair.js";
import { extractToolCallsFromAssistant } from "../../tool-call-id.js";
import { normalizeToolName } from "../../tool-policy.js";
import { shouldAllowProviderOwnedThinkingReplay } from "../../transcript-policy.js";
import type { TranscriptPolicy } from "../../transcript-policy.js";
@@ -284,7 +285,7 @@ function isReplaySafeThinkingTurn(content: unknown[], allowedToolNames?: Set<str
}
seenToolCallIds.add(toolCallId);
const rawName = typeof replayBlock.name === "string" ? replayBlock.name : "";
const resolvedName = resolveReplayToolCallName(rawName, replayBlock.id, allowedToolNames);
const resolvedName = resolveReplayToolCallName(rawName, toolCallId, allowedToolNames);
if (!resolvedName || replayBlock.name !== resolvedName) {
return false;
}
@@ -337,6 +338,7 @@ function sanitizeReplayToolCallInputs(
let changed = false;
let droppedAssistantMessages = 0;
const out: AgentMessage[] = [];
const claimedReplaySafeToolCallIds = new Set<string>();
for (const message of messages) {
if (!message || typeof message !== "object" || message.role !== "assistant") {
@@ -352,7 +354,16 @@ function sanitizeReplayToolCallInputs(
message.content.some((block) => isThinkingLikeReplayBlock(block)) &&
message.content.some((block) => isReplayToolCallBlock(block))
) {
if (isReplaySafeThinkingTurn(message.content, allowedToolNames)) {
const replaySafeToolCalls = extractToolCallsFromAssistant(
message as Extract<AgentMessage, { role: "assistant" }>,
);
if (
isReplaySafeThinkingTurn(message.content, allowedToolNames) &&
replaySafeToolCalls.every((toolCall) => !claimedReplaySafeToolCallIds.has(toolCall.id))
) {
for (const toolCall of replaySafeToolCalls) {
claimedReplaySafeToolCallIds.add(toolCall.id);
}
out.push(message);
} else {
changed = true;

View File

@@ -435,6 +435,40 @@ describe("sanitizeToolCallInputs", () => {
expect(out).toEqual([]);
});
it("drops later signed-thinking assistant turns that reuse an earlier signed tool id", () => {
  // First signed turn: uses the `toolCall` / `arguments` block shape.
  const firstSignedTurn = {
    role: "assistant",
    content: [
      {
        type: "thinking",
        thinking: "First signed replay turn.",
        thinkingSignature: "sig_first",
      },
      { type: "toolCall", id: "call_shared", name: "read", arguments: { path: "a" } },
    ],
  };

  // Second signed turn: reuses the same id through the `toolUse` / `input` block shape.
  const conflictingSignedTurn = {
    role: "assistant",
    content: [
      {
        type: "thinking",
        thinking: "Second signed replay turn.",
        thinkingSignature: "sig_second",
      },
      { type: "toolUse", id: "call_shared", name: "read", input: { path: "b" } },
    ],
  };

  const input = castAgentMessages([firstSignedTurn, conflictingSignedTurn]);

  const out = sanitizeToolCallInputs(input, {
    allowedToolNames: ["read"],
    allowProviderOwnedThinkingReplay: true,
  });

  // Only the first turn is expected in the output; the later turn reusing the id is dropped.
  expect(out).toEqual(input.slice(0, 1));
});
it("drops signed-thinking assistant turns that would require attachment redaction", () => {
const secret = "SIGNED_THINKING_ATTACHMENT_SECRET"; // pragma: allowlist secret
const input = castAgentMessages([

View File

@@ -316,6 +316,7 @@ export function repairToolCallInputs(
const out: AgentMessage[] = [];
const allowedToolNames = normalizeAllowedToolNames(options?.allowedToolNames);
const allowProviderOwnedThinkingReplay = options?.allowProviderOwnedThinkingReplay === true;
const claimedReplaySafeToolCallIds = new Set<string>();
for (const msg of messages) {
if (!msg || typeof msg !== "object") {
@@ -337,7 +338,14 @@ export function repairToolCallInputs(
// replay. Preserve the turn only if every sibling tool call is already
// valid, requires no redaction or normalization, and reuses no tool call
// id already claimed by an earlier preserved turn. Otherwise drop the
// whole assistant turn rather than mutating provider-owned content.
if (isReplaySafeThinkingAssistantTurn(msg.content, allowedToolNames)) {
const replaySafeToolCalls = extractToolCallsFromAssistant(msg);
if (
isReplaySafeThinkingAssistantTurn(msg.content, allowedToolNames) &&
replaySafeToolCalls.every((toolCall) => !claimedReplaySafeToolCallIds.has(toolCall.id))
) {
for (const toolCall of replaySafeToolCalls) {
claimedReplaySafeToolCallIds.add(toolCall.id);
}
out.push(msg);
} else {
droppedToolCalls += countRawToolCallBlocks(msg.content);