fix: preserve replay-safe signed tool ids

This commit is contained in:
Shakker
2026-04-12 06:10:06 +01:00
committed by Shakker
parent 1e35eed277
commit eed627d3f2
5 changed files with 297 additions and 12 deletions

View File

@@ -1038,6 +1038,56 @@ describe("sanitizeSessionHistory", () => {
});
});
// Runs the same scenario against the Anthropic and Bedrock provider/API pairs:
// an assistant turn holding a signed thinking block plus a tool call, followed
// by the matching tool result, must come back from the sanitizer unchanged.
it.each([
  { provider: "anthropic", modelApi: "anthropic-messages", label: "anthropic" },
  { provider: "amazon-bedrock", modelApi: "bedrock-converse-stream", label: "bedrock" },
])("preserves replay-safe signed tool ids for $label history", async ({ provider, modelApi }) => {
  setNonGoogleModelApi();

  // Factory instead of a shared constant: the expectation must never alias the
  // objects handed to the sanitizer, otherwise in-place mutation would go unnoticed.
  const buildSignedTurn = () => [
    { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
    { type: "toolCall", id: "call_1", name: "read", arguments: {} },
  ];

  const history = castAgentMessages([
    makeUserMessage("retry"),
    makeAssistantMessage(buildSignedTurn() as unknown as AssistantMessage["content"]),
    castAgentMessage({
      role: "toolResult",
      toolCallId: "call_1",
      toolName: "read",
      content: [{ type: "text", text: "ok" }],
      isError: false,
    }),
  ]);

  const sanitized = await sanitizeAnthropicHistory({ provider, modelApi, messages: history });

  const assistantTurn = sanitized[1] as Extract<AgentMessage, { role: "assistant" }>;
  const toolResultTurn = sanitized[2] as Extract<AgentMessage, { role: "toolResult" }>;
  expect(assistantTurn.content).toEqual(buildSignedTurn());
  expect(toolResultTurn.toolCallId).toBe("call_1");
});
it("keeps mutable thinking turns outside exact anthropic replay", async () => {
setNonGoogleModelApi();

View File

@@ -33,6 +33,7 @@ import {
resolveTranscriptPolicy,
shouldAllowProviderOwnedThinkingReplay,
} from "../transcript-policy.js";
import { sanitizeToolCallIdsForCloudCodeAssist } from "../tool-call-id.js";
import {
makeZeroUsageSnapshot,
normalizeUsage,
@@ -403,12 +404,16 @@ export async function sanitizeSessionHistory(params: {
model: params.model,
});
const withInterSessionMarkers = annotateInterSessionUserMessages(params.messages);
const allowProviderOwnedThinkingReplay = shouldAllowProviderOwnedThinkingReplay({
modelApi: params.modelApi,
policy,
});
const sanitizedImages = await sanitizeSessionMessagesImages(
withInterSessionMarkers,
"session:history",
{
sanitizeMode: policy.sanitizeMode,
sanitizeToolCallIds: policy.sanitizeToolCallIds,
sanitizeToolCallIds: policy.sanitizeToolCallIds && !allowProviderOwnedThinkingReplay,
toolCallIdMode: policy.toolCallIdMode,
preserveNativeAnthropicToolUseIds: policy.preserveNativeAnthropicToolUseIds,
preserveSignatures: policy.preserveSignatures,
@@ -421,16 +426,21 @@ export async function sanitizeSessionHistory(params: {
: sanitizedImages;
const sanitizedToolCalls = sanitizeToolCallInputs(droppedThinking, {
allowedToolNames: params.allowedToolNames,
allowProviderOwnedThinkingReplay: shouldAllowProviderOwnedThinkingReplay({
modelApi: params.modelApi,
policy,
}),
allowProviderOwnedThinkingReplay,
});
const sanitizedToolIds =
policy.sanitizeToolCallIds && policy.toolCallIdMode
? sanitizeToolCallIdsForCloudCodeAssist(sanitizedToolCalls, policy.toolCallIdMode, {
preserveNativeAnthropicToolUseIds: policy.preserveNativeAnthropicToolUseIds,
preserveReplaySafeThinkingToolCallIds: allowProviderOwnedThinkingReplay,
allowedToolNames: params.allowedToolNames,
})
: sanitizedToolCalls;
const repairedTools = policy.repairToolUseResultPairing
? sanitizeToolUseResultPairing(sanitizedToolCalls, {
? sanitizeToolUseResultPairing(sanitizedToolIds, {
erroredAssistantResultPolicy: "drop",
})
: sanitizedToolCalls;
: sanitizedToolIds;
const sanitizedToolResults = stripToolResultDetails(repairedTools);
const sanitizedCompactionUsage = ensureAssistantUsageSnapshots(
stripStaleAssistantUsageBeforeLatestCompaction(sanitizedToolResults),

View File

@@ -111,7 +111,10 @@ import { buildSystemPromptParams } from "../../system-prompt-params.js";
import { buildSystemPromptReport } from "../../system-prompt-report.js";
import { resolveAgentTimeoutMs } from "../../timeout.js";
import { sanitizeToolCallIdsForCloudCodeAssist } from "../../tool-call-id.js";
import { resolveTranscriptPolicy } from "../../transcript-policy.js";
import {
resolveTranscriptPolicy,
shouldAllowProviderOwnedThinkingReplay,
} from "../../transcript-policy.js";
import { normalizeUsage, type NormalizedUsage } from "../../usage.js";
import { DEFAULT_BOOTSTRAP_FILENAME } from "../../workspace.js";
import { isRunnerAbortError } from "../abort.js";
@@ -1156,11 +1159,17 @@ export async function runEmbeddedAttempt(
if (!Array.isArray(messages)) {
return inner(model, context, options);
}
const allowProviderOwnedThinkingReplay = shouldAllowProviderOwnedThinkingReplay({
modelApi: (model as { api?: unknown })?.api as string | null | undefined,
policy: transcriptPolicy,
});
const sanitized = sanitizeToolCallIdsForCloudCodeAssist(
messages as AgentMessage[],
mode,
{
preserveNativeAnthropicToolUseIds: transcriptPolicy.preserveNativeAnthropicToolUseIds,
preserveReplaySafeThinkingToolCallIds: allowProviderOwnedThinkingReplay,
allowedToolNames,
},
);
if (sanitized === messages) {

View File

@@ -293,6 +293,35 @@ describe("sanitizeToolCallIdsForCloudCodeAssist", () => {
).toBe("call123fc123");
});
// With preserveReplaySafeThinkingToolCallIds enabled, a signed-thinking assistant
// turn and its tool result must pass through strict-mode sanitization untouched.
it("preserves replay-safe signed-thinking tool ids when requested", () => {
  const input = castAgentMessages([
    {
      role: "assistant",
      content: [
        { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
        { type: "toolCall", id: "call_1", name: "read", arguments: {} },
      ],
    },
    {
      role: "toolResult",
      toolCallId: "call_1",
      toolName: "read",
      content: [{ type: "text", text: "ok" }],
    },
  ]);

  const out = sanitizeToolCallIdsForCloudCodeAssist(input, "strict", {
    preserveReplaySafeThinkingToolCallIds: true,
    allowedToolNames: ["read"],
  });

  // Identity check: nothing was rewritten, so the very same array is returned.
  expect(out).toBe(input);

  const assistantOut = out[0] as Extract<AgentMessage, { role: "assistant" }>;
  const preservedCall = assistantOut.content?.[1] as { id?: string };
  expect(preservedCall.id).toBe("call_1");

  const toolResultOut = out[1] as Extract<AgentMessage, { role: "toolResult" }>;
  expect(toolResultOut.toolCallId).toBe("call_1");
});
it("avoids collisions with alphanumeric-only suffixes", () => {
const input = buildDuplicateIdCollisionInput();

View File

@@ -1,8 +1,13 @@
import { createHash } from "node:crypto";
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";
/**
 * Tool call id sanitization modes. Per the notes in
 * sanitizeToolCallIdsForCloudCodeAssist: "strict" allows only [a-zA-Z0-9];
 * "strict9" additionally requires length 9 (Mistral tool call requirement).
 */
export type ToolCallIdMode = "strict" | "strict9";
// Matches ids of the form `toolu_` followed by one or more letters, digits or
// underscores (the shape this module treats as a native Anthropic tool-use id).
const NATIVE_ANTHROPIC_TOOL_USE_ID_RE = /^toolu_[A-Za-z0-9_]+$/;
// Sentinel expected in the `content` field of an already-redacted
// sessions_spawn attachment (see isRedactedSessionsSpawnAttachment).
const REDACTED_SESSIONS_SPAWN_ATTACHMENT_CONTENT = "__OPENCLAW_REDACTED__";
// The only keys a redacted sessions_spawn attachment may carry besides `content`.
const SESSIONS_SPAWN_ATTACHMENT_METADATA_KEYS = ["name", "encoding", "mimeType"] as const;
// Upper bound on the trimmed tool name length accepted by hasReplaySafeToolCallName.
const TOOL_CALL_NAME_MAX_CHARS = 64;
// Character set a trimmed tool name must consist of to count as replay-safe.
const TOOL_CALL_NAME_RE = /^[A-Za-z0-9_:.-]+$/;
// NOTE(review): presumably the fixed id length for "strict9" mode — its use is
// outside this view; confirm against the id sanitizer.
const STRICT9_LEN = 9;
// Content block `type` values that are recognised as tool calls.
const TOOL_CALL_TYPES = new Set(["toolCall", "toolUse", "functionCall"]);
@@ -12,6 +17,14 @@ export type ToolCallLike = {
name?: string;
};
/**
 * Loosely typed view of an assistant content block that may be a tool call.
 * Every field is optional and `unknown`, so each one has to be validated at
 * runtime before it is relied on.
 */
type ReplaySafeToolCallBlock = {
// Block discriminator; compared against TOOL_CALL_TYPES to detect tool calls.
type?: unknown;
// Tool call id; must be a non-empty string for the block to be replay-safe.
id?: unknown;
// Tool name; validated by hasReplaySafeToolCallName.
name?: unknown;
// Call payload — either `input` or `arguments` being non-nullish counts as
// "has input" (see hasToolCallInput).
input?: unknown;
arguments?: unknown;
};
/**
* Sanitize a tool call ID to be compatible with various providers.
*
@@ -83,6 +96,150 @@ export function extractToolResultId(
return null;
}
/**
 * Tells whether a content block is a thinking block.
 *
 * @param block - Arbitrary value taken from a message's content array.
 * @returns `true` only for non-null objects whose `type` is `"thinking"` or
 *   `"redacted_thinking"`.
 */
function isThinkingLikeBlock(block: unknown): boolean {
  if (typeof block !== "object" || block === null) {
    return false;
  }
  switch ((block as { type?: unknown }).type) {
    case "thinking":
    case "redacted_thinking":
      return true;
    default:
      return false;
  }
}
/**
 * Reports whether a tool-call block carries a payload.
 *
 * @param block - Candidate tool-call block.
 * @returns `true` when `input` or `arguments` is present and is neither
 *   `undefined` nor `null`; falsy-but-defined values such as `0` or `""` count.
 */
function hasToolCallInput(block: ReplaySafeToolCallBlock): boolean {
  const isPresent = (key: "input" | "arguments"): boolean =>
    key in block && block[key] !== undefined && block[key] !== null;
  return (["input", "arguments"] as const).some(isPresent);
}
/**
 * Type guard for strings that contain at least one non-whitespace character.
 *
 * @param value - Arbitrary value to inspect.
 * @returns `true` when `value` is a string whose trimmed form is not empty.
 */
function hasNonEmptyStringField(value: unknown): value is string {
  if (typeof value !== "string") {
    return false;
  }
  return value.trim() !== "";
}
/**
 * Builds the lookup set used to match tool names against an allow-list.
 *
 * Non-string and blank entries are skipped; every remaining name is trimmed
 * and passed through `normalizeLowercaseStringOrEmpty`.
 *
 * @param allowedToolNames - Optional allow-list of tool names.
 * @returns The normalized set, or `null` when no list was given or nothing
 *   usable remained in it.
 */
function normalizeAllowedToolNames(allowedToolNames?: Iterable<string>): Set<string> | null {
  if (!allowedToolNames) {
    return null;
  }
  const usableNames = Array.from(allowedToolNames as Iterable<unknown>)
    .filter((entry): entry is string => typeof entry === "string")
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0)
    .map((entry) => normalizeLowercaseStringOrEmpty(entry));
  const lookup = new Set<string>(usableNames);
  return lookup.size > 0 ? lookup : null;
}
/**
 * Checks whether an attachment entry is already in its redacted form.
 *
 * An entry qualifies when its `content` equals the redaction sentinel and every
 * other key is one of the known metadata keys holding a non-blank string.
 *
 * @param item - Arbitrary element of an `attachments` array.
 * @returns `true` when the entry is a fully redacted attachment.
 */
function isRedactedSessionsSpawnAttachment(item: unknown): boolean {
  if (typeof item !== "object" || item === null) {
    return false;
  }
  const attachment = item as Record<string, unknown>;
  if (attachment.content !== REDACTED_SESSIONS_SPAWN_ATTACHMENT_CONTENT) {
    return false;
  }
  const metadataKeys: readonly string[] = SESSIONS_SPAWN_ATTACHMENT_METADATA_KEYS;
  return Object.keys(attachment).every((key) => {
    if (key === "content") {
      return true;
    }
    if (!metadataKeys.includes(key)) {
      return false;
    }
    const value = attachment[key];
    return typeof value === "string" && value.trim().length !== 0;
  });
}
/**
 * Decides whether a tool-call block does not yet match the shape the
 * name/attachment checks below expect, so it cannot be kept verbatim.
 *
 * @param block - Candidate tool-call block.
 * @returns `true` when the name carries surrounding whitespace, or when the
 *   call is `sessions_spawn` and any attachment in `arguments`/`input` is not
 *   in redacted form; `false` otherwise.
 */
function toolCallNeedsReplayMutation(block: ReplaySafeToolCallBlock): boolean {
  const { name } = block;
  const trimmedName = typeof name === "string" ? name.trim() : undefined;
  // A non-empty name that changes when trimmed does not match the trimmed form.
  if (typeof name === "string" && name.length > 0 && name !== trimmedName) {
    return true;
  }
  // Only sessions_spawn calls carry attachments that need inspection.
  if (normalizeLowercaseStringOrEmpty(trimmedName) !== "sessions_spawn") {
    return false;
  }

  const carriesUnredactedAttachment = (payload: unknown): boolean => {
    if (!payload || typeof payload !== "object") {
      return false;
    }
    const { attachments } = payload as { attachments?: unknown };
    if (!Array.isArray(attachments)) {
      return false;
    }
    for (const entry of attachments) {
      if (!isRedactedSessionsSpawnAttachment(entry)) {
        return true;
      }
    }
    return false;
  };

  return [block.arguments, block.input].some(carriesUnredactedAttachment);
}
/**
 * Validates the tool name of a tool-call block.
 *
 * @param block - Candidate tool-call block.
 * @param allowedToolNames - Normalized allow-list, or `null` for no restriction.
 * @returns `true` when the trimmed name is non-empty, within the length limit,
 *   made only of permitted characters, and — if an allow-list is given —
 *   present in it after normalization.
 */
function hasReplaySafeToolCallName(
  block: ReplaySafeToolCallBlock,
  allowedToolNames: Set<string> | null,
): boolean {
  const candidate = typeof block.name === "string" ? block.name.trim() : "";
  const wellFormed =
    candidate.length > 0 &&
    candidate.length <= TOOL_CALL_NAME_MAX_CHARS &&
    TOOL_CALL_NAME_RE.test(candidate);
  if (!wellFormed) {
    return false;
  }
  return !allowedToolNames || allowedToolNames.has(normalizeLowercaseStringOrEmpty(candidate));
}
/**
 * Determines whether an assistant message can be kept exactly as it is.
 *
 * The message qualifies when its content is an array holding at least one
 * thinking block and at least one tool call, and every tool call has a payload,
 * a non-empty string id, an acceptable name, and needs no mutation.
 *
 * @param message - Assistant message to inspect.
 * @param allowedToolNames - Normalized allow-list, or `null` for no restriction.
 * @returns `true` when all of the above holds.
 */
function isReplaySafeThinkingAssistantMessage(
  message: Extract<AgentMessage, { role: "assistant" }>,
  allowedToolNames: Set<string> | null,
): boolean {
  const { content } = message;
  if (!Array.isArray(content)) {
    return false;
  }
  const blocks: readonly unknown[] = content;

  // Anything that is neither a thinking block nor a tool call is ignored.
  const toolCalls = blocks.filter((block): block is ReplaySafeToolCallBlock => {
    if (isThinkingLikeBlock(block) || !block || typeof block !== "object") {
      return false;
    }
    const { type } = block as ReplaySafeToolCallBlock;
    return typeof type === "string" && TOOL_CALL_TYPES.has(type);
  });

  const everyCallIsSafe = toolCalls.every(
    (call) =>
      hasToolCallInput(call) &&
      hasNonEmptyStringField(call.id) &&
      hasReplaySafeToolCallName(call, allowedToolNames) &&
      !toolCallNeedsReplayMutation(call),
  );
  if (!everyCallIsSafe) {
    return false;
  }

  const hasThinking = blocks.some((block) => isThinkingLikeBlock(block));
  return hasThinking && toolCalls.length > 0;
}
export function isValidCloudCodeAssistToolId(id: string, mode: ToolCallIdMode = "strict"): boolean {
if (!id || typeof id !== "string") {
return false;
@@ -155,6 +312,7 @@ function createOccurrenceAwareResolver(
): {
resolveAssistantId: (id: string) => string;
resolveToolResultId: (id: string) => string;
preserveAssistantId: (id: string) => string;
} {
const used = new Set<string>();
const assistantOccurrences = new Map<string, number>();
@@ -218,7 +376,18 @@ function createOccurrenceAwareResolver(
return allocate(`${id}:tool_result:${occurrence}`);
};
return { resolveAssistantId, resolveToolResultId };
const preserveAssistantId = (id: string): string => {
used.add(id);
const pending = pendingByRawId.get(id);
if (pending) {
pending.push(id);
} else {
pendingByRawId.set(id, [id]);
}
return id;
};
return { resolveAssistantId, resolveToolResultId, preserveAssistantId };
}
function rewriteAssistantToolCallIds(params: {
@@ -298,7 +467,11 @@ function rewriteToolResultIds(params: {
export function sanitizeToolCallIdsForCloudCodeAssist(
messages: AgentMessage[],
mode: ToolCallIdMode = "strict",
options?: { preserveNativeAnthropicToolUseIds?: boolean },
options?: {
preserveNativeAnthropicToolUseIds?: boolean;
preserveReplaySafeThinkingToolCallIds?: boolean;
allowedToolNames?: Iterable<string>;
},
): AgentMessage[] {
// Strict mode: only [a-zA-Z0-9]
// Strict9 mode: only [a-zA-Z0-9], length 9 (Mistral tool call requirement)
@@ -306,7 +479,11 @@ export function sanitizeToolCallIdsForCloudCodeAssist(
// duplicate tool-call IDs. Track assistant occurrences in-order so repeated
// raw IDs receive distinct rewritten IDs, while matching tool results consume
// the same rewritten IDs in encounter order.
const { resolveAssistantId, resolveToolResultId } = createOccurrenceAwareResolver(mode, options);
const { resolveAssistantId, resolveToolResultId, preserveAssistantId } =
createOccurrenceAwareResolver(mode, options);
const allowedToolNames = normalizeAllowedToolNames(options?.allowedToolNames);
const preserveReplaySafeThinkingToolCallIds =
options?.preserveReplaySafeThinkingToolCallIds === true;
let changed = false;
const out = messages.map((msg) => {
@@ -315,8 +492,18 @@ export function sanitizeToolCallIdsForCloudCodeAssist(
}
const role = (msg as { role?: unknown }).role;
if (role === "assistant") {
const assistant = msg as Extract<AgentMessage, { role: "assistant" }>;
if (
preserveReplaySafeThinkingToolCallIds &&
isReplaySafeThinkingAssistantMessage(assistant, allowedToolNames)
) {
for (const toolCall of extractToolCallsFromAssistant(assistant)) {
preserveAssistantId(toolCall.id);
}
return msg;
}
const next = rewriteAssistantToolCallIds({
message: msg as Extract<AgentMessage, { role: "assistant" }>,
message: assistant,
resolveId: resolveAssistantId,
});
if (next !== msg) {