From eed627d3f2918b5e960290d2bf20aa9caeae46aa Mon Sep 17 00:00:00 2001 From: Shakker Date: Sun, 12 Apr 2026 06:10:06 +0100 Subject: [PATCH] fix: preserve replay-safe signed tool ids --- ...ed-runner.sanitize-session-history.test.ts | 50 +++++ .../pi-embedded-runner/replay-history.ts | 24 ++- src/agents/pi-embedded-runner/run/attempt.ts | 11 +- src/agents/tool-call-id.test.ts | 29 +++ src/agents/tool-call-id.ts | 195 +++++++++++++++++- 5 files changed, 297 insertions(+), 12 deletions(-) diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts index fb5625d84b2..a0b47d63bda 100644 --- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts +++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts @@ -1038,6 +1038,56 @@ describe("sanitizeSessionHistory", () => { }); }); + it.each([ + { + provider: "anthropic", + modelApi: "anthropic-messages", + label: "anthropic", + }, + { + provider: "amazon-bedrock", + modelApi: "bedrock-converse-stream", + label: "bedrock", + }, + ])("preserves replay-safe signed tool ids for $label history", async ({ provider, modelApi }) => { + setNonGoogleModelApi(); + + const messages = castAgentMessages([ + makeUserMessage("retry"), + makeAssistantMessage([ + { + type: "thinking", + thinking: "internal", + thinkingSignature: "sig_1", + }, + { type: "toolCall", id: "call_1", name: "read", arguments: {} }, + ] as unknown as AssistantMessage["content"]), + castAgentMessage({ + role: "toolResult", + toolCallId: "call_1", + toolName: "read", + content: [{ type: "text", text: "ok" }], + isError: false, + }), + ]); + + const result = await sanitizeAnthropicHistory({ + provider, + modelApi, + messages, + }); + + expect((result[1] as Extract).content).toEqual([ + { + type: "thinking", + thinking: "internal", + thinkingSignature: "sig_1", + }, + { type: "toolCall", id: "call_1", name: "read", arguments: {} }, + ]); + expect((result[2] as Extract).toolCallId).toBe("call_1"); + }); + it("keeps mutable thinking turns outside exact anthropic replay", async () => { setNonGoogleModelApi(); diff --git a/src/agents/pi-embedded-runner/replay-history.ts b/src/agents/pi-embedded-runner/replay-history.ts index 11f7ed02f75..c4cab3d9ca6 100644 --- a/src/agents/pi-embedded-runner/replay-history.ts +++ b/src/agents/pi-embedded-runner/replay-history.ts @@ -33,6 +33,7 @@ import { resolveTranscriptPolicy, shouldAllowProviderOwnedThinkingReplay, } from "../transcript-policy.js"; +import { sanitizeToolCallIdsForCloudCodeAssist } from "../tool-call-id.js"; import { makeZeroUsageSnapshot, normalizeUsage, @@ -403,12 +404,16 @@ export async function sanitizeSessionHistory(params: { model: params.model, }); const withInterSessionMarkers = annotateInterSessionUserMessages(params.messages); + const allowProviderOwnedThinkingReplay = shouldAllowProviderOwnedThinkingReplay({ + modelApi: params.modelApi, + policy, + }); const sanitizedImages = await sanitizeSessionMessagesImages( withInterSessionMarkers, "session:history", { sanitizeMode: policy.sanitizeMode, - sanitizeToolCallIds: policy.sanitizeToolCallIds, + sanitizeToolCallIds: policy.sanitizeToolCallIds && !allowProviderOwnedThinkingReplay, toolCallIdMode: policy.toolCallIdMode, preserveNativeAnthropicToolUseIds: policy.preserveNativeAnthropicToolUseIds, preserveSignatures: policy.preserveSignatures, @@ -421,16 +426,21 @@ export async function sanitizeSessionHistory(params: { : sanitizedImages; const sanitizedToolCalls = sanitizeToolCallInputs(droppedThinking, { allowedToolNames: params.allowedToolNames, - allowProviderOwnedThinkingReplay: shouldAllowProviderOwnedThinkingReplay({ - modelApi: params.modelApi, - policy, - }), + allowProviderOwnedThinkingReplay, }); + const sanitizedToolIds = + policy.sanitizeToolCallIds && policy.toolCallIdMode + ? sanitizeToolCallIdsForCloudCodeAssist(sanitizedToolCalls, policy.toolCallIdMode, { + preserveNativeAnthropicToolUseIds: policy.preserveNativeAnthropicToolUseIds, + preserveReplaySafeThinkingToolCallIds: allowProviderOwnedThinkingReplay, + allowedToolNames: params.allowedToolNames, + }) + : sanitizedToolCalls; const repairedTools = policy.repairToolUseResultPairing - ? sanitizeToolUseResultPairing(sanitizedToolCalls, { + ? sanitizeToolUseResultPairing(sanitizedToolIds, { erroredAssistantResultPolicy: "drop", }) - : sanitizedToolCalls; + : sanitizedToolIds; const sanitizedToolResults = stripToolResultDetails(repairedTools); const sanitizedCompactionUsage = ensureAssistantUsageSnapshots( stripStaleAssistantUsageBeforeLatestCompaction(sanitizedToolResults), diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index cca25d098c4..25fb55c3e50 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -111,7 +111,10 @@ import { buildSystemPromptParams } from "../../system-prompt-params.js"; import { buildSystemPromptReport } from "../../system-prompt-report.js"; import { resolveAgentTimeoutMs } from "../../timeout.js"; import { sanitizeToolCallIdsForCloudCodeAssist } from "../../tool-call-id.js"; -import { resolveTranscriptPolicy } from "../../transcript-policy.js"; +import { + resolveTranscriptPolicy, + shouldAllowProviderOwnedThinkingReplay, +} from "../../transcript-policy.js"; import { normalizeUsage, type NormalizedUsage } from "../../usage.js"; import { DEFAULT_BOOTSTRAP_FILENAME } from "../../workspace.js"; import { isRunnerAbortError } from "../abort.js"; @@ -1156,11 +1159,17 @@ export async function runEmbeddedAttempt( if (!Array.isArray(messages)) { return inner(model, context, options); } + const allowProviderOwnedThinkingReplay = shouldAllowProviderOwnedThinkingReplay({ + modelApi: (model as { api?: unknown })?.api as string | null | undefined, + policy: transcriptPolicy, + }); const sanitized = sanitizeToolCallIdsForCloudCodeAssist( messages as AgentMessage[], mode, { preserveNativeAnthropicToolUseIds: transcriptPolicy.preserveNativeAnthropicToolUseIds, + preserveReplaySafeThinkingToolCallIds: allowProviderOwnedThinkingReplay, + allowedToolNames, }, ); if (sanitized === messages) { diff --git a/src/agents/tool-call-id.test.ts b/src/agents/tool-call-id.test.ts index 80a5cba5889..749cf6061ad 100644 --- a/src/agents/tool-call-id.test.ts +++ b/src/agents/tool-call-id.test.ts @@ -293,6 +293,35 @@ describe("sanitizeToolCallIdsForCloudCodeAssist", () => { ).toBe("call123fc123"); }); + it("preserves replay-safe signed-thinking tool ids when requested", () => { + const input = castAgentMessages([ + { + role: "assistant", + content: [ + { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" }, + { type: "toolCall", id: "call_1", name: "read", arguments: {} }, + ], + }, + { + role: "toolResult", + toolCallId: "call_1", + toolName: "read", + content: [{ type: "text", text: "ok" }], + }, + ]); + + const out = sanitizeToolCallIdsForCloudCodeAssist(input, "strict", { + preserveReplaySafeThinkingToolCallIds: true, + allowedToolNames: ["read"], + }); + + expect(out).toBe(input); + expect(((out[0] as Extract).content?.[1] as { id?: string }).id).toBe( + "call_1", + ); + expect((out[1] as Extract).toolCallId).toBe("call_1"); + }); + it("avoids collisions with alphanumeric-only suffixes", () => { const input = buildDuplicateIdCollisionInput(); diff --git a/src/agents/tool-call-id.ts b/src/agents/tool-call-id.ts index 8cfabc622db..071008968cc 100644 --- a/src/agents/tool-call-id.ts +++ b/src/agents/tool-call-id.ts @@ -1,8 +1,13 @@ import { createHash } from "node:crypto"; import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js"; export type ToolCallIdMode = "strict" | "strict9"; const NATIVE_ANTHROPIC_TOOL_USE_ID_RE = /^toolu_[A-Za-z0-9_]+$/; +const REDACTED_SESSIONS_SPAWN_ATTACHMENT_CONTENT = "__OPENCLAW_REDACTED__"; +const SESSIONS_SPAWN_ATTACHMENT_METADATA_KEYS = ["name", "encoding", "mimeType"] as const; +const TOOL_CALL_NAME_MAX_CHARS = 64; +const TOOL_CALL_NAME_RE = /^[A-Za-z0-9_:.-]+$/; const STRICT9_LEN = 9; const TOOL_CALL_TYPES = new Set(["toolCall", "toolUse", "functionCall"]); @@ -12,6 +17,14 @@ export type ToolCallLike = { name?: string; }; +type ReplaySafeToolCallBlock = { + type?: unknown; + id?: unknown; + name?: unknown; + input?: unknown; + arguments?: unknown; +}; + /** * Sanitize a tool call ID to be compatible with various providers. * @@ -83,6 +96,150 @@ export function extractToolResultId( return null; } +function isThinkingLikeBlock(block: unknown): boolean { + if (!block || typeof block !== "object") { + return false; + } + const type = (block as { type?: unknown }).type; + return type === "thinking" || type === "redacted_thinking"; +} + +function hasToolCallInput(block: ReplaySafeToolCallBlock): boolean { + const hasInput = "input" in block ? block.input !== undefined && block.input !== null : false; + const hasArguments = + "arguments" in block ? block.arguments !== undefined && block.arguments !== null : false; + return hasInput || hasArguments; +} + +function hasNonEmptyStringField(value: unknown): value is string { + return typeof value === "string" && value.trim().length > 0; +} + +function normalizeAllowedToolNames(allowedToolNames?: Iterable): Set | null { + if (!allowedToolNames) { + return null; + } + const normalized = new Set(); + for (const name of allowedToolNames) { + if (typeof name !== "string") { + continue; + } + const trimmed = name.trim(); + if (!trimmed) { + continue; + } + normalized.add(normalizeLowercaseStringOrEmpty(trimmed)); + } + return normalized.size > 0 ? normalized : null; +} + +function isRedactedSessionsSpawnAttachment(item: unknown): boolean { + if (!item || typeof item !== "object") { + return false; + } + const attachment = item as Record; + if (attachment.content !== REDACTED_SESSIONS_SPAWN_ATTACHMENT_CONTENT) { + return false; + } + for (const key of Object.keys(attachment)) { + if (key === "content") { + continue; + } + if (!(SESSIONS_SPAWN_ATTACHMENT_METADATA_KEYS as readonly string[]).includes(key)) { + return false; + } + if (typeof attachment[key] !== "string" || (attachment[key] as string).trim().length === 0) { + return false; + } + } + return true; +} + +function toolCallNeedsReplayMutation(block: ReplaySafeToolCallBlock): boolean { + const rawName = typeof block.name === "string" ? block.name : undefined; + const trimmedName = rawName?.trim(); + if (rawName && rawName !== trimmedName) { + return true; + } + if (normalizeLowercaseStringOrEmpty(trimmedName) !== "sessions_spawn") { + return false; + } + for (const payload of [block.arguments, block.input]) { + if (!payload || typeof payload !== "object") { + continue; + } + const attachments = (payload as { attachments?: unknown }).attachments; + if (!Array.isArray(attachments)) { + continue; + } + for (const attachment of attachments) { + if (!isRedactedSessionsSpawnAttachment(attachment)) { + return true; + } + } + } + return false; +} + +function hasReplaySafeToolCallName( + block: ReplaySafeToolCallBlock, + allowedToolNames: Set | null, +): boolean { + if (typeof block.name !== "string") { + return false; + } + const trimmed = block.name.trim(); + if (!trimmed) { + return false; + } + if (trimmed.length > TOOL_CALL_NAME_MAX_CHARS || !TOOL_CALL_NAME_RE.test(trimmed)) { + return false; + } + if (!allowedToolNames) { + return true; + } + return allowedToolNames.has(normalizeLowercaseStringOrEmpty(trimmed)); +} + +function isReplaySafeThinkingAssistantMessage( + message: Extract, + allowedToolNames: Set | null, +): boolean { + const content = message.content; + if (!Array.isArray(content)) { + return false; + } + + let sawThinking = false; + let sawToolCall = false; + for (const block of content) { + if (isThinkingLikeBlock(block)) { + sawThinking = true; + continue; + } + if (!block || typeof block !== "object") { + continue; + } + const typedBlock = block as ReplaySafeToolCallBlock; + if ( + typeof typedBlock.type !== "string" || + !TOOL_CALL_TYPES.has(typedBlock.type) + ) { + continue; + } + sawToolCall = true; + if ( + !hasToolCallInput(typedBlock) || + !hasNonEmptyStringField(typedBlock.id) || + !hasReplaySafeToolCallName(typedBlock, allowedToolNames) || + toolCallNeedsReplayMutation(typedBlock) + ) { + return false; + } + } + return sawThinking && sawToolCall; +} + export function isValidCloudCodeAssistToolId(id: string, mode: ToolCallIdMode = "strict"): boolean { if (!id || typeof id !== "string") { return false; @@ -155,6 +312,7 @@ function createOccurrenceAwareResolver( ): { resolveAssistantId: (id: string) => string; resolveToolResultId: (id: string) => string; + preserveAssistantId: (id: string) => string; } { const used = new Set(); const assistantOccurrences = new Map(); @@ -218,7 +376,18 @@ function createOccurrenceAwareResolver( return allocate(`${id}:tool_result:${occurrence}`); }; - return { resolveAssistantId, resolveToolResultId }; + const preserveAssistantId = (id: string): string => { + used.add(id); + const pending = pendingByRawId.get(id); + if (pending) { + pending.push(id); + } else { + pendingByRawId.set(id, [id]); + } + return id; + }; + + return { resolveAssistantId, resolveToolResultId, preserveAssistantId }; } function rewriteAssistantToolCallIds(params: { @@ -298,7 +467,11 @@ function rewriteToolResultIds(params: { export function sanitizeToolCallIdsForCloudCodeAssist( messages: AgentMessage[], mode: ToolCallIdMode = "strict", - options?: { preserveNativeAnthropicToolUseIds?: boolean }, + options?: { + preserveNativeAnthropicToolUseIds?: boolean; + preserveReplaySafeThinkingToolCallIds?: boolean; + allowedToolNames?: Iterable; + }, ): AgentMessage[] { // Strict mode: only [a-zA-Z0-9] // Strict9 mode: only [a-zA-Z0-9], length 9 (Mistral tool call requirement) @@ -306,7 +479,11 @@ export function sanitizeToolCallIdsForCloudCodeAssist( // duplicate tool-call IDs. Track assistant occurrences in-order so repeated // raw IDs receive distinct rewritten IDs, while matching tool results consume // the same rewritten IDs in encounter order. - const { resolveAssistantId, resolveToolResultId } = createOccurrenceAwareResolver(mode, options); + const { resolveAssistantId, resolveToolResultId, preserveAssistantId } = + createOccurrenceAwareResolver(mode, options); + const allowedToolNames = normalizeAllowedToolNames(options?.allowedToolNames); + const preserveReplaySafeThinkingToolCallIds = + options?.preserveReplaySafeThinkingToolCallIds === true; let changed = false; const out = messages.map((msg) => { @@ -315,8 +492,18 @@ export function sanitizeToolCallIdsForCloudCodeAssist( } const role = (msg as { role?: unknown }).role; if (role === "assistant") { + const assistant = msg as Extract; + if ( + preserveReplaySafeThinkingToolCallIds && + isReplaySafeThinkingAssistantMessage(assistant, allowedToolNames) + ) { + for (const toolCall of extractToolCallsFromAssistant(assistant)) { + preserveAssistantId(toolCall.id); + } + return msg; + } const next = rewriteAssistantToolCallIds({ - message: msg as Extract, + message: assistant, resolveId: resolveAssistantId, }); if (next !== msg) {