From a383e09f527324614a7e6cac611a44fe415a7c53 Mon Sep 17 00:00:00 2001 From: Shakker Date: Sun, 12 Apr 2026 03:43:30 +0100 Subject: [PATCH] fix: drop unsafe signed-thinking turns during replay normalization --- .../pi-embedded-runner/run/attempt.test.ts | 57 ++++++++++++++++- .../run/attempt.tool-call-normalization.ts | 61 ++++++++++++++++++- 2 files changed, 115 insertions(+), 3 deletions(-) diff --git a/src/agents/pi-embedded-runner/run/attempt.test.ts b/src/agents/pi-embedded-runner/run/attempt.test.ts index 9d073728912..175744b226c 100644 --- a/src/agents/pi-embedded-runner/run/attempt.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.test.ts @@ -946,7 +946,7 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => { expect(seenContext.messages).toBe(messages); }); - it("preserves signed thinking turns when replayed tool calls would otherwise be sanitized", async () => { + it("drops signed thinking turns when sibling replay tool calls are not allowlisted", async () => { const messages = [ { role: "assistant", @@ -974,7 +974,60 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => { expect(baseFn).toHaveBeenCalledTimes(1); const seenContext = baseFn.mock.calls[0]?.[1] as { messages: unknown[] }; - expect(seenContext.messages).toBe(messages); + expect(seenContext.messages).toEqual([ + { + role: "user", + content: [{ type: "text", text: "retry" }], + }, + ]); + }); + + it("drops signed thinking turns when replay would expose inline sessions_spawn attachments", async () => { + const attachmentContent = "SIGNED_THINKING_INLINE_ATTACHMENT"; + const messages = [ + { + role: "assistant", + content: [ + { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" }, + { + type: "toolUse", + id: "call_1", + name: "sessions_spawn", + input: { + task: "inspect attachment", + attachments: [{ name: "snapshot.txt", content: attachmentContent }], + }, + }, + ], + }, + { + role: "user", + content: [{ type: "text", text: "retry" }], + }, + ]; + const baseFn = vi.fn((_model, _context) => + createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }), + ); + + const wrapped = wrapStreamFnSanitizeMalformedToolCalls( + baseFn as never, + new Set(["sessions_spawn"]), + ); + const stream = wrapped( + { api: "anthropic-messages" } as never, + { messages } as never, + {} as never, + ) as FakeWrappedStream | Promise; + await Promise.resolve(stream); + + expect(baseFn).toHaveBeenCalledTimes(1); + const seenContext = baseFn.mock.calls[0]?.[1] as { messages: unknown[] }; + expect(seenContext.messages).toEqual([ + { + role: "user", + content: [{ type: "text", text: "retry" }], + }, + ]); }); it("preserves sessions_spawn attachment payloads on replay", async () => { diff --git a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts index 00e1cc9f6ef..27d8f7c2649 100644 --- a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts +++ b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts @@ -237,6 +237,60 @@ function isThinkingLikeReplayBlock(block: unknown): boolean { return type === "thinking" || type === "redacted_thinking"; } +function hasUnredactedSessionsSpawnAttachments(block: ReplayToolCallBlock): boolean { + const rawName = typeof block.name === "string" ? block.name.trim() : ""; + if (normalizeLowercaseStringOrEmpty(rawName) !== "sessions_spawn") { + return false; + } + for (const payload of [block.arguments, block.input]) { + if (!payload || typeof payload !== "object") { + continue; + } + const attachments = (payload as { attachments?: unknown }).attachments; + if (!Array.isArray(attachments)) { + continue; + } + for (const attachment of attachments) { + if (!attachment || typeof attachment !== "object") { + continue; + } + if (!Object.hasOwn(attachment, "content")) { + continue; + } + const content = (attachment as { content?: unknown }).content; + if (content !== "__OPENCLAW_REDACTED__") { + return true; + } + } + } + return false; +} + +function isReplaySafeThinkingTurn( + content: unknown[], + allowedToolNames?: Set, +): boolean { + for (const block of content) { + if (!isReplayToolCallBlock(block)) { + continue; + } + const replayBlock = block as ReplayToolCallBlock; + if ( + !replayToolCallHasInput(replayBlock) || + !replayToolCallNonEmptyString(replayBlock.id) || + hasUnredactedSessionsSpawnAttachments(replayBlock) + ) { + return false; + } + const rawName = typeof replayBlock.name === "string" ? replayBlock.name : ""; + const resolvedName = resolveReplayToolCallName(rawName, replayBlock.id, allowedToolNames); + if (!resolvedName || replayBlock.name !== resolvedName) { + return false; + } + } + return true; +} + function isReplayToolCallBlock(block: unknown): block is ReplayToolCallBlock { if (!block || typeof block !== "object") { return false; @@ -292,7 +346,12 @@ function sanitizeReplayToolCallInputs( continue; } if (message.content.some((block) => isThinkingLikeReplayBlock(block))) { - out.push(message); + if (isReplaySafeThinkingTurn(message.content, allowedToolNames)) { + out.push(message); + } else { + changed = true; + droppedAssistantMessages += 1; + } continue; }