diff --git a/CHANGELOG.md b/CHANGELOG.md index 98da9950a0f..0641c111846 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -96,6 +96,10 @@ Docs: https://docs.openclaw.ai - Agents/Bedrock: prevent empty assistant stream-error turns from poisoning Converse replay by persisting, repairing, and replaying a non-empty fallback block. Fixes #71572. (#71627) Thanks @openperf. +- Agents/Anthropic/Bedrock: strip thinking blocks with missing, empty, or blank + replay signatures before provider conversion, falling back to non-empty + omitted-reasoning text when needed so corrupted signed-thinking history no + longer poisons subsequent turns. Fixes #45010. (#70054) Thanks @castaples. - Agents/Anthropic/Bedrock: preserve stripped thinking-only assistant replay turns with non-empty omitted-reasoning text so provider adapters keep strict user/assistant turn shape. Thanks @wujiaming88. diff --git a/docs/reference/transcript-hygiene.md b/docs/reference/transcript-hygiene.md index d5b4c242eab..5185b51b6dc 100644 --- a/docs/reference/transcript-hygiene.md +++ b/docs/reference/transcript-hygiene.md @@ -23,6 +23,7 @@ Scope includes: - Tool result pairing repair - Turn validation / ordering - Thought signature cleanup +- Thinking signature cleanup - Image payload sanitization - User-input provenance tagging (for inter-session routed prompts) - Empty assistant error-turn repair for Bedrock Converse replay @@ -133,6 +134,9 @@ external end-user instructions. - Tool result pairing repair and synthetic tool results. - Turn validation (merge consecutive user turns to satisfy strict alternation). +- Thinking blocks with missing, empty, or blank replay signatures are stripped + before provider conversion. If that empties an assistant turn, OpenClaw keeps + turn shape with non-empty omitted-reasoning text. - Older thinking-only assistant turns that must be stripped are replaced with non-empty omitted-reasoning text so provider adapters do not drop the replay turn. @@ -143,6 +147,9 @@ external end-user instructions. before replay. Bedrock Converse rejects assistant messages with `content: []`, so persisted assistant turns with `stopReason: "error"` and empty content are also repaired on disk before load. +- Claude thinking blocks with missing, empty, or blank replay signatures are + stripped before Converse replay. If that empties an assistant turn, OpenClaw + keeps turn shape with non-empty omitted-reasoning text. - Older thinking-only assistant turns that must be stripped are replaced with non-empty omitted-reasoning text so the Converse replay keeps strict turn shape. - Replay filters OpenClaw delivery-mirror and gateway-injected assistant turns. diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts index cf8745004e9..1d63e7f4fb3 100644 --- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts +++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts @@ -1263,6 +1263,77 @@ describe("sanitizeSessionHistory", () => { }, ); + it.each([ + { + provider: "anthropic", + modelApi: "anthropic-messages", + label: "anthropic", + }, + { + provider: "amazon-bedrock", + modelApi: "bedrock-converse-stream", + label: "bedrock", + }, + ])("strips invalid thinking signatures before $label replay", async ({ provider, modelApi }) => { + setNonGoogleModelApi(); + + const messages = castAgentMessages([ + makeUserMessage("first"), + makeAssistantMessage([ + { type: "thinking", thinking: "missing signature" }, + { type: "thinking", thinking: "blank signature", thinkingSignature: " " }, + { type: "thinking", thinking: "signed", thinkingSignature: "sig_latest" }, + { type: "text", text: "latest visible answer" }, + ]), + ]); + + const result = await sanitizeAnthropicHistory({ + provider, + modelApi, + messages, + modelId: "claude-sonnet-4-6", + }); + + expect((result[1] as Extract).content).toEqual([ + { type: "thinking", thinking: "signed", thinkingSignature: "sig_latest" }, + { type: "text", text: "latest visible answer" }, + ]); + }); + + it.each([ + { + provider: "anthropic", + modelApi: "anthropic-messages", + label: "anthropic", + }, + { + provider: "amazon-bedrock", + modelApi: "bedrock-converse-stream", + label: "bedrock", + }, + ])( + "uses non-empty omitted-reasoning fallback when all $label thinking signatures are invalid", + async ({ provider, modelApi }) => { + setNonGoogleModelApi(); + + const messages = castAgentMessages([ + makeUserMessage("first"), + makeAssistantMessage([{ type: "thinking", thinking: "blank", thinkingSignature: "" }]), + ]); + + const result = await sanitizeAnthropicHistory({ + provider, + modelApi, + messages, + modelId: "claude-sonnet-4-6", + }); + + expect((result[1] as Extract).content).toEqual([ + { type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }, + ]); + }, + ); + it("uses immutable thinking replay for anthropic-compatible providers when policy preserves signatures", async () => { setNonGoogleModelApi(); diff --git a/src/agents/pi-embedded-runner/replay-history.ts b/src/agents/pi-embedded-runner/replay-history.ts index a82fda16bf7..cc2676b9f77 100644 --- a/src/agents/pi-embedded-runner/replay-history.ts +++ b/src/agents/pi-embedded-runner/replay-history.ts @@ -41,7 +41,7 @@ import { type AssistantUsageSnapshot, type UsageLike, } from "../usage.js"; -import { dropThinkingBlocks } from "./thinking.js"; +import { dropThinkingBlocks, stripInvalidThinkingSignatures } from "./thinking.js"; const INTER_SESSION_PREFIX_BASE = "[Inter-session message]"; const MODEL_SNAPSHOT_CUSTOM_TYPE = "model-snapshot"; @@ -544,9 +544,12 @@ export async function sanitizeSessionHistory(params: { ...resolveImageSanitizationLimits(params.config), }, ); - const droppedThinking = policy.dropThinkingBlocks - ? dropThinkingBlocks(sanitizedImages) + const validatedThinkingSignatures = policy.preserveSignatures + ? stripInvalidThinkingSignatures(sanitizedImages) : sanitizedImages; + const droppedThinking = policy.dropThinkingBlocks + ? dropThinkingBlocks(validatedThinkingSignatures) + : validatedThinkingSignatures; const sanitizedToolCalls = sanitizeToolCallInputs(droppedThinking, { allowedToolNames: params.allowedToolNames, allowProviderOwnedThinkingReplay, diff --git a/src/agents/pi-embedded-runner/thinking.test.ts b/src/agents/pi-embedded-runner/thinking.test.ts index 7598145bf03..6aefa5bcf05 100644 --- a/src/agents/pi-embedded-runner/thinking.test.ts +++ b/src/agents/pi-embedded-runner/thinking.test.ts @@ -8,6 +8,7 @@ import { dropThinkingBlocks, isAssistantMessageWithContent, sanitizeThinkingForRecovery, + stripInvalidThinkingSignatures, wrapAnthropicStreamWithRecovery, } from "./thinking.js"; @@ -156,6 +157,85 @@ describe("dropThinkingBlocks", () => { }); }); +describe("stripInvalidThinkingSignatures", () => { + it("returns the original reference when no invalid thinking signatures are present", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ role: "user", content: "hello" }), + castAgentMessage({ + role: "assistant", + content: [ + { type: "thinking", thinking: "internal", thinkingSignature: "sig" }, + { type: "text", text: "answer" }, + ], + }), + ]; + + const result = stripInvalidThinkingSignatures(messages); + + expect(result).toBe(messages); + }); + + it("strips thinking blocks with missing, empty, or blank signatures", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ + role: "assistant", + content: [ + { type: "thinking", thinking: "missing" }, + { type: "thinking", thinking: "empty", thinkingSignature: "" }, + { type: "thinking", thinking: "blank", thinkingSignature: " " }, + { type: "thinking", thinking: "signed", thinkingSignature: "sig" }, + { type: "text", text: "answer" }, + ], + }), + ]; + + const result = stripInvalidThinkingSignatures(messages); + const assistant = result[0] as Extract; + + expect(result).not.toBe(messages); + expect(assistant.content).toEqual([ + { type: "thinking", thinking: "signed", thinkingSignature: "sig" }, + { type: "text", text: "answer" }, + ]); + }); + + it("uses non-empty omitted-reasoning text when all thinking signatures are invalid", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ + role: "assistant", + content: [{ type: "thinking", thinking: "reasoning", thinkingSignature: "" }], + }), + ]; + + const result = stripInvalidThinkingSignatures(messages); + const assistant = result[0] as Extract; + + expect(assistant.content).toEqual([{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }]); + }); + + it("strips redacted thinking blocks with invalid opaque signatures", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ + role: "assistant", + content: [ + { type: "redacted_thinking", data: "" }, + { type: "redacted_thinking", signature: " " }, + { type: "redacted_thinking", data: "opaque" }, + { type: "text", text: "answer" }, + ], + }), + ]; + + const result = stripInvalidThinkingSignatures(messages); + const assistant = result[0] as Extract; + + expect(assistant.content).toEqual([ + { type: "redacted_thinking", data: "opaque" }, + { type: "text", text: "answer" }, + ]); + }); +}); + describe("sanitizeThinkingForRecovery", () => { it("drops the latest assistant message when the thinking block is unsigned", () => { const messages = castAgentMessages([ diff --git a/src/agents/pi-embedded-runner/thinking.ts b/src/agents/pi-embedded-runner/thinking.ts index 4281dcff446..79330ed8d6e 100644 --- a/src/agents/pi-embedded-runner/thinking.ts +++ b/src/agents/pi-embedded-runner/thinking.ts @@ -61,6 +61,67 @@ function buildOmittedAssistantReasoningContent(): AssistantContentBlock[] { return [{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT } as AssistantContentBlock]; } +function hasReplayableThinkingSignature(block: AssistantContentBlock): boolean { + if (!isThinkingBlock(block)) { + return false; + } + const record = block as { + data?: unknown; + signature?: unknown; + thinkingSignature?: unknown; + thought_signature?: unknown; + }; + const candidates = + (block as { type?: unknown }).type === "redacted_thinking" + ? [record.data, record.signature, record.thinkingSignature, record.thought_signature] + : [record.signature, record.thinkingSignature, record.thought_signature]; + return candidates.some((signature) => { + return typeof signature === "string" && signature.trim().length > 0; + }); +} + +/** + * Strip thinking blocks with clearly invalid replay signatures. + * + * Anthropic and Bedrock reject persisted thinking blocks when the signature is + * absent, empty, or blank. They are also the authority for opaque signature + * validity, so this intentionally avoids local length or shape heuristics. + */ +export function stripInvalidThinkingSignatures(messages: AgentMessage[]): AgentMessage[] { + let touched = false; + const out: AgentMessage[] = []; + + for (const message of messages) { + if (!isAssistantMessageWithContent(message)) { + out.push(message); + continue; + } + + const nextContent: AssistantContentBlock[] = []; + let changed = false; + for (const block of message.content) { + if (!isThinkingBlock(block) || hasReplayableThinkingSignature(block)) { + nextContent.push(block); + continue; + } + changed = true; + touched = true; + } + + if (!changed) { + out.push(message); + continue; + } + + out.push({ + ...message, + content: nextContent.length > 0 ? nextContent : buildOmittedAssistantReasoningContent(), + }); + } + + return touched ? out : messages; +} + /** * Strip `type: "thinking"` and `type: "redacted_thinking"` content blocks from * all assistant messages except the latest one.