fix: strip invalid thinking replay signatures

Fixes #45010. Supersedes #70054.

Co-authored-by: Chris Staples <chris.staples@sophos.com>
Co-authored-by: Fourier <yang.fourier@gmail.com>
2026-05-06 11:30:43 +00:00 · 2026-04-25 20:12:23 +01:00
parent 47a63f7acf
commit 791ad0864a
6 changed files with 229 additions and 3 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -96,6 +96,10 @@ Docs: https://docs.openclaw.ai
 - Agents/Bedrock: prevent empty assistant stream-error turns from poisoning
  Converse replay by persisting, repairing, and replaying a non-empty fallback
  block. Fixes #71572. (#71627) Thanks @openperf.
+- Agents/Anthropic/Bedrock: strip thinking blocks with missing, empty, or blank
+  replay signatures before provider conversion, falling back to non-empty
+  omitted-reasoning text when needed so corrupted signed-thinking history no
+  longer poisons subsequent turns. Fixes #45010. (#70054) Thanks @castaples.
 - Agents/Anthropic/Bedrock: preserve stripped thinking-only assistant replay
  turns with non-empty omitted-reasoning text so provider adapters keep strict
  user/assistant turn shape. Thanks @wujiaming88.
--- a/docs/reference/transcript-hygiene.md
+++ b/docs/reference/transcript-hygiene.md
@@ -23,6 +23,7 @@ Scope includes:
 - Tool result pairing repair
 - Turn validation / ordering
 - Thought signature cleanup
+- Thinking signature cleanup (missing, empty, or blank replay signatures)
 - Image payload sanitization
 - User-input provenance tagging (for inter-session routed prompts)
 - Empty assistant error-turn repair for Bedrock Converse replay
@@ -133,6 +134,9 @@ external end-user instructions.

 - Tool result pairing repair and synthetic tool results.
 - Turn validation (merge consecutive user turns to satisfy strict alternation).
+- Thinking blocks with missing, empty, or blank replay signatures are stripped
+  before provider conversion. If that empties an assistant turn, OpenClaw keeps
+  turn shape with non-empty omitted-reasoning text.
 - Older thinking-only assistant turns that must be stripped are replaced with
  non-empty omitted-reasoning text so provider adapters do not drop the replay
  turn.
@@ -143,6 +147,9 @@ external end-user instructions.
  before replay. Bedrock Converse rejects assistant messages with `content: []`, so
  persisted assistant turns with `stopReason: "error"` and empty content are also
  repaired on disk before load.
+- Claude thinking blocks with missing, empty, or blank replay signatures are
+  stripped before Converse replay. If that empties an assistant turn, OpenClaw
+  keeps turn shape with non-empty omitted-reasoning text.
 - Older thinking-only assistant turns that must be stripped are replaced with
  non-empty omitted-reasoning text so the Converse replay keeps strict turn shape.
 - Replay filters OpenClaw delivery-mirror and gateway-injected assistant turns.
--- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
+++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
@@ -1263,6 +1263,77 @@ describe("sanitizeSessionHistory", () => {
    },
  );

+  it.each([
+    {
+      provider: "anthropic",
+      modelApi: "anthropic-messages",
+      label: "anthropic",
+    },
+    {
+      provider: "amazon-bedrock",
+      modelApi: "bedrock-converse-stream",
+      label: "bedrock",
+    },
+  ])("strips invalid thinking signatures before $label replay", async ({ provider, modelApi }) => {
+    setNonGoogleModelApi();
+
+    const messages = castAgentMessages([
+      makeUserMessage("first"),
+      makeAssistantMessage([
+        { type: "thinking", thinking: "missing signature" },
+        { type: "thinking", thinking: "blank signature", thinkingSignature: "   " },
+        { type: "thinking", thinking: "signed", thinkingSignature: "sig_latest" },
+        { type: "text", text: "latest visible answer" },
+      ]),
+    ]);
+
+    const result = await sanitizeAnthropicHistory({
+      provider,
+      modelApi,
+      messages,
+      modelId: "claude-sonnet-4-6",
+    });
+
+    expect((result[1] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
+      { type: "thinking", thinking: "signed", thinkingSignature: "sig_latest" },
+      { type: "text", text: "latest visible answer" },
+    ]);
+  });
+
+  it.each([
+    {
+      provider: "anthropic",
+      modelApi: "anthropic-messages",
+      label: "anthropic",
+    },
+    {
+      provider: "amazon-bedrock",
+      modelApi: "bedrock-converse-stream",
+      label: "bedrock",
+    },
+  ])(
+    "uses non-empty omitted-reasoning fallback when all $label thinking signatures are invalid",
+    async ({ provider, modelApi }) => {
+      setNonGoogleModelApi();
+
+      const messages = castAgentMessages([
+        makeUserMessage("first"),
+        makeAssistantMessage([{ type: "thinking", thinking: "blank", thinkingSignature: "" }]),
+      ]);
+
+      const result = await sanitizeAnthropicHistory({
+        provider,
+        modelApi,
+        messages,
+        modelId: "claude-sonnet-4-6",
+      });
+
+      expect((result[1] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
+        { type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT },
+      ]);
+    },
+  );
+
  it("uses immutable thinking replay for anthropic-compatible providers when policy preserves signatures", async () => {
    setNonGoogleModelApi();

--- a/src/agents/pi-embedded-runner/replay-history.ts
+++ b/src/agents/pi-embedded-runner/replay-history.ts
@@ -41,7 +41,7 @@ import {
  type AssistantUsageSnapshot,
  type UsageLike,
 } from "../usage.js";
-import { dropThinkingBlocks } from "./thinking.js";
+import { dropThinkingBlocks, stripInvalidThinkingSignatures } from "./thinking.js";

 const INTER_SESSION_PREFIX_BASE = "[Inter-session message]";
 const MODEL_SNAPSHOT_CUSTOM_TYPE = "model-snapshot";
@@ -544,9 +544,12 @@ export async function sanitizeSessionHistory(params: {
      ...resolveImageSanitizationLimits(params.config),
    },
  );
-  const droppedThinking = policy.dropThinkingBlocks
-    ? dropThinkingBlocks(sanitizedImages)
+  const validatedThinkingSignatures = policy.preserveSignatures
+    ? stripInvalidThinkingSignatures(sanitizedImages)
    : sanitizedImages;
+  const droppedThinking = policy.dropThinkingBlocks
+    ? dropThinkingBlocks(validatedThinkingSignatures)
+    : validatedThinkingSignatures;
  const sanitizedToolCalls = sanitizeToolCallInputs(droppedThinking, {
    allowedToolNames: params.allowedToolNames,
    allowProviderOwnedThinkingReplay,
--- a/src/agents/pi-embedded-runner/thinking.test.ts
+++ b/src/agents/pi-embedded-runner/thinking.test.ts
@@ -8,6 +8,7 @@ import {
  dropThinkingBlocks,
  isAssistantMessageWithContent,
  sanitizeThinkingForRecovery,
+  stripInvalidThinkingSignatures,
  wrapAnthropicStreamWithRecovery,
 } from "./thinking.js";

@@ -156,6 +157,85 @@ describe("dropThinkingBlocks", () => {
  });
 });

+describe("stripInvalidThinkingSignatures", () => {
+  it("returns the original reference when no invalid thinking signatures are present", () => {
+    const messages: AgentMessage[] = [
+      castAgentMessage({ role: "user", content: "hello" }),
+      castAgentMessage({
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "internal", thinkingSignature: "sig" },
+          { type: "text", text: "answer" },
+        ],
+      }),
+    ];
+
+    const result = stripInvalidThinkingSignatures(messages);
+
+    expect(result).toBe(messages);
+  });
+
+  it("strips thinking blocks with missing, empty, or blank signatures", () => {
+    const messages: AgentMessage[] = [
+      castAgentMessage({
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "missing" },
+          { type: "thinking", thinking: "empty", thinkingSignature: "" },
+          { type: "thinking", thinking: "blank", thinkingSignature: "   " },
+          { type: "thinking", thinking: "signed", thinkingSignature: "sig" },
+          { type: "text", text: "answer" },
+        ],
+      }),
+    ];
+
+    const result = stripInvalidThinkingSignatures(messages);
+    const assistant = result[0] as Extract<AgentMessage, { role: "assistant" }>;
+
+    expect(result).not.toBe(messages);
+    expect(assistant.content).toEqual([
+      { type: "thinking", thinking: "signed", thinkingSignature: "sig" },
+      { type: "text", text: "answer" },
+    ]);
+  });
+
+  it("uses non-empty omitted-reasoning text when all thinking signatures are invalid", () => {
+    const messages: AgentMessage[] = [
+      castAgentMessage({
+        role: "assistant",
+        content: [{ type: "thinking", thinking: "reasoning", thinkingSignature: "" }],
+      }),
+    ];
+
+    const result = stripInvalidThinkingSignatures(messages);
+    const assistant = result[0] as Extract<AgentMessage, { role: "assistant" }>;
+
+    expect(assistant.content).toEqual([{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }]);
+  });
+
+  it("strips redacted thinking blocks with invalid opaque signatures", () => {
+    const messages: AgentMessage[] = [
+      castAgentMessage({
+        role: "assistant",
+        content: [
+          { type: "redacted_thinking", data: "" },
+          { type: "redacted_thinking", signature: "   " },
+          { type: "redacted_thinking", data: "opaque" },
+          { type: "text", text: "answer" },
+        ],
+      }),
+    ];
+
+    const result = stripInvalidThinkingSignatures(messages);
+    const assistant = result[0] as Extract<AgentMessage, { role: "assistant" }>;
+
+    expect(assistant.content).toEqual([
+      { type: "redacted_thinking", data: "opaque" },
+      { type: "text", text: "answer" },
+    ]);
+  });
+});
+
 describe("sanitizeThinkingForRecovery", () => {
  it("drops the latest assistant message when the thinking block is unsigned", () => {
    const messages = castAgentMessages([
--- a/src/agents/pi-embedded-runner/thinking.ts
+++ b/src/agents/pi-embedded-runner/thinking.ts
@@ -61,6 +61,67 @@ function buildOmittedAssistantReasoningContent(): AssistantContentBlock[] {
  return [{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT } as AssistantContentBlock];
 }

+/** True when a thinking block has a non-blank signature (`data` also counts when redacted). */
+function hasReplayableThinkingSignature(block: AssistantContentBlock): boolean {
+  if (!isThinkingBlock(block)) {
+    return false;
+  }
+  const { type, data, signature, thinkingSignature, thought_signature } = block as {
+    type?: unknown;
+    data?: unknown;
+    signature?: unknown;
+    thinkingSignature?: unknown;
+    thought_signature?: unknown;
+  };
+  const isUsable = (value: unknown): boolean =>
+    typeof value === "string" && value.trim().length > 0;
+  const signed = isUsable(signature) || isUsable(thinkingSignature) || isUsable(thought_signature);
+  // Redacted blocks may hold their opaque payload in `data` instead of a signature.
+  return signed || (type === "redacted_thinking" && isUsable(data));
+}
+
+/**
+ * Remove thinking blocks that carry no usable replay signature.
+ *
+ * Anthropic and Bedrock refuse persisted thinking blocks whose signature is
+ * missing, empty, or blank. Opaque signature validity is left to the provider,
+ * so no local length or shape heuristics are applied here. An assistant turn
+ * left without content receives omitted-reasoning text instead.
+ *
+ * @param messages - History to inspect; the input array is not mutated.
+ * @returns `messages` itself when nothing was stripped, otherwise a new array.
+ */
+export function stripInvalidThinkingSignatures(messages: AgentMessage[]): AgentMessage[] {
+  // Tracks whether any message had to be rebuilt.
+  let strippedAny = false;
+
+  const sanitized = messages.map((message): AgentMessage => {
+    // Anything other than an assistant message with content passes through.
+    if (!isAssistantMessageWithContent(message)) {
+      return message;
+    }
+
+    // Keep non-thinking blocks and thinking blocks that can still be replayed.
+    const kept = message.content.filter(
+      (block) => !isThinkingBlock(block) || hasReplayableThinkingSignature(block),
+    );
+    if (kept.length === message.content.length) {
+      // Nothing was removed, so the message object is reused as-is.
+      return message;
+    }
+
+    strippedAny = true;
+    // A fully emptied turn gets placeholder text so its content stays non-empty.
+    return {
+      ...message,
+      content: kept.length > 0 ? kept : buildOmittedAssistantReasoningContent(),
+    };
+  });
+
+  // Hand back the caller's array when no block was stripped.
+  return strippedAny ? sanitized : messages;
+}
+
 /**
 * Strip `type: "thinking"` and `type: "redacted_thinking"` content blocks from
 * all assistant messages except the latest one.