fix: preserve omitted thinking replay turns

2026-05-06 08:00:42 +00:00 · 2026-04-25 19:54:11 +01:00
parent 690c98ad99
commit a018db771d
6 changed files with 288 additions and 12 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -80,6 +80,9 @@ Docs: https://docs.openclaw.ai
 - Agents/Bedrock: prevent empty assistant stream-error turns from poisoning
  Converse replay by persisting, repairing, and replaying a non-empty fallback
  block. Fixes #71572. (#71627) Thanks @openperf.
+- Agents/Anthropic/Bedrock: replace stripped thinking-only assistant replay
+  turns with non-empty omitted-reasoning text so provider adapters keep strict
+  user/assistant turn shape. Thanks @wujiaming88.
 - Browser/CDP: make readiness diagnostics use the same discovery-first fallback as reachability for bare `ws://` Browserless and Browserbase CDP URLs. Fixes #69532.
 - Browser/CDP: explain that loopback Browserless or other externally managed CDP services need `attachOnly: true` and matching Browserless `EXTERNAL` endpoint when reporting local port ownership conflicts, and fall back to the configured bare WebSocket root when a discovered Browserless endpoint rejects CDP. Fixes #49815.
 - Gateway/reload: preserve indefinite `gateway.reload.deferralTimeoutMs: 0` semantics for channel hot reload deferrals so active agent runs are not interrupted by a forced channel restart. (#71637) Thanks @Poo-Squirry.
--- a/docs/reference/transcript-hygiene.md
+++ b/docs/reference/transcript-hygiene.md
@@ -133,6 +133,9 @@ external end-user instructions.

 - Tool result pairing repair and synthetic tool results.
 - Turn validation (merge consecutive user turns to satisfy strict alternation).
+- Older thinking-only assistant turns that must be stripped are replaced with
+  the non-empty placeholder text `[assistant reasoning omitted]` so provider
+  adapters do not drop the replay turn.

 **Amazon Bedrock (Converse API)**

@@ -140,6 +143,8 @@ external end-user instructions.
  before replay. Bedrock Converse rejects assistant messages with `content: []`, so
  persisted assistant turns with `stopReason: "error"` and empty content are also
  repaired on disk before load.
+- Older thinking-only assistant turns that must be stripped are replaced with
+  non-empty omitted-reasoning text so the Converse replay keeps strict turn shape.
 - Replay filters OpenClaw delivery-mirror and gateway-injected assistant turns.
 - Image sanitization applies through the global rule.

--- a/src/agents/pi-embedded-runner.anthropic-tool-replay.live.test.ts
+++ b/src/agents/pi-embedded-runner.anthropic-tool-replay.live.test.ts
@@ -7,6 +7,7 @@ import {
 } from "./live-cache-test-support.js";
 import { isLiveTestEnabled } from "./live-test-helpers.js";
 import { wrapStreamFnSanitizeMalformedToolCalls } from "./pi-embedded-runner/run/attempt.tool-call-normalization.js";
+import { OMITTED_ASSISTANT_REASONING_TEXT } from "./pi-embedded-runner/thinking.js";
 import { buildAssistantMessageWithZeroUsage } from "./stream-message-shared.js";

 const ANTHROPIC_LIVE = isLiveTestEnabled(["ANTHROPIC_LIVE_TEST"]);
@@ -33,7 +34,7 @@ function buildLiveAnthropicModel(): {
      name: modelId,
      api: "anthropic-messages" as const,
      provider: "anthropic",
-      baseUrl: "https://api.anthropic.com/v1",
+      baseUrl: "https://api.anthropic.com",
      reasoning: true,
      input: ["text"] as const,
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
@@ -44,6 +45,94 @@ function buildLiveAnthropicModel(): {
 }

 describeLive("pi embedded anthropic replay sanitization (live)", () => {
+  it(
+    "accepts regular text-only assistant replay history",
+    async () => {
+      const { apiKey, model } = buildLiveAnthropicModel();
+      const messages: Message[] = [
+        {
+          role: "user",
+          content: "Remember the marker REGULAR_ANTHROPIC_REPLAY_OK.",
+          timestamp: Date.now(),
+        },
+        buildAssistantMessageWithZeroUsage({
+          model: { api: model.api, provider: model.provider, id: model.id },
+          content: [{ type: "text", text: "I remember REGULAR_ANTHROPIC_REPLAY_OK." }],
+          stopReason: "stop",
+        }),
+        {
+          role: "user",
+          content: "Reply with a short confirmation if this replay history is valid.",
+          timestamp: Date.now(),
+        },
+      ];
+
+      logLiveCache(`anthropic regular replay live model=${model.provider}/${model.id}`);
+      const response = await completeSimpleWithLiveTimeout(
+        model,
+        { messages },
+        {
+          apiKey,
+          cacheRetention: "none",
+          sessionId: "anthropic-regular-replay-live",
+          maxTokens: 64,
+          temperature: 0,
+        },
+        "anthropic regular text replay live synthetic transcript",
+        ANTHROPIC_TIMEOUT_MS,
+      );
+
+      const text = extractAssistantText(response);
+      logLiveCache(`anthropic regular replay live result=${JSON.stringify(text)}`);
+      expect(text.trim().length).toBeGreaterThan(0);
+    },
+    6 * 60_000,
+  );
+
+  it(
+    "accepts omitted-reasoning placeholder assistant replay history",
+    async () => {
+      const { apiKey, model } = buildLiveAnthropicModel();
+      const messages: Message[] = [
+        {
+          role: "user",
+          content: "Remember that the previous assistant reasoning was omitted.",
+          timestamp: Date.now(),
+        },
+        buildAssistantMessageWithZeroUsage({
+          model: { api: model.api, provider: model.provider, id: model.id },
+          content: [{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }],
+          stopReason: "stop",
+        }),
+        {
+          role: "user",
+          content: "Reply with exactly OK if this placeholder replay history is valid.",
+          timestamp: Date.now(),
+        },
+      ];
+
+      logLiveCache(`anthropic omitted-reasoning replay live model=${model.provider}/${model.id}`);
+      const response = await completeSimpleWithLiveTimeout(
+        model,
+        { messages },
+        {
+          apiKey,
+          cacheRetention: "none",
+          sessionId: "anthropic-omitted-reasoning-replay-live",
+          maxTokens: 64,
+          temperature: 0,
+        },
+        "anthropic omitted reasoning replay live synthetic transcript",
+        ANTHROPIC_TIMEOUT_MS,
+      );
+
+      const text = extractAssistantText(response);
+      logLiveCache(`anthropic omitted-reasoning replay live result=${JSON.stringify(text)}`);
+      expect(text.trim().length).toBeGreaterThan(0);
+    },
+    6 * 60_000,
+  );
+
  it(
    "preserves toolCall replay history that Anthropic accepts end-to-end",
    async () => {
--- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
+++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
@@ -16,6 +16,7 @@ import {
  TEST_SESSION_ID,
 } from "./pi-embedded-runner.sanitize-session-history.test-harness.js";
 import { validateReplayTurns } from "./pi-embedded-runner/replay-history.js";
+import { OMITTED_ASSISTANT_REASONING_TEXT } from "./pi-embedded-runner/thinking.js";
 import { castAgentMessage, castAgentMessages } from "./test-helpers/agent-message-fixtures.js";
 import { extractToolCallsFromAssistant } from "./tool-call-id.js";
 import type { TranscriptPolicy } from "./transcript-policy.js";
@@ -1176,6 +1177,92 @@ describe("sanitizeSessionHistory", () => {
    ]);
  });

+  it("keeps regular latest Anthropic thinking replay while preserving older stripped turns", async () => {
+    setNonGoogleModelApi();
+
+    const messages = castAgentMessages([
+      makeUserMessage("first"),
+      makeAssistantMessage([
+        {
+          type: "thinking",
+          thinking: "old private reasoning",
+          thinkingSignature: "sig_old",
+        },
+      ]),
+      makeUserMessage("second"),
+      makeAssistantMessage([
+        {
+          type: "thinking",
+          thinking: "latest private reasoning",
+          thinkingSignature: "sig_latest",
+        },
+        { type: "text", text: "latest visible answer" },
+      ]),
+    ]);
+
+    const result = await sanitizeAnthropicHistory({
+      messages,
+      modelId: "claude-3-7-sonnet-20250219",
+    });
+
+    expect((result[1] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
+      { type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT },
+    ]);
+    expect((result[3] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
+      {
+        type: "thinking",
+        thinking: "latest private reasoning",
+        thinkingSignature: "sig_latest",
+      },
+      { type: "text", text: "latest visible answer" },
+    ]);
+  });
+
+  it.each([
+    {
+      provider: "anthropic",
+      modelApi: "anthropic-messages",
+      label: "anthropic",
+    },
+    {
+      provider: "amazon-bedrock",
+      modelApi: "bedrock-converse-stream",
+      label: "bedrock",
+    },
+  ])(
+    "preserves older stripped thinking-only assistant turns for $label replay",
+    async ({ provider, modelApi }) => {
+      setNonGoogleModelApi();
+
+      const messages = castAgentMessages([
+        makeUserMessage("first"),
+        makeAssistantMessage([
+          {
+            type: "thinking",
+            thinking: "old private reasoning",
+            thinkingSignature: "sig_old",
+          },
+        ]),
+        makeUserMessage("second"),
+        makeAssistantMessage([{ type: "text", text: "latest visible answer" }]),
+      ]);
+
+      const result = await sanitizeAnthropicHistory({
+        provider,
+        modelApi,
+        messages,
+        modelId: "claude-3-7-sonnet-20250219",
+      });
+
+      expect((result[1] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
+        { type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT },
+      ]);
+      expect((result[3] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
+        { type: "text", text: "latest visible answer" },
+      ]);
+    },
+  );
+
  it("uses immutable thinking replay for anthropic-compatible providers when policy preserves signatures", async () => {
    setNonGoogleModelApi();

--- a/src/agents/pi-embedded-runner/thinking.test.ts
+++ b/src/agents/pi-embedded-runner/thinking.test.ts
@@ -3,6 +3,7 @@ import { createAssistantMessageEventStream } from "@mariozechner/pi-ai";
 import { describe, expect, it } from "vitest";
 import { castAgentMessage, castAgentMessages } from "../test-helpers/agent-message-fixtures.js";
 import {
+  OMITTED_ASSISTANT_REASONING_TEXT,
  assessLastAssistantMessage,
  dropThinkingBlocks,
  isAssistantMessageWithContent,
@@ -103,6 +104,56 @@ describe("dropThinkingBlocks", () => {
      { type: "text", text: "latest text" },
    ]);
  });
+
+  it("uses non-empty omitted-reasoning text when an older assistant turn is thinking-only", () => {
+    const messages: AgentMessage[] = [
+      castAgentMessage({ role: "user", content: "first" }),
+      castAgentMessage({
+        role: "assistant",
+        content: [{ type: "thinking", thinking: "old", thinkingSignature: "sig_old" }],
+      }),
+      castAgentMessage({ role: "user", content: "second" }),
+      castAgentMessage({
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "latest", thinkingSignature: "sig_latest" },
+          { type: "text", text: "latest text" },
+        ],
+      }),
+    ];
+
+    const result = dropThinkingBlocks(messages);
+    const oldAssistant = result[1] as Extract<AgentMessage, { role: "assistant" }>;
+    const latestAssistant = result[3] as Extract<AgentMessage, { role: "assistant" }>;
+    const originalLatestAssistant = messages[3] as Extract<AgentMessage, { role: "assistant" }>;
+
+    expect(oldAssistant.content).toEqual([
+      { type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT },
+    ]);
+    expect(latestAssistant.content).toEqual(originalLatestAssistant.content);
+  });
+
+  it("uses non-empty omitted-reasoning text when an older assistant turn is redacted-thinking-only", () => {
+    const messages: AgentMessage[] = [
+      castAgentMessage({ role: "user", content: "first" }),
+      castAgentMessage({
+        role: "assistant",
+        content: [{ type: "redacted_thinking", data: "opaque" }],
+      }),
+      castAgentMessage({ role: "user", content: "second" }),
+      castAgentMessage({
+        role: "assistant",
+        content: [{ type: "text", text: "latest text" }],
+      }),
+    ];
+
+    const result = dropThinkingBlocks(messages);
+    const oldAssistant = result[1] as Extract<AgentMessage, { role: "assistant" }>;
+
+    expect(oldAssistant.content).toEqual([
+      { type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT },
+    ]);
+  });
 });

 describe("sanitizeThinkingForRecovery", () => {
@@ -191,11 +242,13 @@ describe("wrapAnthropicStreamWithRecovery", () => {
    "thinking or redacted_thinking blocks in the latest assistant message cannot be modified",
  );

-  it("retries once when the request is rejected before streaming", async () => {
+  it("retries once with omitted-reasoning text when the request is rejected before streaming", async () => {
    let callCount = 0;
+    const contexts: Array<{ messages?: AgentMessage[] }> = [];
    const wrapped = wrapAnthropicStreamWithRecovery(
-      (() => {
+      ((_model, context) => {
        callCount += 1;
+        contexts.push(context as { messages?: AgentMessage[] });
        return Promise.reject(anthropicThinkingError);
      }) as Parameters<typeof wrapAnthropicStreamWithRecovery>[0],
      { id: "test-session" },
@@ -216,6 +269,44 @@ describe("wrapAnthropicStreamWithRecovery", () => {
      ),
    ).rejects.toBe(anthropicThinkingError);
    expect(callCount).toBe(2);
+    expect(contexts[1]?.messages?.[0]).toMatchObject({
+      role: "assistant",
+      content: [{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }],
+    });
+  });
+
+  it("retries with visible assistant text when stripping thinking leaves content", async () => {
+    const contexts: Array<{ messages?: AgentMessage[] }> = [];
+    const wrapped = wrapAnthropicStreamWithRecovery(
+      ((_model, context) => {
+        contexts.push(context as { messages?: AgentMessage[] });
+        return Promise.reject(anthropicThinkingError);
+      }) as Parameters<typeof wrapAnthropicStreamWithRecovery>[0],
+      { id: "test-session" },
+    );
+
+    await expect(
+      wrapped(
+        {} as never,
+        {
+          messages: castAgentMessages([
+            {
+              role: "assistant",
+              content: [
+                { type: "thinking", thinking: "secret", thinkingSignature: "sig" },
+                { type: "text", text: "visible answer" },
+              ],
+            },
+          ]),
+        } as never,
+        {} as never,
+      ),
+    ).rejects.toBe(anthropicThinkingError);
+
+    expect(contexts[1]?.messages?.[0]).toMatchObject({
+      role: "assistant",
+      content: [{ type: "text", text: "visible answer" }],
+    });
  });

  it("does not retry when the stream fails after yielding a chunk", async () => {
--- a/src/agents/pi-embedded-runner/thinking.ts
+++ b/src/agents/pi-embedded-runner/thinking.ts
@@ -9,6 +9,7 @@ type RecoveryAssessment = "valid" | "incomplete-thinking" | "incomplete-text";
 type RecoverySessionMeta = { id: string; recoveredAnthropicThinking?: boolean };

 const THINKING_BLOCK_ERROR_PATTERN = /thinking or redacted_thinking blocks?.* cannot be modified/i;
+export const OMITTED_ASSISTANT_REASONING_TEXT = "[assistant reasoning omitted]";

 export function isAssistantMessageWithContent(message: AgentMessage): message is AssistantMessage {
  return (
@@ -55,6 +56,11 @@ function hasMeaningfulText(block: AssistantContentBlock): boolean {
    : false;
 }

+function buildOmittedAssistantReasoningContent(): AssistantContentBlock[] {
+  // Provider converters drop blank text blocks; keep this neutral text non-empty so the assistant turn survives replay.
+  return [{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT } as AssistantContentBlock];
+}
+
 /**
 * Strip `type: "thinking"` and `type: "redacted_thinking"` content blocks from
 * all assistant messages except the latest one.
@@ -63,8 +69,8 @@ function hasMeaningfulText(block: AssistantContentBlock): boolean {
 * providers that require replay signatures can continue the conversation.
 *
 * If a non-latest assistant message becomes empty after stripping, it is
- * replaced with a synthetic `{ type: "text", text: "" }` block to preserve
- * turn structure (some providers require strict user/assistant alternation).
+ * replaced with a synthetic non-empty text block to preserve turn structure
+ * through provider adapters that filter blank text blocks.
 *
 * Returns the original array reference when nothing was changed (callers can
 * use reference equality to skip downstream work).
@@ -104,9 +110,7 @@ export function dropThinkingBlocks(messages: AgentMessage[]): AgentMessage[] {
      out.push(msg);
      continue;
    }
-    // Preserve the assistant turn even if all blocks were thinking-only.
-    const content =
-      nextContent.length > 0 ? nextContent : [{ type: "text", text: "" } as AssistantContentBlock];
+    const content = nextContent.length > 0 ? nextContent : buildOmittedAssistantReasoningContent();
    out.push({ ...msg, content });
  }
  return touched ? out : messages;
@@ -130,10 +134,7 @@ function stripAllThinkingBlocks(messages: AgentMessage[]): AgentMessage[] {
    touched = true;
    out.push({
      ...message,
-      content:
-        nextContent.length > 0
-          ? nextContent
-          : ([{ type: "text", text: "" }] as AssistantContentBlock[]),
+      content: nextContent.length > 0 ? nextContent : buildOmittedAssistantReasoningContent(),
    });
  }
  return touched ? out : messages;