diff --git a/CHANGELOG.md b/CHANGELOG.md index c985a2f6a4b..a876f6aef91 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -80,6 +80,9 @@ Docs: https://docs.openclaw.ai - Agents/Bedrock: prevent empty assistant stream-error turns from poisoning Converse replay by persisting, repairing, and replaying a non-empty fallback block. Fixes #71572. (#71627) Thanks @openperf. +- Agents/Anthropic/Bedrock: preserve stripped thinking-only assistant replay + turns with non-empty omitted-reasoning text so provider adapters keep strict + user/assistant turn shape. Thanks @wujiaming88. - Browser/CDP: make readiness diagnostics use the same discovery-first fallback as reachability for bare `ws://` Browserless and Browserbase CDP URLs. Fixes #69532. - Browser/CDP: explain that loopback Browserless or other externally managed CDP services need `attachOnly: true` and matching Browserless `EXTERNAL` endpoint when reporting local port ownership conflicts, and fall back to the configured bare WebSocket root when a discovered Browserless endpoint rejects CDP. Fixes #49815. - Gateway/reload: preserve indefinite `gateway.reload.deferralTimeoutMs: 0` semantics for channel hot reload deferrals so active agent runs are not interrupted by a forced channel restart. (#71637) Thanks @Poo-Squirry. diff --git a/docs/reference/transcript-hygiene.md b/docs/reference/transcript-hygiene.md index cc4b3ba751d..d5b4c242eab 100644 --- a/docs/reference/transcript-hygiene.md +++ b/docs/reference/transcript-hygiene.md @@ -133,6 +133,9 @@ external end-user instructions. - Tool result pairing repair and synthetic tool results. - Turn validation (merge consecutive user turns to satisfy strict alternation). +- Older thinking-only assistant turns that must be stripped are replaced with + non-empty omitted-reasoning text so provider adapters do not drop the replay + turn. **Amazon Bedrock (Converse API)** @@ -140,6 +143,8 @@ external end-user instructions. before replay. Bedrock Converse rejects assistant messages with `content: []`, so persisted assistant turns with `stopReason: "error"` and empty content are also repaired on disk before load. +- Older thinking-only assistant turns that must be stripped are replaced with + non-empty omitted-reasoning text so the Converse replay keeps strict turn shape. - Replay filters OpenClaw delivery-mirror and gateway-injected assistant turns. - Image sanitization applies through the global rule. diff --git a/src/agents/pi-embedded-runner.anthropic-tool-replay.live.test.ts b/src/agents/pi-embedded-runner.anthropic-tool-replay.live.test.ts index 1b2b0749a3d..a7c642ecbce 100644 --- a/src/agents/pi-embedded-runner.anthropic-tool-replay.live.test.ts +++ b/src/agents/pi-embedded-runner.anthropic-tool-replay.live.test.ts @@ -7,6 +7,7 @@ import { } from "./live-cache-test-support.js"; import { isLiveTestEnabled } from "./live-test-helpers.js"; import { wrapStreamFnSanitizeMalformedToolCalls } from "./pi-embedded-runner/run/attempt.tool-call-normalization.js"; +import { OMITTED_ASSISTANT_REASONING_TEXT } from "./pi-embedded-runner/thinking.js"; import { buildAssistantMessageWithZeroUsage } from "./stream-message-shared.js"; const ANTHROPIC_LIVE = isLiveTestEnabled(["ANTHROPIC_LIVE_TEST"]); @@ -33,7 +34,7 @@ function buildLiveAnthropicModel(): { name: modelId, api: "anthropic-messages" as const, provider: "anthropic", - baseUrl: "https://api.anthropic.com/v1", + baseUrl: "https://api.anthropic.com", reasoning: true, input: ["text"] as const, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, @@ -44,6 +45,94 @@ function buildLiveAnthropicModel(): { } describeLive("pi embedded anthropic replay sanitization (live)", () => { + it( + "accepts regular text-only assistant replay history", + async () => { + const { apiKey, model } = buildLiveAnthropicModel(); + const messages: Message[] = [ + { + role: "user", + content: "Remember the marker REGULAR_ANTHROPIC_REPLAY_OK.", + timestamp: Date.now(), + }, + buildAssistantMessageWithZeroUsage({ + model: { api: model.api, provider: model.provider, id: model.id }, + content: [{ type: "text", text: "I remember REGULAR_ANTHROPIC_REPLAY_OK." }], + stopReason: "stop", + }), + { + role: "user", + content: "Reply with a short confirmation if this replay history is valid.", + timestamp: Date.now(), + }, + ]; + + logLiveCache(`anthropic regular replay live model=${model.provider}/${model.id}`); + const response = await completeSimpleWithLiveTimeout( + model, + { messages }, + { + apiKey, + cacheRetention: "none", + sessionId: "anthropic-regular-replay-live", + maxTokens: 64, + temperature: 0, + }, + "anthropic regular text replay live synthetic transcript", + ANTHROPIC_TIMEOUT_MS, + ); + + const text = extractAssistantText(response); + logLiveCache(`anthropic regular replay live result=${JSON.stringify(text)}`); + expect(text.trim().length).toBeGreaterThan(0); + }, + 6 * 60_000, + ); + + it( + "accepts omitted-reasoning placeholder assistant replay history", + async () => { + const { apiKey, model } = buildLiveAnthropicModel(); + const messages: Message[] = [ + { + role: "user", + content: "Remember that the previous assistant reasoning was omitted.", + timestamp: Date.now(), + }, + buildAssistantMessageWithZeroUsage({ + model: { api: model.api, provider: model.provider, id: model.id }, + content: [{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }], + stopReason: "stop", + }), + { + role: "user", + content: "Reply with exactly OK if this placeholder replay history is valid.", + timestamp: Date.now(), + }, + ]; + + logLiveCache(`anthropic omitted-reasoning replay live model=${model.provider}/${model.id}`); + const response = await completeSimpleWithLiveTimeout( + model, + { messages }, + { + apiKey, + cacheRetention: "none", + sessionId: "anthropic-omitted-reasoning-replay-live", + maxTokens: 64, + temperature: 0, + }, + "anthropic omitted reasoning replay live synthetic transcript", + ANTHROPIC_TIMEOUT_MS, + ); + + const text = extractAssistantText(response); + logLiveCache(`anthropic omitted-reasoning replay live result=${JSON.stringify(text)}`); + expect(text.trim().length).toBeGreaterThan(0); + }, + 6 * 60_000, + ); + it( "preserves toolCall replay history that Anthropic accepts end-to-end", async () => { diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts index cc8fb530b5f..cf8745004e9 100644 --- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts +++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts @@ -16,6 +16,7 @@ import { TEST_SESSION_ID, } from "./pi-embedded-runner.sanitize-session-history.test-harness.js"; import { validateReplayTurns } from "./pi-embedded-runner/replay-history.js"; +import { OMITTED_ASSISTANT_REASONING_TEXT } from "./pi-embedded-runner/thinking.js"; import { castAgentMessage, castAgentMessages } from "./test-helpers/agent-message-fixtures.js"; import { extractToolCallsFromAssistant } from "./tool-call-id.js"; import type { TranscriptPolicy } from "./transcript-policy.js"; @@ -1176,6 +1177,92 @@ describe("sanitizeSessionHistory", () => { ]); }); + it("keeps regular latest Anthropic thinking replay while preserving older stripped turns", async () => { + setNonGoogleModelApi(); + + const messages = castAgentMessages([ + makeUserMessage("first"), + makeAssistantMessage([ + { + type: "thinking", + thinking: "old private reasoning", + thinkingSignature: "sig_old", + }, + ]), + makeUserMessage("second"), + makeAssistantMessage([ + { + type: "thinking", + thinking: "latest private reasoning", + thinkingSignature: "sig_latest", + }, + { type: "text", text: "latest visible answer" }, + ]), + ]); + + const result = await sanitizeAnthropicHistory({ + messages, + modelId: "claude-3-7-sonnet-20250219", + }); + + expect((result[1] as Extract).content).toEqual([ + { type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }, + ]); + expect((result[3] as Extract).content).toEqual([ + { + type: "thinking", + thinking: "latest private reasoning", + thinkingSignature: "sig_latest", + }, + { type: "text", text: "latest visible answer" }, + ]); + }); + + it.each([ + { + provider: "anthropic", + modelApi: "anthropic-messages", + label: "anthropic", + }, + { + provider: "amazon-bedrock", + modelApi: "bedrock-converse-stream", + label: "bedrock", + }, + ])( + "preserves older stripped thinking-only assistant turns for $label replay", + async ({ provider, modelApi }) => { + setNonGoogleModelApi(); + + const messages = castAgentMessages([ + makeUserMessage("first"), + makeAssistantMessage([ + { + type: "thinking", + thinking: "old private reasoning", + thinkingSignature: "sig_old", + }, + ]), + makeUserMessage("second"), + makeAssistantMessage([{ type: "text", text: "latest visible answer" }]), + ]); + + const result = await sanitizeAnthropicHistory({ + provider, + modelApi, + messages, + modelId: "claude-3-7-sonnet-20250219", + }); + + expect((result[1] as Extract).content).toEqual([ + { type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }, + ]); + expect((result[3] as Extract).content).toEqual([ + { type: "text", text: "latest visible answer" }, + ]); + }, + ); + it("uses immutable thinking replay for anthropic-compatible providers when policy preserves signatures", async () => { setNonGoogleModelApi(); diff --git a/src/agents/pi-embedded-runner/thinking.test.ts b/src/agents/pi-embedded-runner/thinking.test.ts index c02f826bb59..7598145bf03 100644 --- a/src/agents/pi-embedded-runner/thinking.test.ts +++ b/src/agents/pi-embedded-runner/thinking.test.ts @@ -3,6 +3,7 @@ import { createAssistantMessageEventStream } from "@mariozechner/pi-ai"; import { describe, expect, it } from "vitest"; import { castAgentMessage, castAgentMessages } from "../test-helpers/agent-message-fixtures.js"; import { + OMITTED_ASSISTANT_REASONING_TEXT, assessLastAssistantMessage, dropThinkingBlocks, isAssistantMessageWithContent, @@ -103,6 +104,56 @@ describe("dropThinkingBlocks", () => { { type: "text", text: "latest text" }, ]); }); + + it("uses non-empty omitted-reasoning text when an older assistant turn is thinking-only", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ role: "user", content: "first" }), + castAgentMessage({ + role: "assistant", + content: [{ type: "thinking", thinking: "old", thinkingSignature: "sig_old" }], + }), + castAgentMessage({ role: "user", content: "second" }), + castAgentMessage({ + role: "assistant", + content: [ + { type: "thinking", thinking: "latest", thinkingSignature: "sig_latest" }, + { type: "text", text: "latest text" }, + ], + }), + ]; + + const result = dropThinkingBlocks(messages); + const oldAssistant = result[1] as Extract; + const latestAssistant = result[3] as Extract; + const originalLatestAssistant = messages[3] as Extract; + + expect(oldAssistant.content).toEqual([ + { type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }, + ]); + expect(latestAssistant.content).toEqual(originalLatestAssistant.content); + }); + + it("uses non-empty omitted-reasoning text when an older assistant turn is redacted-thinking-only", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ role: "user", content: "first" }), + castAgentMessage({ + role: "assistant", + content: [{ type: "redacted_thinking", data: "opaque" }], + }), + castAgentMessage({ role: "user", content: "second" }), + castAgentMessage({ + role: "assistant", + content: [{ type: "text", text: "latest text" }], + }), + ]; + + const result = dropThinkingBlocks(messages); + const oldAssistant = result[1] as Extract; + + expect(oldAssistant.content).toEqual([ + { type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }, + ]); + }); }); describe("sanitizeThinkingForRecovery", () => { @@ -191,11 +242,13 @@ describe("wrapAnthropicStreamWithRecovery", () => { "thinking or redacted_thinking blocks in the latest assistant message cannot be modified", ); - it("retries once when the request is rejected before streaming", async () => { + it("retries once with omitted-reasoning text when the request is rejected before streaming", async () => { let callCount = 0; + const contexts: Array<{ messages?: AgentMessage[] }> = []; const wrapped = wrapAnthropicStreamWithRecovery( - (() => { + ((_model, context) => { callCount += 1; + contexts.push(context as { messages?: AgentMessage[] }); return Promise.reject(anthropicThinkingError); }) as Parameters[0], { id: "test-session" }, @@ -216,6 +269,44 @@ describe("wrapAnthropicStreamWithRecovery", () => { ), ).rejects.toBe(anthropicThinkingError); expect(callCount).toBe(2); + expect(contexts[1]?.messages?.[0]).toMatchObject({ + role: "assistant", + content: [{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }], + }); + }); + + it("retries with visible assistant text when stripping thinking leaves content", async () => { + const contexts: Array<{ messages?: AgentMessage[] }> = []; + const wrapped = wrapAnthropicStreamWithRecovery( + ((_model, context) => { + contexts.push(context as { messages?: AgentMessage[] }); + return Promise.reject(anthropicThinkingError); + }) as Parameters[0], + { id: "test-session" }, + ); + + await expect( + wrapped( + {} as never, + { + messages: castAgentMessages([ + { + role: "assistant", + content: [ + { type: "thinking", thinking: "secret", thinkingSignature: "sig" }, + { type: "text", text: "visible answer" }, + ], + }, + ]), + } as never, + {} as never, + ), + ).rejects.toBe(anthropicThinkingError); + + expect(contexts[1]?.messages?.[0]).toMatchObject({ + role: "assistant", + content: [{ type: "text", text: "visible answer" }], + }); }); it("does not retry when the stream fails after yielding a chunk", async () => { diff --git a/src/agents/pi-embedded-runner/thinking.ts b/src/agents/pi-embedded-runner/thinking.ts index 037b4675ae4..4281dcff446 100644 --- a/src/agents/pi-embedded-runner/thinking.ts +++ b/src/agents/pi-embedded-runner/thinking.ts @@ -9,6 +9,7 @@ type RecoveryAssessment = "valid" | "incomplete-thinking" | "incomplete-text"; type RecoverySessionMeta = { id: string; recoveredAnthropicThinking?: boolean }; const THINKING_BLOCK_ERROR_PATTERN = /thinking or redacted_thinking blocks?.* cannot be modified/i; +export const OMITTED_ASSISTANT_REASONING_TEXT = "[assistant reasoning omitted]"; export function isAssistantMessageWithContent(message: AgentMessage): message is AssistantMessage { return ( @@ -55,6 +56,11 @@ function hasMeaningfulText(block: AssistantContentBlock): boolean { : false; } +function buildOmittedAssistantReasoningContent(): AssistantContentBlock[] { + // Provider converters drop blank text blocks; keep this neutral text non-empty so the assistant turn survives replay. + return [{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT } as AssistantContentBlock]; +} + /** * Strip `type: "thinking"` and `type: "redacted_thinking"` content blocks from * all assistant messages except the latest one. @@ -63,8 +69,8 @@ function hasMeaningfulText(block: AssistantContentBlock): boolean { * providers that require replay signatures can continue the conversation. * * If a non-latest assistant message becomes empty after stripping, it is - * replaced with a synthetic `{ type: "text", text: "" }` block to preserve - * turn structure (some providers require strict user/assistant alternation). + * replaced with a synthetic non-empty text block to preserve turn structure + * through provider adapters that filter blank text blocks. * * Returns the original array reference when nothing was changed (callers can * use reference equality to skip downstream work). @@ -104,9 +110,7 @@ export function dropThinkingBlocks(messages: AgentMessage[]): AgentMessage[] { out.push(msg); continue; } - // Preserve the assistant turn even if all blocks were thinking-only. - const content = - nextContent.length > 0 ? nextContent : [{ type: "text", text: "" } as AssistantContentBlock]; + const content = nextContent.length > 0 ? nextContent : buildOmittedAssistantReasoningContent(); out.push({ ...msg, content }); } return touched ? out : messages; @@ -130,10 +134,7 @@ function stripAllThinkingBlocks(messages: AgentMessage[]): AgentMessage[] { touched = true; out.push({ ...message, - content: - nextContent.length > 0 - ? nextContent - : ([{ type: "text", text: "" }] as AssistantContentBlock[]), + content: nextContent.length > 0 ? nextContent : buildOmittedAssistantReasoningContent(), }); } return touched ? out : messages;