mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 08:00:42 +00:00
fix: preserve omitted thinking replay turns
This commit is contained in:
@@ -80,6 +80,9 @@ Docs: https://docs.openclaw.ai
|
||||
- Agents/Bedrock: prevent empty assistant stream-error turns from poisoning
|
||||
Converse replay by persisting, repairing, and replaying a non-empty fallback
|
||||
block. Fixes #71572. (#71627) Thanks @openperf.
|
||||
- Agents/Anthropic/Bedrock: preserve stripped thinking-only assistant replay
|
||||
turns with non-empty omitted-reasoning text so provider adapters keep strict
|
||||
user/assistant turn shape. Thanks @wujiaming88.
|
||||
- Browser/CDP: make readiness diagnostics use the same discovery-first fallback as reachability for bare `ws://` Browserless and Browserbase CDP URLs. Fixes #69532.
|
||||
- Browser/CDP: explain that loopback Browserless or other externally managed CDP services need `attachOnly: true` and a matching Browserless `EXTERNAL` endpoint when reporting local port ownership conflicts, and fall back to the configured bare WebSocket root when a discovered Browserless endpoint rejects CDP. Fixes #49815.
|
||||
- Gateway/reload: preserve indefinite `gateway.reload.deferralTimeoutMs: 0` semantics for channel hot reload deferrals so active agent runs are not interrupted by a forced channel restart. (#71637) Thanks @Poo-Squirry.
|
||||
|
||||
@@ -133,6 +133,9 @@ external end-user instructions.
|
||||
|
||||
- Tool result pairing repair and synthetic tool results.
|
||||
- Turn validation (merge consecutive user turns to satisfy strict alternation).
|
||||
- Older thinking-only assistant turns that must be stripped are replaced with
|
||||
non-empty omitted-reasoning text so provider adapters do not drop the replay
|
||||
turn.
|
||||
|
||||
**Amazon Bedrock (Converse API)**
|
||||
|
||||
@@ -140,6 +143,8 @@ external end-user instructions.
|
||||
before replay. Bedrock Converse rejects assistant messages with `content: []`, so
|
||||
persisted assistant turns with `stopReason: "error"` and empty content are also
|
||||
repaired on disk before load.
|
||||
- Older thinking-only assistant turns that must be stripped are replaced with
|
||||
non-empty omitted-reasoning text so the Converse replay keeps strict turn shape.
|
||||
- Replay filters OpenClaw delivery-mirror and gateway-injected assistant turns.
|
||||
- Image sanitization applies through the global rule.
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ import {
|
||||
} from "./live-cache-test-support.js";
|
||||
import { isLiveTestEnabled } from "./live-test-helpers.js";
|
||||
import { wrapStreamFnSanitizeMalformedToolCalls } from "./pi-embedded-runner/run/attempt.tool-call-normalization.js";
|
||||
import { OMITTED_ASSISTANT_REASONING_TEXT } from "./pi-embedded-runner/thinking.js";
|
||||
import { buildAssistantMessageWithZeroUsage } from "./stream-message-shared.js";
|
||||
|
||||
const ANTHROPIC_LIVE = isLiveTestEnabled(["ANTHROPIC_LIVE_TEST"]);
|
||||
@@ -33,7 +34,7 @@ function buildLiveAnthropicModel(): {
|
||||
name: modelId,
|
||||
api: "anthropic-messages" as const,
|
||||
provider: "anthropic",
|
||||
baseUrl: "https://api.anthropic.com/v1",
|
||||
baseUrl: "https://api.anthropic.com",
|
||||
reasoning: true,
|
||||
input: ["text"] as const,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
@@ -44,6 +45,94 @@ function buildLiveAnthropicModel(): {
|
||||
}
|
||||
|
||||
describeLive("pi embedded anthropic replay sanitization (live)", () => {
|
||||
it(
|
||||
"accepts regular text-only assistant replay history",
|
||||
async () => {
|
||||
const { apiKey, model } = buildLiveAnthropicModel();
|
||||
const messages: Message[] = [
|
||||
{
|
||||
role: "user",
|
||||
content: "Remember the marker REGULAR_ANTHROPIC_REPLAY_OK.",
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
buildAssistantMessageWithZeroUsage({
|
||||
model: { api: model.api, provider: model.provider, id: model.id },
|
||||
content: [{ type: "text", text: "I remember REGULAR_ANTHROPIC_REPLAY_OK." }],
|
||||
stopReason: "stop",
|
||||
}),
|
||||
{
|
||||
role: "user",
|
||||
content: "Reply with a short confirmation if this replay history is valid.",
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
];
|
||||
|
||||
logLiveCache(`anthropic regular replay live model=${model.provider}/${model.id}`);
|
||||
const response = await completeSimpleWithLiveTimeout(
|
||||
model,
|
||||
{ messages },
|
||||
{
|
||||
apiKey,
|
||||
cacheRetention: "none",
|
||||
sessionId: "anthropic-regular-replay-live",
|
||||
maxTokens: 64,
|
||||
temperature: 0,
|
||||
},
|
||||
"anthropic regular text replay live synthetic transcript",
|
||||
ANTHROPIC_TIMEOUT_MS,
|
||||
);
|
||||
|
||||
const text = extractAssistantText(response);
|
||||
logLiveCache(`anthropic regular replay live result=${JSON.stringify(text)}`);
|
||||
expect(text.trim().length).toBeGreaterThan(0);
|
||||
},
|
||||
6 * 60_000,
|
||||
);
|
||||
|
||||
it(
|
||||
"accepts omitted-reasoning placeholder assistant replay history",
|
||||
async () => {
|
||||
const { apiKey, model } = buildLiveAnthropicModel();
|
||||
const messages: Message[] = [
|
||||
{
|
||||
role: "user",
|
||||
content: "Remember that the previous assistant reasoning was omitted.",
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
buildAssistantMessageWithZeroUsage({
|
||||
model: { api: model.api, provider: model.provider, id: model.id },
|
||||
content: [{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }],
|
||||
stopReason: "stop",
|
||||
}),
|
||||
{
|
||||
role: "user",
|
||||
content: "Reply with exactly OK if this placeholder replay history is valid.",
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
];
|
||||
|
||||
logLiveCache(`anthropic omitted-reasoning replay live model=${model.provider}/${model.id}`);
|
||||
const response = await completeSimpleWithLiveTimeout(
|
||||
model,
|
||||
{ messages },
|
||||
{
|
||||
apiKey,
|
||||
cacheRetention: "none",
|
||||
sessionId: "anthropic-omitted-reasoning-replay-live",
|
||||
maxTokens: 64,
|
||||
temperature: 0,
|
||||
},
|
||||
"anthropic omitted reasoning replay live synthetic transcript",
|
||||
ANTHROPIC_TIMEOUT_MS,
|
||||
);
|
||||
|
||||
const text = extractAssistantText(response);
|
||||
logLiveCache(`anthropic omitted-reasoning replay live result=${JSON.stringify(text)}`);
|
||||
expect(text.trim().length).toBeGreaterThan(0);
|
||||
},
|
||||
6 * 60_000,
|
||||
);
|
||||
|
||||
it(
|
||||
"preserves toolCall replay history that Anthropic accepts end-to-end",
|
||||
async () => {
|
||||
|
||||
@@ -16,6 +16,7 @@ import {
|
||||
TEST_SESSION_ID,
|
||||
} from "./pi-embedded-runner.sanitize-session-history.test-harness.js";
|
||||
import { validateReplayTurns } from "./pi-embedded-runner/replay-history.js";
|
||||
import { OMITTED_ASSISTANT_REASONING_TEXT } from "./pi-embedded-runner/thinking.js";
|
||||
import { castAgentMessage, castAgentMessages } from "./test-helpers/agent-message-fixtures.js";
|
||||
import { extractToolCallsFromAssistant } from "./tool-call-id.js";
|
||||
import type { TranscriptPolicy } from "./transcript-policy.js";
|
||||
@@ -1176,6 +1177,92 @@ describe("sanitizeSessionHistory", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
it("keeps regular latest Anthropic thinking replay while preserving older stripped turns", async () => {
|
||||
setNonGoogleModelApi();
|
||||
|
||||
const messages = castAgentMessages([
|
||||
makeUserMessage("first"),
|
||||
makeAssistantMessage([
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "old private reasoning",
|
||||
thinkingSignature: "sig_old",
|
||||
},
|
||||
]),
|
||||
makeUserMessage("second"),
|
||||
makeAssistantMessage([
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "latest private reasoning",
|
||||
thinkingSignature: "sig_latest",
|
||||
},
|
||||
{ type: "text", text: "latest visible answer" },
|
||||
]),
|
||||
]);
|
||||
|
||||
const result = await sanitizeAnthropicHistory({
|
||||
messages,
|
||||
modelId: "claude-3-7-sonnet-20250219",
|
||||
});
|
||||
|
||||
expect((result[1] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
|
||||
{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT },
|
||||
]);
|
||||
expect((result[3] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "latest private reasoning",
|
||||
thinkingSignature: "sig_latest",
|
||||
},
|
||||
{ type: "text", text: "latest visible answer" },
|
||||
]);
|
||||
});
|
||||
|
||||
it.each([
|
||||
{
|
||||
provider: "anthropic",
|
||||
modelApi: "anthropic-messages",
|
||||
label: "anthropic",
|
||||
},
|
||||
{
|
||||
provider: "amazon-bedrock",
|
||||
modelApi: "bedrock-converse-stream",
|
||||
label: "bedrock",
|
||||
},
|
||||
])(
|
||||
"preserves older stripped thinking-only assistant turns for $label replay",
|
||||
async ({ provider, modelApi }) => {
|
||||
setNonGoogleModelApi();
|
||||
|
||||
const messages = castAgentMessages([
|
||||
makeUserMessage("first"),
|
||||
makeAssistantMessage([
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "old private reasoning",
|
||||
thinkingSignature: "sig_old",
|
||||
},
|
||||
]),
|
||||
makeUserMessage("second"),
|
||||
makeAssistantMessage([{ type: "text", text: "latest visible answer" }]),
|
||||
]);
|
||||
|
||||
const result = await sanitizeAnthropicHistory({
|
||||
provider,
|
||||
modelApi,
|
||||
messages,
|
||||
modelId: "claude-3-7-sonnet-20250219",
|
||||
});
|
||||
|
||||
expect((result[1] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
|
||||
{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT },
|
||||
]);
|
||||
expect((result[3] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
|
||||
{ type: "text", text: "latest visible answer" },
|
||||
]);
|
||||
},
|
||||
);
|
||||
|
||||
it("uses immutable thinking replay for anthropic-compatible providers when policy preserves signatures", async () => {
|
||||
setNonGoogleModelApi();
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ import { createAssistantMessageEventStream } from "@mariozechner/pi-ai";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { castAgentMessage, castAgentMessages } from "../test-helpers/agent-message-fixtures.js";
|
||||
import {
|
||||
OMITTED_ASSISTANT_REASONING_TEXT,
|
||||
assessLastAssistantMessage,
|
||||
dropThinkingBlocks,
|
||||
isAssistantMessageWithContent,
|
||||
@@ -103,6 +104,56 @@ describe("dropThinkingBlocks", () => {
|
||||
{ type: "text", text: "latest text" },
|
||||
]);
|
||||
});
|
||||
|
||||
it("uses non-empty omitted-reasoning text when an older assistant turn is thinking-only", () => {
|
||||
const messages: AgentMessage[] = [
|
||||
castAgentMessage({ role: "user", content: "first" }),
|
||||
castAgentMessage({
|
||||
role: "assistant",
|
||||
content: [{ type: "thinking", thinking: "old", thinkingSignature: "sig_old" }],
|
||||
}),
|
||||
castAgentMessage({ role: "user", content: "second" }),
|
||||
castAgentMessage({
|
||||
role: "assistant",
|
||||
content: [
|
||||
{ type: "thinking", thinking: "latest", thinkingSignature: "sig_latest" },
|
||||
{ type: "text", text: "latest text" },
|
||||
],
|
||||
}),
|
||||
];
|
||||
|
||||
const result = dropThinkingBlocks(messages);
|
||||
const oldAssistant = result[1] as Extract<AgentMessage, { role: "assistant" }>;
|
||||
const latestAssistant = result[3] as Extract<AgentMessage, { role: "assistant" }>;
|
||||
const originalLatestAssistant = messages[3] as Extract<AgentMessage, { role: "assistant" }>;
|
||||
|
||||
expect(oldAssistant.content).toEqual([
|
||||
{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT },
|
||||
]);
|
||||
expect(latestAssistant.content).toEqual(originalLatestAssistant.content);
|
||||
});
|
||||
|
||||
it("uses non-empty omitted-reasoning text when an older assistant turn is redacted-thinking-only", () => {
|
||||
const messages: AgentMessage[] = [
|
||||
castAgentMessage({ role: "user", content: "first" }),
|
||||
castAgentMessage({
|
||||
role: "assistant",
|
||||
content: [{ type: "redacted_thinking", data: "opaque" }],
|
||||
}),
|
||||
castAgentMessage({ role: "user", content: "second" }),
|
||||
castAgentMessage({
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "latest text" }],
|
||||
}),
|
||||
];
|
||||
|
||||
const result = dropThinkingBlocks(messages);
|
||||
const oldAssistant = result[1] as Extract<AgentMessage, { role: "assistant" }>;
|
||||
|
||||
expect(oldAssistant.content).toEqual([
|
||||
{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT },
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("sanitizeThinkingForRecovery", () => {
|
||||
@@ -191,11 +242,13 @@ describe("wrapAnthropicStreamWithRecovery", () => {
|
||||
"thinking or redacted_thinking blocks in the latest assistant message cannot be modified",
|
||||
);
|
||||
|
||||
it("retries once when the request is rejected before streaming", async () => {
|
||||
it("retries once with omitted-reasoning text when the request is rejected before streaming", async () => {
|
||||
let callCount = 0;
|
||||
const contexts: Array<{ messages?: AgentMessage[] }> = [];
|
||||
const wrapped = wrapAnthropicStreamWithRecovery(
|
||||
(() => {
|
||||
((_model, context) => {
|
||||
callCount += 1;
|
||||
contexts.push(context as { messages?: AgentMessage[] });
|
||||
return Promise.reject(anthropicThinkingError);
|
||||
}) as Parameters<typeof wrapAnthropicStreamWithRecovery>[0],
|
||||
{ id: "test-session" },
|
||||
@@ -216,6 +269,44 @@ describe("wrapAnthropicStreamWithRecovery", () => {
|
||||
),
|
||||
).rejects.toBe(anthropicThinkingError);
|
||||
expect(callCount).toBe(2);
|
||||
expect(contexts[1]?.messages?.[0]).toMatchObject({
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }],
|
||||
});
|
||||
});
|
||||
|
||||
it("retries with visible assistant text when stripping thinking leaves content", async () => {
|
||||
const contexts: Array<{ messages?: AgentMessage[] }> = [];
|
||||
const wrapped = wrapAnthropicStreamWithRecovery(
|
||||
((_model, context) => {
|
||||
contexts.push(context as { messages?: AgentMessage[] });
|
||||
return Promise.reject(anthropicThinkingError);
|
||||
}) as Parameters<typeof wrapAnthropicStreamWithRecovery>[0],
|
||||
{ id: "test-session" },
|
||||
);
|
||||
|
||||
await expect(
|
||||
wrapped(
|
||||
{} as never,
|
||||
{
|
||||
messages: castAgentMessages([
|
||||
{
|
||||
role: "assistant",
|
||||
content: [
|
||||
{ type: "thinking", thinking: "secret", thinkingSignature: "sig" },
|
||||
{ type: "text", text: "visible answer" },
|
||||
],
|
||||
},
|
||||
]),
|
||||
} as never,
|
||||
{} as never,
|
||||
),
|
||||
).rejects.toBe(anthropicThinkingError);
|
||||
|
||||
expect(contexts[1]?.messages?.[0]).toMatchObject({
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "visible answer" }],
|
||||
});
|
||||
});
|
||||
|
||||
it("does not retry when the stream fails after yielding a chunk", async () => {
|
||||
|
||||
@@ -9,6 +9,7 @@ type RecoveryAssessment = "valid" | "incomplete-thinking" | "incomplete-text";
|
||||
type RecoverySessionMeta = { id: string; recoveredAnthropicThinking?: boolean };
|
||||
|
||||
const THINKING_BLOCK_ERROR_PATTERN = /thinking or redacted_thinking blocks?.* cannot be modified/i;
|
||||
/** Non-empty placeholder text used in place of assistant content whose reasoning blocks were stripped. */
export const OMITTED_ASSISTANT_REASONING_TEXT = "[assistant reasoning omitted]";
|
||||
|
||||
export function isAssistantMessageWithContent(message: AgentMessage): message is AssistantMessage {
|
||||
return (
|
||||
@@ -55,6 +56,11 @@ function hasMeaningfulText(block: AssistantContentBlock): boolean {
|
||||
: false;
|
||||
}
|
||||
|
||||
/**
 * Build the replacement content for an assistant turn that has no blocks left
 * once its thinking blocks are removed.
 *
 * @returns A one-element content array holding a single text block whose text
 *   is `OMITTED_ASSISTANT_REASONING_TEXT`.
 */
function buildOmittedAssistantReasoningContent(): AssistantContentBlock[] {
|
||||
// Provider converters drop blank text blocks; keep this neutral text non-empty so the assistant turn survives replay.
|
||||
return [{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT } as AssistantContentBlock];
|
||||
}
|
||||
|
||||
/**
|
||||
* Strip `type: "thinking"` and `type: "redacted_thinking"` content blocks from
|
||||
* all assistant messages except the latest one.
|
||||
@@ -63,8 +69,8 @@ function hasMeaningfulText(block: AssistantContentBlock): boolean {
|
||||
* providers that require replay signatures can continue the conversation.
|
||||
*
|
||||
* If a non-latest assistant message becomes empty after stripping, it is
|
||||
* replaced with a synthetic `{ type: "text", text: "" }` block to preserve
|
||||
* turn structure (some providers require strict user/assistant alternation).
|
||||
* replaced with a synthetic non-empty text block to preserve turn structure
|
||||
* through provider adapters that filter blank text blocks.
|
||||
*
|
||||
* Returns the original array reference when nothing was changed (callers can
|
||||
* use reference equality to skip downstream work).
|
||||
@@ -104,9 +110,7 @@ export function dropThinkingBlocks(messages: AgentMessage[]): AgentMessage[] {
|
||||
out.push(msg);
|
||||
continue;
|
||||
}
|
||||
// Preserve the assistant turn even if all blocks were thinking-only.
|
||||
const content =
|
||||
nextContent.length > 0 ? nextContent : [{ type: "text", text: "" } as AssistantContentBlock];
|
||||
const content = nextContent.length > 0 ? nextContent : buildOmittedAssistantReasoningContent();
|
||||
out.push({ ...msg, content });
|
||||
}
|
||||
return touched ? out : messages;
|
||||
@@ -130,10 +134,7 @@ function stripAllThinkingBlocks(messages: AgentMessage[]): AgentMessage[] {
|
||||
touched = true;
|
||||
out.push({
|
||||
...message,
|
||||
content:
|
||||
nextContent.length > 0
|
||||
? nextContent
|
||||
: ([{ type: "text", text: "" }] as AssistantContentBlock[]),
|
||||
content: nextContent.length > 0 ? nextContent : buildOmittedAssistantReasoningContent(),
|
||||
});
|
||||
}
|
||||
return touched ? out : messages;
|
||||
|
||||
Reference in New Issue
Block a user