fix(replay-history): drop trailing stream-error placeholder before provider send (#77287)

normalizeAssistantReplayContent rewrites empty assistant error turns into
a STREAM_ERROR_FALLBACK_TEXT sentinel to satisfy Bedrock Converse's
non-empty ContentBlock requirement for non-trailing turns. When that
sentinel is the trailing entry, prefill-strict providers reject the
request with "400 This model does not support assistant message prefill.
The conversation must end with a user message." and the session cannot
recover on its own.

Add a post-loop tail trim that drops a trailing assistant turn when its
content is empty and it either has stopReason "error" or is a zero-usage
empty stop, or when its content is only the sentinel text and it carries
that same synthetic provenance. Requiring stopReason "error" or zero
usage before dropping preserves a real model reply whose content happens
to equal the sentinel string.
The trim catches both the in-memory rewrite shape and the sentinel
persisted to disk by session-file-repair.

Tests:
- pnpm test src/agents/pi-embedded-runner/replay-history.test.ts
- pnpm exec oxfmt --check --threads=1 CHANGELOG.md src/agents/pi-embedded-runner/replay-history.ts
  src/agents/pi-embedded-runner/replay-history.test.ts
- pnpm check:changed

Refs #77228
This commit is contained in:
Chunyue Wang
2026-05-05 14:14:01 +08:00
committed by GitHub
parent cd66854b66
commit 24bd0b212f
3 changed files with 188 additions and 4 deletions

View File

@@ -305,6 +305,7 @@ Docs: https://docs.openclaw.ai
- Browser/SSRF: enforce the existing current-tab URL navigation policy before tab-scoped debug, export, and read routes (console, page errors, network requests, trace start/stop, response body, screenshot, snapshot, storage, etc.) collect from an already-selected tab, so blocked tabs return a policy error instead of being read first and redacted only at response time. (#75731) Thanks @eleqtrizit.
- Security/Windows: route the `.cmd`/`.bat` process wrapper through the shared Windows install-root resolver instead of `process.env.ComSpec`, so workspace dotenv-blocked `SystemRoot`/`WINDIR` overrides and unsafe values like UNC paths or path-lists cannot redirect `cmd.exe` selection on Windows. (#77472) Thanks @drobison00.
- Agents/bootstrap: honor `BOOTSTRAP.md` content injected by `agent:bootstrap` hooks when deciding whether bootstrap is pending, so hook-provided required setup instructions are included in the system prompt. (#77501) Thanks @ificator.
- Agents/replay-history: drop trailing assistant turns whose content is empty or carries only the stream-error sentinel before sending the transcript to the provider, so prefill-strict providers (such as github-copilot/claude-opus-4.6) no longer reject the request with `400 The conversation must end with a user message` after a session whose last turn errored before producing content. Refs #77228. (#77287) Thanks @openperf.
## 2026.5.3-1

View File

@@ -58,12 +58,15 @@ function openclawTranscriptAssistant(model: "delivery-mirror" | "gateway-injecte
}
describe("normalizeAssistantReplayContent", () => {
it("converts assistant content: [] to a non-empty sentinel text block when stopReason is error", () => {
const messages = [userMessage("hello"), bedrockAssistant([], "error")];
it("converts mid-turn assistant content: [] to a non-empty sentinel text block when stopReason is error", () => {
const messages = [userMessage("hello"), bedrockAssistant([], "error"), userMessage("retry")];
const out = normalizeAssistantReplayContent(messages);
expect(out).not.toBe(messages);
const repaired = out[1] as AgentMessage & { content: { type: string; text: string }[] };
expect(repaired.content).toEqual([{ type: "text", text: FALLBACK_TEXT }]);
// Trailing user is preserved so request still ends with user.
expect(out).toHaveLength(3);
expect((out[2] as { role: string }).role).toBe("user");
});
it("drops blank user text messages from replay", () => {
@@ -108,9 +111,9 @@ describe("normalizeAssistantReplayContent", () => {
expect(out[1]).toBe(silentStop);
});
it("converts zero-usage empty stop turns to a replay sentinel", () => {
it("converts mid-turn zero-usage empty stop turns to a replay sentinel", () => {
const falseSuccessStop = bedrockAssistant([], "stop");
const messages = [userMessage("hello"), falseSuccessStop];
const messages = [userMessage("hello"), falseSuccessStop, userMessage("retry")];
const out = normalizeAssistantReplayContent(messages);
expect(out).not.toBe(messages);
const repaired = out[1] as AgentMessage & { content: { type: string; text: string }[] };
@@ -183,4 +186,117 @@ describe("normalizeAssistantReplayContent", () => {
const out = normalizeAssistantReplayContent(messages);
expect(out).toBe(messages);
});
it("drops a trailing assistant turn whose content: [] would have been rewritten to the sentinel (#77228)", () => {
// The sentinel was synthesized to satisfy Bedrock's non-empty-content
// rule for *non-trailing* error turns. As the trailing message it would
// make prefill-strict providers (e.g. github-copilot/claude-opus-4.6)
// 400 with "conversation must end with a user message". The original
// turn carried content:[] and zero usage — drop is lossless.
const messages = [userMessage("hello"), bedrockAssistant([], "error")];
const out = normalizeAssistantReplayContent(messages);
expect(out).not.toBe(messages);
expect(out).toHaveLength(1);
expect(out[0]).toBe(messages[0]);
});
it("drops a trailing zero-usage empty stop assistant turn (#77228)", () => {
const falseSuccessStop = bedrockAssistant([], "stop");
const messages = [userMessage("hello"), falseSuccessStop];
const out = normalizeAssistantReplayContent(messages);
expect(out).toHaveLength(1);
expect(out[0]).toBe(messages[0]);
});
it("drops a trailing assistant turn that already carries the persisted sentinel content (#77228)", () => {
// Covers the case where session-file-repair persisted the sentinel to
// disk; on the next turn the loaded transcript ends with a non-empty
// assistant turn whose only content is the sentinel text. Provider
// request must still end with user.
const persistedSentinel = bedrockAssistant([{ type: "text", text: FALLBACK_TEXT }], "error");
const messages = [userMessage("hello"), persistedSentinel];
const out = normalizeAssistantReplayContent(messages);
expect(out).toHaveLength(1);
expect(out[0]).toBe(messages[0]);
});
it("drops several consecutive trailing sentinel/empty-error turns at the tail", () => {
const messages = [
userMessage("hi"),
bedrockAssistant([{ type: "text", text: "real" }]),
userMessage("again"),
bedrockAssistant([], "error"),
bedrockAssistant([{ type: "text", text: FALLBACK_TEXT }], "error"),
];
const out = normalizeAssistantReplayContent(messages);
expect(out).toHaveLength(3);
expect((out.at(-1) as { role: string }).role).toBe("user");
});
it("does not drop a trailing assistant turn that has real content", () => {
const realReply = bedrockAssistant([{ type: "text", text: "hello back" }], "stop", {
input: 1,
output: 1,
totalTokens: 2,
});
const messages = [userMessage("hi"), realReply];
const out = normalizeAssistantReplayContent(messages);
expect(out).toBe(messages);
expect(out).toHaveLength(2);
});
it("does not drop a trailing assistant turn with non-error empty content (toolUse / length)", () => {
// Boundary lock: only error/zero-usage-empty-stop and the sentinel
// shape are droppable. toolUse/length empty turns are real provider
// states and must be preserved on the wire.
const toolUse = bedrockAssistant([], "toolUse");
const messages = [userMessage("hi"), toolUse];
const out = normalizeAssistantReplayContent(messages);
expect(out).toBe(messages);
expect(out).toHaveLength(2);
});
it("preserves a trailing real model reply whose only content happens to be the sentinel text (clawsweeper review on #77287)", () => {
// Defensive boundary: even if a model legitimately replies with the
// exact sentinel string, the trim must require synthetic provenance
// (stopReason: "error" or zero-usage stop) before dropping. Without
// this guard the trim would silently delete a real reply on next
// replay.
const realReplyAsStop = bedrockAssistant([{ type: "text", text: FALLBACK_TEXT }], "stop", {
input: 1,
output: 1,
totalTokens: 2,
});
const messages = [userMessage("hi"), realReplyAsStop];
const out = normalizeAssistantReplayContent(messages);
expect(out).toBe(messages);
expect(out).toHaveLength(2);
expect((out[1] as { content: unknown[] }).content).toEqual([
{ type: "text", text: FALLBACK_TEXT },
]);
});
it("preserves a trailing turn whose sentinel content is paired with stopReason: toolUse (real provider state, not synthetic)", () => {
const toolUseSentinel = bedrockAssistant([{ type: "text", text: FALLBACK_TEXT }], "toolUse");
const messages = [userMessage("hi"), toolUseSentinel];
const out = normalizeAssistantReplayContent(messages);
expect(out).toBe(messages);
expect(out).toHaveLength(2);
});
it("still drops a trailing zero-usage stop turn whose content was already lifted to the sentinel block (post-rewrite shape)", () => {
// Confirms the sentinel-content branch still recognizes the post-rewrite
// shape produced by the in-memory rewrite earlier in the same loop:
// stopReason: "stop" + zero usage + sentinel content. Only the synthetic
// provenance (zero usage + stop) makes this droppable; a non-zero-usage
// version is preserved by the regression test above.
const persistedZeroUsageSentinel = bedrockAssistant(
[{ type: "text", text: FALLBACK_TEXT }],
"stop",
);
const messages = [userMessage("hi"), persistedZeroUsageSentinel];
const out = normalizeAssistantReplayContent(messages);
expect(out).toHaveLength(1);
expect(out[0]).toBe(messages[0]);
});
});

View File

@@ -396,9 +396,76 @@ export function normalizeAssistantReplayContent(messages: AgentMessage[]): Agent
}
out.push(message);
}
// Drop trailing stream-error / zero-usage-empty-stop placeholder turns. The
// sentinel was synthesized to satisfy Bedrock Converse's "ContentBlock must
// not be empty" rule for *non-trailing* error turns; when it is the trailing
// entry, prefill-strict providers (e.g. github-copilot/claude-opus-4.6 — the
// exact path reported in #77228) reject the request with
// `400 This model does not support assistant message prefill. The
// conversation must end with a user message.`. The original turn carried
// `content: []` and zero usage — there is no information to lose by
// dropping it. This trim runs after the main loop so it also catches a
// sentinel that was *persisted* to disk by an earlier session-file repair
// pass (matching the same content shape the loop above produces).
while (out.length > 0) {
const last = out[out.length - 1];
if (!isReplayDroppableTrailingAssistant(last)) {
break;
}
out.pop();
touched = true;
}
return touched ? out : messages;
}
/**
 * Decides whether a message sitting at the tail of the replay transcript is a
 * synthetic placeholder that can be removed before the provider request.
 *
 * A turn qualifies only when it is an assistant message with array content
 * and one of the following holds:
 * - the content is empty and the turn either has `stopReason === "error"` or
 *   is accepted by `isZeroUsageEmptyStopAssistantTurn`;
 * - the content is exactly the stream-error sentinel block and the turn has
 *   the same synthetic provenance (`stopReason === "error"`, or it would be a
 *   zero-usage empty stop if its content were empty).
 *
 * @param message - Candidate tail message; `undefined` is never droppable.
 * @returns `true` when the message may be dropped from the tail.
 */
function isReplayDroppableTrailingAssistant(message: AgentMessage | undefined): boolean {
  if (message?.role !== "assistant") {
    return false;
  }
  const turn = message as { content?: unknown; stopReason?: unknown; usage?: unknown };
  const blocks = turn.content;
  if (!Array.isArray(blocks)) {
    return false;
  }
  const endedWithError = turn.stopReason === "error";
  if (blocks.length === 0) {
    return endedWithError || isZeroUsageEmptyStopAssistantTurn(message);
  }
  // Non-empty content is only droppable when it is the sentinel placeholder.
  // The placeholder text alone is not enough: a genuine model reply could
  // consist of exactly that string, so the turn must also look synthetic
  // (errored, or a zero-usage stop) before it is removed.
  if (!isStreamErrorSentinelContent(blocks)) {
    return false;
  }
  if (endedWithError) {
    return true;
  }
  // Evaluate the zero-usage check against the turn as it looked before the
  // sentinel was substituted in, i.e. with empty content.
  const emptyContentProbe = {
    stopReason: turn.stopReason,
    usage: turn.usage,
    content: [],
  };
  return isZeroUsageEmptyStopAssistantTurn(emptyContentProbe);
}
/**
 * Reports whether an assistant content array consists of exactly one text
 * block whose text equals `STREAM_ERROR_FALLBACK_TEXT`.
 *
 * @param content - Content blocks of an assistant turn.
 * @returns `true` only for a single `{ type: "text" }` block carrying the
 *   sentinel text; `false` for any other length or block shape.
 */
function isStreamErrorSentinelContent(content: readonly unknown[]): boolean {
  if (content.length !== 1) {
    return false;
  }
  const [onlyBlock] = content;
  // `typeof null` is "object", so null needs its own exclusion.
  if (typeof onlyBlock !== "object" || onlyBlock === null) {
    return false;
  }
  const candidate = onlyBlock as { type?: unknown; text?: unknown };
  if (candidate.type !== "text") {
    return false;
  }
  return candidate.text === STREAM_ERROR_FALLBACK_TEXT;
}
function normalizeAssistantUsageSnapshot(usage: unknown) {
const normalized = normalizeUsage((usage ?? undefined) as UsageLike | undefined);
if (!normalized) {