fix(agents): handle empty Claude stop turns

This commit is contained in:
Peter Steinberger
2026-04-26 03:22:36 +01:00
parent a44a3f9171
commit 90cd9fce85
10 changed files with 201 additions and 39 deletions

View File

@@ -63,6 +63,9 @@ Docs: https://docs.openclaw.ai
### Fixes
- Agents/Claude: treat zero-token empty `stop` turns as failed provider output,
retry once, repair replay, and allow configured model fallback instead of
preserving them as successful silent replies. Fixes #71880. Thanks @MagnaAI.
- Diagnostics/OTEL: treat normal early model stream cleanup as a completed model call instead of exporting a misleading `StreamAbandoned` error span. Thanks @vincentkoc.
- Gateway/pairing: stop corrupt or unreadable device/node pairing stores from being treated as empty state, preserving `paired.json` for repair instead of overwriting approved pairings. Fixes #71873. Thanks @iret77.
- ACP: keep `/acp` management commands, plus local `/status` and `/unfocus`, on the Gateway path inside ACP-bound threads so they are not consumed as ACP prompt text. Fixes #66298. Thanks @kindomLee.

View File

@@ -582,6 +582,28 @@ describe("runWithModelFallback", () => {
});
});
it("classifies non-GPT incomplete terminal errors for configured fallback", () => {
const runResult: EmbeddedPiRunResult = {
payloads: [
{ text: "⚠️ Agent couldn't generate a response. Please try again.", isError: true },
],
meta: {
durationMs: 1,
},
};
expect(
classifyEmbeddedPiRunResultForModelFallback({
provider: "anthropic",
model: "claude-opus-4.7",
result: runResult,
}),
).toMatchObject({
code: "incomplete_result",
reason: "format",
});
});
it("keeps aborted harness-classified GPT-5 runs out of fallback", () => {
const runResult: EmbeddedPiRunResult = {
payloads: [],

View File

@@ -0,0 +1,57 @@
// Structural view of an assistant turn: only the fields the zero-usage
// empty-stop check reads, each typed `unknown` so loosely-typed provider
// messages can be passed without casting.
type EmptyAssistantTurnLike = {
content?: unknown;
stopReason?: unknown;
usage?: unknown;
};
// Accepted spellings of usage-counter fields. `total`, `totalTokens`, and
// `total_tokens` are alternate names for the aggregate count (consumed in
// that priority order by hasZeroTokenUsageSnapshot).
type UsageFieldMap = {
input?: unknown;
output?: unknown;
cacheRead?: unknown;
cacheWrite?: unknown;
total?: unknown;
totalTokens?: unknown;
total_tokens?: unknown;
};
// Upstream badlogic/pi-mono should normalize Anthropic zero-token empty `stop`
// turns before OpenClaw sees them. Downstream: openclaw/openclaw#71880.
/** Narrows `value` to a finite number; yields `undefined` for anything else. */
function readFiniteTokenCount(value: unknown): number | undefined {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  return value;
}

/** Type guard: true exactly when `value` is the number 0. */
function isZero(value: number | undefined): value is 0 {
  return value === 0;
}

/**
 * True when a usage snapshot reports zero tokens across the board.
 *
 * With a usable aggregate (`total`, falling back to `totalTokens`, then
 * `total_tokens`): the aggregate must be 0 and every per-channel counter
 * must be absent or 0. Without one: at least one per-channel counter must
 * be present and every present counter must be 0. Non-numeric / non-finite
 * fields are treated as absent; non-object snapshots never qualify.
 */
export function hasZeroTokenUsageSnapshot(usage: unknown): boolean {
  if (usage === null || typeof usage !== "object") {
    return false;
  }
  const fields = usage as {
    input?: unknown;
    output?: unknown;
    cacheRead?: unknown;
    cacheWrite?: unknown;
    total?: unknown;
    totalTokens?: unknown;
    total_tokens?: unknown;
  };
  const channels = [fields.input, fields.output, fields.cacheRead, fields.cacheWrite].map(
    (value) => readFiniteTokenCount(value),
  );
  // Nullish fallback runs before the finite check, so e.g. `total: NaN`
  // does NOT fall through to `totalTokens` — it drops to the channel path.
  const total = readFiniteTokenCount(fields.total ?? fields.totalTokens ?? fields.total_tokens);
  if (total !== undefined) {
    return isZero(total) && channels.every((count) => count === undefined || isZero(count));
  }
  const present = channels.filter((count): count is number => count !== undefined);
  return present.length > 0 && present.every(isZero);
}
/**
 * Detects the false-success turn shape: stopReason `"stop"`, an empty
 * content array, and a zero-token usage snapshot — i.e. the provider
 * reported a normal stop while producing no content and counting no tokens.
 */
export function isZeroUsageEmptyStopAssistantTurn(message: EmptyAssistantTurnLike | null): boolean {
  if (!message || message.stopReason !== "stop") {
    return false;
  }
  const { content } = message;
  if (!Array.isArray(content) || content.length !== 0) {
    return false;
  }
  return hasZeroTokenUsageSnapshot(message.usage);
}

View File

@@ -7,6 +7,7 @@ const FALLBACK_TEXT = "[assistant turn failed before producing content]";
function bedrockAssistant(
content: unknown,
stopReason: "error" | "stop" | "toolUse" | "length" = "error",
usageOverrides: Record<string, number> = {},
): AgentMessage {
return {
role: "assistant",
@@ -21,6 +22,7 @@ function bedrockAssistant(
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
...usageOverrides,
},
stopReason,
timestamp: 0,
@@ -60,19 +62,28 @@ describe("normalizeAssistantReplayContent", () => {
expect(repaired.content).toEqual([{ type: "text", text: FALLBACK_TEXT }]);
});
it("preserves silent-reply turns (stopReason=stop, content=[]) untouched", () => {
it("preserves nonzero-usage silent-reply turns (stopReason=stop, content=[]) untouched", () => {
// run.empty-error-retry.test.ts treats `stopReason:"stop"` + `content:[]`
// as a legitimate NO_REPLY / silent-reply, NOT a crash. Substituting the
// failure sentinel here would inject a fabricated "[assistant turn failed
// before producing content]" into the next provider request and change
// model behavior even though no failure occurred.
const silentStop = bedrockAssistant([], "stop");
const silentStop = bedrockAssistant([], "stop", { input: 100, totalTokens: 100 });
const messages = [userMessage("hello"), silentStop];
const out = normalizeAssistantReplayContent(messages);
expect(out).toBe(messages);
expect(out[1]).toBe(silentStop);
});
it("converts zero-usage empty stop turns to a replay sentinel", () => {
const falseSuccessStop = bedrockAssistant([], "stop");
const messages = [userMessage("hello"), falseSuccessStop];
const out = normalizeAssistantReplayContent(messages);
expect(out).not.toBe(messages);
const repaired = out[1] as AgentMessage & { content: { type: string; text: string }[] };
expect(repaired.content).toEqual([{ type: "text", text: FALLBACK_TEXT }]);
});
it("preserves empty content with non-error stopReasons (toolUse, length) untouched", () => {
// Boundary lock: only `stopReason:"error"` should trip the sentinel
// substitution. `toolUse` and `length` are reachable in practice when a

View File

@@ -41,6 +41,7 @@ import {
type AssistantUsageSnapshot,
type UsageLike,
} from "../usage.js";
import { isZeroUsageEmptyStopAssistantTurn } from "./empty-assistant-turn.js";
import { dropThinkingBlocks, stripInvalidThinkingSignatures } from "./thinking.js";
const INTER_SESSION_PREFIX_BASE = "[Inter-session message]";
@@ -282,14 +283,16 @@ export function normalizeAssistantReplayContent(messages: AgentMessage[]): Agent
// failure statement in the next provider request and change model
// behavior even when no failure occurred.
//
// Only `stopReason: "error"` turns are the Bedrock-Converse replay
// poison this fix is scoped to: the provider rejects assistant
// messages with no ContentBlock, and the persisted error turn was
// never going to render anything useful to the model anyway. Leaving
// non-error empty-content turns untouched preserves silent-reply
// semantics on every other code path.
// `stopReason: "error"` turns are Bedrock-Converse replay poison:
// the provider rejects assistant messages with no ContentBlock, and
// the persisted error turn was never going to render anything useful
// to the model anyway. A zero-token `stop` turn is the same shape from
// the next run's perspective: the provider produced no billable prompt
// or completion and no content. Leaving other non-error empty-content
// turns untouched preserves silent-reply semantics on every other code
// path.
const stopReason = (message as { stopReason?: unknown }).stopReason;
if (stopReason === "error") {
if (stopReason === "error" || isZeroUsageEmptyStopAssistantTurn(message)) {
out.push({
...message,
content: [{ type: "text", text: STREAM_ERROR_FALLBACK_TEXT }],

View File

@@ -83,7 +83,7 @@ export function classifyEmbeddedPiRunResultForModelFallback(params: {
hasDirectlySentBlockReply?: boolean;
hasBlockReplyPipelineOutput?: boolean;
}): ModelFallbackResultClassification {
if (!isGpt5ModelId(params.model) || !isEmbeddedPiRunResult(params.result)) {
if (!isEmbeddedPiRunResult(params.result)) {
return null;
}
if (
@@ -108,6 +108,22 @@ export function classifyEmbeddedPiRunResultForModelFallback(params: {
}
const payloads = params.result.payloads ?? [];
const errorText = payloads
.filter((payload) => payload?.isError === true)
.map((payload) => (typeof payload.text === "string" ? payload.text : ""))
.join("\n");
if (EMPTY_TERMINAL_REPLY_RE.test(errorText)) {
return {
message: `${params.provider}/${params.model} ended with an incomplete terminal response`,
reason: "format",
code: "incomplete_result",
};
}
if (!isGpt5ModelId(params.model)) {
return null;
}
if (payloads.length === 0 && hasDeliberateSilentTerminalReply(params.result)) {
return null;
}
@@ -126,10 +142,6 @@ export function classifyEmbeddedPiRunResultForModelFallback(params: {
};
}
const errorText = payloads
.filter((payload) => payload?.isError === true)
.map((payload) => (typeof payload.text === "string" ? payload.text : ""))
.join("\n");
if (PLAN_ONLY_TERMINAL_REPLY_RE.test(errorText)) {
return {
message: `${params.provider}/${params.model} exhausted plan-only retries without taking action`,

View File

@@ -14,10 +14,9 @@ import type { EmbeddedRunAttemptResult } from "./run/types.js";
//
// Symptom: ollama/glm-5.1 occasionally ends a turn with stopReason="error" and
// zero output tokens after a successful tool-call sequence. The user sees no
// reply and has to nudge. The existing empty-response retry path is gated on
// the strict-agentic contract (gpt-5 only), so non-frontier models fell
// through to "incomplete turn detected". This suite locks in a narrower,
// model-agnostic resubmission.
// reply and has to nudge. This suite locks in a narrower model-agnostic
// resubmission for errored turns, separate from the visible-answer retry used
// for stopReason="stop" empty zero-token turns.
let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent;

View File

@@ -441,6 +441,60 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
expect(mockedLog.warn).toHaveBeenCalledWith(expect.stringContaining("empty response detected"));
});
it("retries zero-token empty Claude stop turns with a visible-answer continuation instruction", async () => {
mockedClassifyFailoverReason.mockReturnValue(null);
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
makeAttemptResult({
assistantTexts: [],
lastAssistant: {
role: "assistant",
stopReason: "stop",
provider: "anthropic",
model: "claude-opus-4.7",
content: [],
usage: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
},
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
}),
);
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
makeAttemptResult({
assistantTexts: ["Visible Claude answer."],
lastAssistant: {
role: "assistant",
stopReason: "stop",
provider: "anthropic",
model: "claude-opus-4.7",
content: [{ type: "text", text: "Visible Claude answer." }],
usage: {
input: 100,
output: 5,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 105,
},
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
}),
);
await runEmbeddedPiAgent({
...overflowBaseRunParams,
provider: "anthropic",
model: "claude-opus-4.7",
runId: "run-empty-zero-usage-claude-continuation",
});
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
const secondCall = mockedRunEmbeddedAttempt.mock.calls[1]?.[0] as { prompt?: string };
expect(secondCall.prompt).toContain(EMPTY_RESPONSE_RETRY_INSTRUCTION);
expect(mockedLog.warn).toHaveBeenCalledWith(expect.stringContaining("empty response detected"));
});
it("surfaces an error after exhausting empty-response retries", async () => {
mockedClassifyFailoverReason.mockReturnValue(null);
mockedRunEmbeddedAttempt.mockResolvedValue(

View File

@@ -617,9 +617,9 @@ export async function runEmbeddedPiAgent(
let timeoutCompactionAttempts = 0;
// Silent-error retry: non-strict-agentic models (e.g. ollama/glm-5.1) can
// end a turn with stopReason="error" + zero output tokens, producing no
// user-visible text. The existing empty-response retry is gated on
// isStrictAgenticSupportedProviderModel (gpt-5 only). This is an
// orthogonal, model-agnostic resubmission.
// user-visible text. This is an orthogonal, model-agnostic resubmission
// for errored turns; stopReason="stop" empty zero-token turns use the
// visible-answer retry instruction instead.
const MAX_EMPTY_ERROR_RETRIES = 3;
let emptyErrorRetries = 0;
const overloadFailoverBackoffMs = resolveOverloadFailoverBackoffMs(params.config);
@@ -2089,13 +2089,10 @@ export async function runEmbeddedPiAgent(
// ── silent-error retry ────────────────────────────────────────────
// Observed with ollama/glm-5.1: a turn can end with stopReason="error"
// and zero output tokens AND empty content after a successful
// tool-call sequence, producing no user-visible text at all. The
// existing empty-response retry path (resolveEmptyResponseRetryInstruction)
// is gated on the strict-agentic contract (gpt-5 only), so non-frontier
// models fall through to "incomplete turn detected" → silent gap
// until the user nudges. This is a narrower, model-agnostic
// resubmission: same prompt, same session transcript (tool results
// already captured), no instruction injection. Placed before the
// tool-call sequence, producing no user-visible text at all. This
// path is narrower than the empty-response continuation retry:
// same prompt, same session transcript (tool results already
// captured), no instruction injection. Placed before the
// incompleteTurnText return so it actually gets a chance to fire.
//
// Content-empty guard: a reasoning-only error (content has thinking

View File

@@ -7,6 +7,7 @@ import {
stripProviderPrefix,
} from "../../execution-contract.js";
import { isLikelyMutatingToolName } from "../../tool-mutation.js";
import { isZeroUsageEmptyStopAssistantTurn } from "../empty-assistant-turn.js";
import { assessLastAssistantMessage } from "../thinking.js";
import type { EmbeddedRunLivenessState } from "../types.js";
import type { EmbeddedRunAttemptResult } from "./types.js";
@@ -393,16 +394,6 @@ export function resolveEmptyResponseRetryInstruction(params: {
return null;
}
if (
!shouldApplyPlanningOnlyRetryGuard({
provider: params.provider,
modelId: params.modelId,
executionContract: params.executionContract,
})
) {
return null;
}
if (
!isEmptyResponseAssistantTurn({
payloadCount: params.payloadCount,
@@ -412,7 +403,20 @@ export function resolveEmptyResponseRetryInstruction(params: {
return null;
}
return EMPTY_RESPONSE_RETRY_INSTRUCTION;
if (
shouldApplyPlanningOnlyRetryGuard({
provider: params.provider,
modelId: params.modelId,
executionContract: params.executionContract,
}) ||
isZeroUsageEmptyStopAssistantTurn(
params.attempt.currentAttemptAssistant ?? params.attempt.lastAssistant ?? null,
)
) {
return EMPTY_RESPONSE_RETRY_INSTRUCTION;
}
return null;
}
function shouldApplyPlanningOnlyRetryGuard(params: {