mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 06:40:44 +00:00
fix(agents): handle empty Claude stop turns
This commit is contained in:
@@ -63,6 +63,9 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Fixes
|
||||
|
||||
- Agents/Claude: treat zero-token empty `stop` turns as failed provider output,
  retry once, repair replay, and allow configured model fallback instead of
  preserving them as successful silent replies. Fixes #71880. Thanks @MagnaAI.
|
||||
- Diagnostics/OTEL: treat normal early model stream cleanup as a completed model call instead of exporting a misleading `StreamAbandoned` error span. Thanks @vincentkoc.
|
||||
- Gateway/pairing: stop corrupt or unreadable device/node pairing stores from being treated as empty state, preserving `paired.json` for repair instead of overwriting approved pairings. Fixes #71873. Thanks @iret77.
|
||||
- ACP: keep `/acp` management commands, plus local `/status` and `/unfocus`, on the Gateway path inside ACP-bound threads so they are not consumed as ACP prompt text. Fixes #66298. Thanks @kindomLee.
|
||||
|
||||
@@ -582,6 +582,28 @@ describe("runWithModelFallback", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("classifies non-GPT incomplete terminal errors for configured fallback", () => {
|
||||
const runResult: EmbeddedPiRunResult = {
|
||||
payloads: [
|
||||
{ text: "⚠️ Agent couldn't generate a response. Please try again.", isError: true },
|
||||
],
|
||||
meta: {
|
||||
durationMs: 1,
|
||||
},
|
||||
};
|
||||
|
||||
expect(
|
||||
classifyEmbeddedPiRunResultForModelFallback({
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4.7",
|
||||
result: runResult,
|
||||
}),
|
||||
).toMatchObject({
|
||||
code: "incomplete_result",
|
||||
reason: "format",
|
||||
});
|
||||
});
|
||||
|
||||
it("keeps aborted harness-classified GPT-5 runs out of fallback", () => {
|
||||
const runResult: EmbeddedPiRunResult = {
|
||||
payloads: [],
|
||||
|
||||
57
src/agents/pi-embedded-runner/empty-assistant-turn.ts
Normal file
57
src/agents/pi-embedded-runner/empty-assistant-turn.ts
Normal file
@@ -0,0 +1,57 @@
|
||||
// Minimal structural view of an assistant message: only the fields needed to
// detect an empty zero-usage `stop` turn. All fields are `unknown` because the
// message originates from provider-specific shapes that are validated at the
// point of use rather than trusted here.
type EmptyAssistantTurnLike = {
  content?: unknown;
  stopReason?: unknown;
  usage?: unknown;
};
|
||||
|
||||
// Loose map of token-usage fields seen across provider usage snapshots.
// Providers disagree on the spelling of the total field (`total`,
// `totalTokens`, `total_tokens`), so every field is optional and typed
// `unknown` until validated as a finite number by readFiniteTokenCount.
type UsageFieldMap = {
  input?: unknown;
  output?: unknown;
  cacheRead?: unknown;
  cacheWrite?: unknown;
  total?: unknown;
  totalTokens?: unknown;
  total_tokens?: unknown;
};
|
||||
|
||||
// Upstream badlogic/pi-mono should normalize Anthropic zero-token empty `stop`
|
||||
// turns before OpenClaw sees them. Downstream: openclaw/openclaw#71880.
|
||||
function readFiniteTokenCount(value: unknown): number | undefined {
|
||||
return typeof value === "number" && Number.isFinite(value) ? value : undefined;
|
||||
}
|
||||
|
||||
function isZero(value: number | undefined): value is 0 {
|
||||
return value === 0;
|
||||
}
|
||||
|
||||
export function hasZeroTokenUsageSnapshot(usage: unknown): boolean {
|
||||
if (!usage || typeof usage !== "object") {
|
||||
return false;
|
||||
}
|
||||
const typed = usage as UsageFieldMap;
|
||||
const input = readFiniteTokenCount(typed.input);
|
||||
const output = readFiniteTokenCount(typed.output);
|
||||
const cacheRead = readFiniteTokenCount(typed.cacheRead);
|
||||
const cacheWrite = readFiniteTokenCount(typed.cacheWrite);
|
||||
const total = readFiniteTokenCount(typed.total ?? typed.totalTokens ?? typed.total_tokens);
|
||||
if (total !== undefined) {
|
||||
return (
|
||||
total === 0 &&
|
||||
[input, output, cacheRead, cacheWrite].every((value) => value === undefined || value === 0)
|
||||
);
|
||||
}
|
||||
const components = [input, output, cacheRead, cacheWrite].filter(
|
||||
(value): value is number => value !== undefined,
|
||||
);
|
||||
return components.length > 0 && components.every(isZero);
|
||||
}
|
||||
|
||||
export function isZeroUsageEmptyStopAssistantTurn(message: EmptyAssistantTurnLike | null): boolean {
|
||||
return Boolean(
|
||||
message &&
|
||||
message.stopReason === "stop" &&
|
||||
Array.isArray(message.content) &&
|
||||
message.content.length === 0 &&
|
||||
hasZeroTokenUsageSnapshot(message.usage),
|
||||
);
|
||||
}
|
||||
@@ -7,6 +7,7 @@ const FALLBACK_TEXT = "[assistant turn failed before producing content]";
|
||||
function bedrockAssistant(
|
||||
content: unknown,
|
||||
stopReason: "error" | "stop" | "toolUse" | "length" = "error",
|
||||
usageOverrides: Record<string, number> = {},
|
||||
): AgentMessage {
|
||||
return {
|
||||
role: "assistant",
|
||||
@@ -21,6 +22,7 @@ function bedrockAssistant(
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
...usageOverrides,
|
||||
},
|
||||
stopReason,
|
||||
timestamp: 0,
|
||||
@@ -60,19 +62,28 @@ describe("normalizeAssistantReplayContent", () => {
|
||||
expect(repaired.content).toEqual([{ type: "text", text: FALLBACK_TEXT }]);
|
||||
});
|
||||
|
||||
it("preserves silent-reply turns (stopReason=stop, content=[]) untouched", () => {
|
||||
it("preserves nonzero-usage silent-reply turns (stopReason=stop, content=[]) untouched", () => {
|
||||
// run.empty-error-retry.test.ts treats `stopReason:"stop"` + `content:[]`
|
||||
// as a legitimate NO_REPLY / silent-reply, NOT a crash. Substituting the
|
||||
// failure sentinel here would inject a fabricated "[assistant turn failed
|
||||
// before producing content]" into the next provider request and change
|
||||
// model behavior even though no failure occurred.
|
||||
const silentStop = bedrockAssistant([], "stop");
|
||||
const silentStop = bedrockAssistant([], "stop", { input: 100, totalTokens: 100 });
|
||||
const messages = [userMessage("hello"), silentStop];
|
||||
const out = normalizeAssistantReplayContent(messages);
|
||||
expect(out).toBe(messages);
|
||||
expect(out[1]).toBe(silentStop);
|
||||
});
|
||||
|
||||
it("converts zero-usage empty stop turns to a replay sentinel", () => {
|
||||
const falseSuccessStop = bedrockAssistant([], "stop");
|
||||
const messages = [userMessage("hello"), falseSuccessStop];
|
||||
const out = normalizeAssistantReplayContent(messages);
|
||||
expect(out).not.toBe(messages);
|
||||
const repaired = out[1] as AgentMessage & { content: { type: string; text: string }[] };
|
||||
expect(repaired.content).toEqual([{ type: "text", text: FALLBACK_TEXT }]);
|
||||
});
|
||||
|
||||
it("preserves empty content with non-error stopReasons (toolUse, length) untouched", () => {
|
||||
// Boundary lock: only `stopReason:"error"` should trip the sentinel
|
||||
// substitution. `toolUse` and `length` are reachable in practice when a
|
||||
|
||||
@@ -41,6 +41,7 @@ import {
|
||||
type AssistantUsageSnapshot,
|
||||
type UsageLike,
|
||||
} from "../usage.js";
|
||||
import { isZeroUsageEmptyStopAssistantTurn } from "./empty-assistant-turn.js";
|
||||
import { dropThinkingBlocks, stripInvalidThinkingSignatures } from "./thinking.js";
|
||||
|
||||
const INTER_SESSION_PREFIX_BASE = "[Inter-session message]";
|
||||
@@ -282,14 +283,16 @@ export function normalizeAssistantReplayContent(messages: AgentMessage[]): Agent
|
||||
// failure statement in the next provider request and change model
|
||||
// behavior even when no failure occurred.
|
||||
//
|
||||
// Only `stopReason: "error"` turns are the Bedrock-Converse replay
|
||||
// poison this fix is scoped to: the provider rejects assistant
|
||||
// messages with no ContentBlock, and the persisted error turn was
|
||||
// never going to render anything useful to the model anyway. Leaving
|
||||
// non-error empty-content turns untouched preserves silent-reply
|
||||
// semantics on every other code path.
|
||||
// `stopReason: "error"` turns are Bedrock-Converse replay poison:
|
||||
// the provider rejects assistant messages with no ContentBlock, and
|
||||
// the persisted error turn was never going to render anything useful
|
||||
// to the model anyway. A zero-token `stop` turn is the same shape from
|
||||
// the next run's perspective: the provider produced no billable prompt
|
||||
// or completion and no content. Leaving other non-error empty-content
|
||||
// turns untouched preserves silent-reply semantics on every other code
|
||||
// path.
|
||||
const stopReason = (message as { stopReason?: unknown }).stopReason;
|
||||
if (stopReason === "error") {
|
||||
if (stopReason === "error" || isZeroUsageEmptyStopAssistantTurn(message)) {
|
||||
out.push({
|
||||
...message,
|
||||
content: [{ type: "text", text: STREAM_ERROR_FALLBACK_TEXT }],
|
||||
|
||||
@@ -83,7 +83,7 @@ export function classifyEmbeddedPiRunResultForModelFallback(params: {
|
||||
hasDirectlySentBlockReply?: boolean;
|
||||
hasBlockReplyPipelineOutput?: boolean;
|
||||
}): ModelFallbackResultClassification {
|
||||
if (!isGpt5ModelId(params.model) || !isEmbeddedPiRunResult(params.result)) {
|
||||
if (!isEmbeddedPiRunResult(params.result)) {
|
||||
return null;
|
||||
}
|
||||
if (
|
||||
@@ -108,6 +108,22 @@ export function classifyEmbeddedPiRunResultForModelFallback(params: {
|
||||
}
|
||||
|
||||
const payloads = params.result.payloads ?? [];
|
||||
const errorText = payloads
|
||||
.filter((payload) => payload?.isError === true)
|
||||
.map((payload) => (typeof payload.text === "string" ? payload.text : ""))
|
||||
.join("\n");
|
||||
if (EMPTY_TERMINAL_REPLY_RE.test(errorText)) {
|
||||
return {
|
||||
message: `${params.provider}/${params.model} ended with an incomplete terminal response`,
|
||||
reason: "format",
|
||||
code: "incomplete_result",
|
||||
};
|
||||
}
|
||||
|
||||
if (!isGpt5ModelId(params.model)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (payloads.length === 0 && hasDeliberateSilentTerminalReply(params.result)) {
|
||||
return null;
|
||||
}
|
||||
@@ -126,10 +142,6 @@ export function classifyEmbeddedPiRunResultForModelFallback(params: {
|
||||
};
|
||||
}
|
||||
|
||||
const errorText = payloads
|
||||
.filter((payload) => payload?.isError === true)
|
||||
.map((payload) => (typeof payload.text === "string" ? payload.text : ""))
|
||||
.join("\n");
|
||||
if (PLAN_ONLY_TERMINAL_REPLY_RE.test(errorText)) {
|
||||
return {
|
||||
message: `${params.provider}/${params.model} exhausted plan-only retries without taking action`,
|
||||
|
||||
@@ -14,10 +14,9 @@ import type { EmbeddedRunAttemptResult } from "./run/types.js";
|
||||
//
|
||||
// Symptom: ollama/glm-5.1 occasionally ends a turn with stopReason="error" and
|
||||
// zero output tokens after a successful tool-call sequence. The user sees no
|
||||
// reply and has to nudge. The existing empty-response retry path is gated on
|
||||
// the strict-agentic contract (gpt-5 only), so non-frontier models fell
|
||||
// through to "incomplete turn detected". This suite locks in a narrower,
|
||||
// model-agnostic resubmission.
|
||||
// reply and has to nudge. This suite locks in a narrower model-agnostic
|
||||
// resubmission for errored turns, separate from the visible-answer retry used
|
||||
// for stopReason="stop" empty zero-token turns.
|
||||
|
||||
let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent;
|
||||
|
||||
|
||||
@@ -441,6 +441,60 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
|
||||
expect(mockedLog.warn).toHaveBeenCalledWith(expect.stringContaining("empty response detected"));
|
||||
});
|
||||
|
||||
it("retries zero-token empty Claude stop turns with a visible-answer continuation instruction", async () => {
|
||||
mockedClassifyFailoverReason.mockReturnValue(null);
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
makeAttemptResult({
|
||||
assistantTexts: [],
|
||||
lastAssistant: {
|
||||
role: "assistant",
|
||||
stopReason: "stop",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4.7",
|
||||
content: [],
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
},
|
||||
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
|
||||
}),
|
||||
);
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
makeAttemptResult({
|
||||
assistantTexts: ["Visible Claude answer."],
|
||||
lastAssistant: {
|
||||
role: "assistant",
|
||||
stopReason: "stop",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4.7",
|
||||
content: [{ type: "text", text: "Visible Claude answer." }],
|
||||
usage: {
|
||||
input: 100,
|
||||
output: 5,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 105,
|
||||
},
|
||||
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
|
||||
}),
|
||||
);
|
||||
|
||||
await runEmbeddedPiAgent({
|
||||
...overflowBaseRunParams,
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4.7",
|
||||
runId: "run-empty-zero-usage-claude-continuation",
|
||||
});
|
||||
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
|
||||
const secondCall = mockedRunEmbeddedAttempt.mock.calls[1]?.[0] as { prompt?: string };
|
||||
expect(secondCall.prompt).toContain(EMPTY_RESPONSE_RETRY_INSTRUCTION);
|
||||
expect(mockedLog.warn).toHaveBeenCalledWith(expect.stringContaining("empty response detected"));
|
||||
});
|
||||
|
||||
it("surfaces an error after exhausting empty-response retries", async () => {
|
||||
mockedClassifyFailoverReason.mockReturnValue(null);
|
||||
mockedRunEmbeddedAttempt.mockResolvedValue(
|
||||
|
||||
@@ -617,9 +617,9 @@ export async function runEmbeddedPiAgent(
|
||||
let timeoutCompactionAttempts = 0;
|
||||
// Silent-error retry: non-strict-agentic models (e.g. ollama/glm-5.1) can
|
||||
// end a turn with stopReason="error" + zero output tokens, producing no
|
||||
// user-visible text. The existing empty-response retry is gated on
|
||||
// isStrictAgenticSupportedProviderModel (gpt-5 only). This is an
|
||||
// orthogonal, model-agnostic resubmission.
|
||||
// user-visible text. This is an orthogonal, model-agnostic resubmission
|
||||
// for errored turns; stopReason="stop" empty zero-token turns use the
|
||||
// visible-answer retry instruction instead.
|
||||
const MAX_EMPTY_ERROR_RETRIES = 3;
|
||||
let emptyErrorRetries = 0;
|
||||
const overloadFailoverBackoffMs = resolveOverloadFailoverBackoffMs(params.config);
|
||||
@@ -2089,13 +2089,10 @@ export async function runEmbeddedPiAgent(
|
||||
// ── silent-error retry ────────────────────────────────────────────
|
||||
// Observed with ollama/glm-5.1: a turn can end with stopReason="error"
|
||||
// and zero output tokens AND empty content after a successful
|
||||
// tool-call sequence, producing no user-visible text at all. The
|
||||
// existing empty-response retry path (resolveEmptyResponseRetryInstruction)
|
||||
// is gated on the strict-agentic contract (gpt-5 only), so non-frontier
|
||||
// models fall through to "incomplete turn detected" → silent gap
|
||||
// until the user nudges. This is a narrower, model-agnostic
|
||||
// resubmission: same prompt, same session transcript (tool results
|
||||
// already captured), no instruction injection. Placed before the
|
||||
// tool-call sequence, producing no user-visible text at all. This
|
||||
// path is narrower than the empty-response continuation retry:
|
||||
// same prompt, same session transcript (tool results already
|
||||
// captured), no instruction injection. Placed before the
|
||||
// incompleteTurnText return so it actually gets a chance to fire.
|
||||
//
|
||||
// Content-empty guard: a reasoning-only error (content has thinking
|
||||
|
||||
@@ -7,6 +7,7 @@ import {
|
||||
stripProviderPrefix,
|
||||
} from "../../execution-contract.js";
|
||||
import { isLikelyMutatingToolName } from "../../tool-mutation.js";
|
||||
import { isZeroUsageEmptyStopAssistantTurn } from "../empty-assistant-turn.js";
|
||||
import { assessLastAssistantMessage } from "../thinking.js";
|
||||
import type { EmbeddedRunLivenessState } from "../types.js";
|
||||
import type { EmbeddedRunAttemptResult } from "./types.js";
|
||||
@@ -393,16 +394,6 @@ export function resolveEmptyResponseRetryInstruction(params: {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (
|
||||
!shouldApplyPlanningOnlyRetryGuard({
|
||||
provider: params.provider,
|
||||
modelId: params.modelId,
|
||||
executionContract: params.executionContract,
|
||||
})
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (
|
||||
!isEmptyResponseAssistantTurn({
|
||||
payloadCount: params.payloadCount,
|
||||
@@ -412,7 +403,20 @@ export function resolveEmptyResponseRetryInstruction(params: {
|
||||
return null;
|
||||
}
|
||||
|
||||
return EMPTY_RESPONSE_RETRY_INSTRUCTION;
|
||||
if (
|
||||
shouldApplyPlanningOnlyRetryGuard({
|
||||
provider: params.provider,
|
||||
modelId: params.modelId,
|
||||
executionContract: params.executionContract,
|
||||
}) ||
|
||||
isZeroUsageEmptyStopAssistantTurn(
|
||||
params.attempt.currentAttemptAssistant ?? params.attempt.lastAssistant ?? null,
|
||||
)
|
||||
) {
|
||||
return EMPTY_RESPONSE_RETRY_INSTRUCTION;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
function shouldApplyPlanningOnlyRetryGuard(params: {
|
||||
|
||||
Reference in New Issue
Block a user