diff --git a/CHANGELOG.md b/CHANGELOG.md index 2149a303e78..aa7636f0a07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -63,6 +63,9 @@ Docs: https://docs.openclaw.ai ### Fixes +- Agents/Claude: treat zero-token empty `stop` turns as failed provider output, + retry once, repair replay, and allow configured model fallback instead of + preserving them as successful silent replies. Fixes #71880. Thanks @MagnaAI. - Diagnostics/OTEL: treat normal early model stream cleanup as a completed model call instead of exporting a misleading `StreamAbandoned` error span. Thanks @vincentkoc. - Gateway/pairing: stop corrupt or unreadable device/node pairing stores from being treated as empty state, preserving `paired.json` for repair instead of overwriting approved pairings. Fixes #71873. Thanks @iret77. - ACP: keep `/acp` management commands, plus local `/status` and `/unfocus`, on the Gateway path inside ACP-bound threads so they are not consumed as ACP prompt text. Fixes #66298. Thanks @kindomLee. diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts index e9be6319e88..f9ea9a8e045 100644 --- a/src/agents/model-fallback.test.ts +++ b/src/agents/model-fallback.test.ts @@ -582,6 +582,28 @@ describe("runWithModelFallback", () => { }); }); + it("classifies non-GPT incomplete terminal errors for configured fallback", () => { + const runResult: EmbeddedPiRunResult = { + payloads: [ + { text: "⚠️ Agent couldn't generate a response. Please try again.", isError: true }, + ], + meta: { + durationMs: 1, + }, + }; + + expect( + classifyEmbeddedPiRunResultForModelFallback({ + provider: "anthropic", + model: "claude-opus-4.7", + result: runResult, + }), + ).toMatchObject({ + code: "incomplete_result", + reason: "format", + }); + }); + it("keeps aborted harness-classified GPT-5 runs out of fallback", () => { const runResult: EmbeddedPiRunResult = { payloads: [], diff --git a/src/agents/pi-embedded-runner/empty-assistant-turn.ts b/src/agents/pi-embedded-runner/empty-assistant-turn.ts new file mode 100644 index 00000000000..6235aec7bb5 --- /dev/null +++ b/src/agents/pi-embedded-runner/empty-assistant-turn.ts @@ -0,0 +1,57 @@ +type EmptyAssistantTurnLike = { + content?: unknown; + stopReason?: unknown; + usage?: unknown; +}; + +type UsageFieldMap = { + input?: unknown; + output?: unknown; + cacheRead?: unknown; + cacheWrite?: unknown; + total?: unknown; + totalTokens?: unknown; + total_tokens?: unknown; +}; + +// Upstream badlogic/pi-mono should normalize Anthropic zero-token empty `stop` +// turns before OpenClaw sees them. Downstream: openclaw/openclaw#71880. +function readFiniteTokenCount(value: unknown): number | undefined { + return typeof value === "number" && Number.isFinite(value) ? value : undefined; +} + +function isZero(value: number | undefined): value is 0 { + return value === 0; +} + +export function hasZeroTokenUsageSnapshot(usage: unknown): boolean { + if (!usage || typeof usage !== "object") { + return false; + } + const typed = usage as UsageFieldMap; + const input = readFiniteTokenCount(typed.input); + const output = readFiniteTokenCount(typed.output); + const cacheRead = readFiniteTokenCount(typed.cacheRead); + const cacheWrite = readFiniteTokenCount(typed.cacheWrite); + const total = readFiniteTokenCount(typed.total ?? typed.totalTokens ?? typed.total_tokens); + if (total !== undefined) { + return ( + total === 0 && + [input, output, cacheRead, cacheWrite].every((value) => value === undefined || value === 0) + ); + } + const components = [input, output, cacheRead, cacheWrite].filter( + (value): value is number => value !== undefined, + ); + return components.length > 0 && components.every(isZero); +} + +export function isZeroUsageEmptyStopAssistantTurn(message: EmptyAssistantTurnLike | null): boolean { + return Boolean( + message && + message.stopReason === "stop" && + Array.isArray(message.content) && + message.content.length === 0 && + hasZeroTokenUsageSnapshot(message.usage), + ); +} diff --git a/src/agents/pi-embedded-runner/replay-history.test.ts b/src/agents/pi-embedded-runner/replay-history.test.ts index a294cb060a2..5942e9189d4 100644 --- a/src/agents/pi-embedded-runner/replay-history.test.ts +++ b/src/agents/pi-embedded-runner/replay-history.test.ts @@ -7,6 +7,7 @@ const FALLBACK_TEXT = "[assistant turn failed before producing content]"; function bedrockAssistant( content: unknown, stopReason: "error" | "stop" | "toolUse" | "length" = "error", + usageOverrides: Record = {}, ): AgentMessage { return { role: "assistant", @@ -21,6 +22,7 @@ function bedrockAssistant( cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + ...usageOverrides, }, stopReason, timestamp: 0, @@ -60,19 +62,28 @@ describe("normalizeAssistantReplayContent", () => { expect(repaired.content).toEqual([{ type: "text", text: FALLBACK_TEXT }]); }); - it("preserves silent-reply turns (stopReason=stop, content=[]) untouched", () => { + it("preserves nonzero-usage silent-reply turns (stopReason=stop, content=[]) untouched", () => { // run.empty-error-retry.test.ts treats `stopReason:"stop"` + `content:[]` // as a legitimate NO_REPLY / silent-reply, NOT a crash. Substituting the // failure sentinel here would inject a fabricated "[assistant turn failed // before producing content]" into the next provider request and change // model behavior even though no failure occurred. - const silentStop = bedrockAssistant([], "stop"); + const silentStop = bedrockAssistant([], "stop", { input: 100, totalTokens: 100 }); const messages = [userMessage("hello"), silentStop]; const out = normalizeAssistantReplayContent(messages); expect(out).toBe(messages); expect(out[1]).toBe(silentStop); }); + it("converts zero-usage empty stop turns to a replay sentinel", () => { + const falseSuccessStop = bedrockAssistant([], "stop"); + const messages = [userMessage("hello"), falseSuccessStop]; + const out = normalizeAssistantReplayContent(messages); + expect(out).not.toBe(messages); + const repaired = out[1] as AgentMessage & { content: { type: string; text: string }[] }; + expect(repaired.content).toEqual([{ type: "text", text: FALLBACK_TEXT }]); + }); + it("preserves empty content with non-error stopReasons (toolUse, length) untouched", () => { // Boundary lock: only `stopReason:"error"` should trip the sentinel // substitution. `toolUse` and `length` are reachable in practice when a diff --git a/src/agents/pi-embedded-runner/replay-history.ts b/src/agents/pi-embedded-runner/replay-history.ts index cc2676b9f77..7310161399b 100644 --- a/src/agents/pi-embedded-runner/replay-history.ts +++ b/src/agents/pi-embedded-runner/replay-history.ts @@ -41,6 +41,7 @@ import { type AssistantUsageSnapshot, type UsageLike, } from "../usage.js"; +import { isZeroUsageEmptyStopAssistantTurn } from "./empty-assistant-turn.js"; import { dropThinkingBlocks, stripInvalidThinkingSignatures } from "./thinking.js"; const INTER_SESSION_PREFIX_BASE = "[Inter-session message]"; @@ -282,14 +283,16 @@ export function normalizeAssistantReplayContent(messages: AgentMessage[]): Agent // failure statement in the next provider request and change model // behavior even when no failure occurred. // - // Only `stopReason: "error"` turns are the Bedrock-Converse replay - // poison this fix is scoped to: the provider rejects assistant - // messages with no ContentBlock, and the persisted error turn was - // never going to render anything useful to the model anyway. Leaving - // non-error empty-content turns untouched preserves silent-reply - // semantics on every other code path. + // `stopReason: "error"` turns are Bedrock-Converse replay poison: + // the provider rejects assistant messages with no ContentBlock, and + // the persisted error turn was never going to render anything useful + // to the model anyway. A zero-token `stop` turn is the same shape from + // the next run's perspective: the provider produced no billable prompt + // or completion and no content. Leaving other non-error empty-content + // turns untouched preserves silent-reply semantics on every other code + // path. const stopReason = (message as { stopReason?: unknown }).stopReason; - if (stopReason === "error") { + if (stopReason === "error" || isZeroUsageEmptyStopAssistantTurn(message)) { out.push({ ...message, content: [{ type: "text", text: STREAM_ERROR_FALLBACK_TEXT }], diff --git a/src/agents/pi-embedded-runner/result-fallback-classifier.ts b/src/agents/pi-embedded-runner/result-fallback-classifier.ts index 9373aa7b91c..a202d7066f9 100644 --- a/src/agents/pi-embedded-runner/result-fallback-classifier.ts +++ b/src/agents/pi-embedded-runner/result-fallback-classifier.ts @@ -83,7 +83,7 @@ export function classifyEmbeddedPiRunResultForModelFallback(params: { hasDirectlySentBlockReply?: boolean; hasBlockReplyPipelineOutput?: boolean; }): ModelFallbackResultClassification { - if (!isGpt5ModelId(params.model) || !isEmbeddedPiRunResult(params.result)) { + if (!isEmbeddedPiRunResult(params.result)) { return null; } if ( @@ -108,6 +108,22 @@ export function classifyEmbeddedPiRunResultForModelFallback(params: { } const payloads = params.result.payloads ?? []; + const errorText = payloads + .filter((payload) => payload?.isError === true) + .map((payload) => (typeof payload.text === "string" ? payload.text : "")) + .join("\n"); + if (EMPTY_TERMINAL_REPLY_RE.test(errorText)) { + return { + message: `${params.provider}/${params.model} ended with an incomplete terminal response`, + reason: "format", + code: "incomplete_result", + }; + } + + if (!isGpt5ModelId(params.model)) { + return null; + } + if (payloads.length === 0 && hasDeliberateSilentTerminalReply(params.result)) { return null; } @@ -126,10 +142,6 @@ export function classifyEmbeddedPiRunResultForModelFallback(params: { }; } - const errorText = payloads - .filter((payload) => payload?.isError === true) - .map((payload) => (typeof payload.text === "string" ? payload.text : "")) - .join("\n"); if (PLAN_ONLY_TERMINAL_REPLY_RE.test(errorText)) { return { message: `${params.provider}/${params.model} exhausted plan-only retries without taking action`, diff --git a/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts b/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts index a3c714bbb9d..b1cab8458e9 100644 --- a/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts +++ b/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts @@ -14,10 +14,9 @@ import type { EmbeddedRunAttemptResult } from "./run/types.js"; // // Symptom: ollama/glm-5.1 occasionally ends a turn with stopReason="error" and // zero output tokens after a successful tool-call sequence. The user sees no -// reply and has to nudge. The existing empty-response retry path is gated on -// the strict-agentic contract (gpt-5 only), so non-frontier models fell -// through to "incomplete turn detected". This suite locks in a narrower, -// model-agnostic resubmission. +// reply and has to nudge. This suite locks in a narrower model-agnostic +// resubmission for errored turns, separate from the visible-answer retry used +// for stopReason="stop" empty zero-token turns. let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent; diff --git a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts index 88546d6a4c3..23054174fce 100644 --- a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts +++ b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts @@ -441,6 +441,60 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => { expect(mockedLog.warn).toHaveBeenCalledWith(expect.stringContaining("empty response detected")); }); + it("retries zero-token empty Claude stop turns with a visible-answer continuation instruction", async () => { + mockedClassifyFailoverReason.mockReturnValue(null); + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + assistantTexts: [], + lastAssistant: { + role: "assistant", + stopReason: "stop", + provider: "anthropic", + model: "claude-opus-4.7", + content: [], + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + }, + } as unknown as EmbeddedRunAttemptResult["lastAssistant"], + }), + ); + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + assistantTexts: ["Visible Claude answer."], + lastAssistant: { + role: "assistant", + stopReason: "stop", + provider: "anthropic", + model: "claude-opus-4.7", + content: [{ type: "text", text: "Visible Claude answer." }], + usage: { + input: 100, + output: 5, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 105, + }, + } as unknown as EmbeddedRunAttemptResult["lastAssistant"], + }), + ); + + await runEmbeddedPiAgent({ + ...overflowBaseRunParams, + provider: "anthropic", + model: "claude-opus-4.7", + runId: "run-empty-zero-usage-claude-continuation", + }); + + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2); + const secondCall = mockedRunEmbeddedAttempt.mock.calls[1]?.[0] as { prompt?: string }; + expect(secondCall.prompt).toContain(EMPTY_RESPONSE_RETRY_INSTRUCTION); + expect(mockedLog.warn).toHaveBeenCalledWith(expect.stringContaining("empty response detected")); + }); + it("surfaces an error after exhausting empty-response retries", async () => { mockedClassifyFailoverReason.mockReturnValue(null); mockedRunEmbeddedAttempt.mockResolvedValue( diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 78305ab4b03..d1ff60bec63 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -617,9 +617,9 @@ export async function runEmbeddedPiAgent( let timeoutCompactionAttempts = 0; // Silent-error retry: non-strict-agentic models (e.g. ollama/glm-5.1) can // end a turn with stopReason="error" + zero output tokens, producing no - // user-visible text. The existing empty-response retry is gated on - // isStrictAgenticSupportedProviderModel (gpt-5 only). This is an - // orthogonal, model-agnostic resubmission. + // user-visible text. This is an orthogonal, model-agnostic resubmission + // for errored turns; stopReason="stop" empty zero-token turns use the + // visible-answer retry instruction instead. const MAX_EMPTY_ERROR_RETRIES = 3; let emptyErrorRetries = 0; const overloadFailoverBackoffMs = resolveOverloadFailoverBackoffMs(params.config); @@ -2089,13 +2089,10 @@ export async function runEmbeddedPiAgent( // ── silent-error retry ──────────────────────────────────────────── // Observed with ollama/glm-5.1: a turn can end with stopReason="error" // and zero output tokens AND empty content after a successful - // tool-call sequence, producing no user-visible text at all. The - // existing empty-response retry path (resolveEmptyResponseRetryInstruction) - // is gated on the strict-agentic contract (gpt-5 only), so non-frontier - // models fall through to "incomplete turn detected" → silent gap - // until the user nudges. This is a narrower, model-agnostic - // resubmission: same prompt, same session transcript (tool results - // already captured), no instruction injection. Placed before the + // tool-call sequence, producing no user-visible text at all. This + // path is narrower than the empty-response continuation retry: + // same prompt, same session transcript (tool results already + // captured), no instruction injection. Placed before the // incompleteTurnText return so it actually gets a chance to fire. // // Content-empty guard: a reasoning-only error (content has thinking diff --git a/src/agents/pi-embedded-runner/run/incomplete-turn.ts b/src/agents/pi-embedded-runner/run/incomplete-turn.ts index 0dde8fc53e3..1038aa9b71e 100644 --- a/src/agents/pi-embedded-runner/run/incomplete-turn.ts +++ b/src/agents/pi-embedded-runner/run/incomplete-turn.ts @@ -7,6 +7,7 @@ import { stripProviderPrefix, } from "../../execution-contract.js"; import { isLikelyMutatingToolName } from "../../tool-mutation.js"; +import { isZeroUsageEmptyStopAssistantTurn } from "../empty-assistant-turn.js"; import { assessLastAssistantMessage } from "../thinking.js"; import type { EmbeddedRunLivenessState } from "../types.js"; import type { EmbeddedRunAttemptResult } from "./types.js"; @@ -393,16 +394,6 @@ export function resolveEmptyResponseRetryInstruction(params: { return null; } - if ( - !shouldApplyPlanningOnlyRetryGuard({ - provider: params.provider, - modelId: params.modelId, - executionContract: params.executionContract, - }) - ) { - return null; - } - if ( !isEmptyResponseAssistantTurn({ payloadCount: params.payloadCount, @@ -412,7 +403,20 @@ export function resolveEmptyResponseRetryInstruction(params: { return null; } - return EMPTY_RESPONSE_RETRY_INSTRUCTION; + if ( + shouldApplyPlanningOnlyRetryGuard({ + provider: params.provider, + modelId: params.modelId, + executionContract: params.executionContract, + }) || + isZeroUsageEmptyStopAssistantTurn( + params.attempt.currentAttemptAssistant ?? params.attempt.lastAssistant ?? null, + ) + ) { + return EMPTY_RESPONSE_RETRY_INSTRUCTION; + } + + return null; } function shouldApplyPlanningOnlyRetryGuard(params: {