fix(agents): handle empty Claude stop turns

This commit is contained in:
Peter Steinberger
2026-04-26 03:22:36 +01:00
parent a44a3f9171
commit 90cd9fce85
10 changed files with 201 additions and 39 deletions

View File

@@ -63,6 +63,9 @@ Docs: https://docs.openclaw.ai
### Fixes
- Agents/Claude: treat zero-token empty `stop` turns as failed provider output,
retry once, repair replay, and allow configured model fallback instead of
preserving them as successful silent replies. Fixes #71880. Thanks @MagnaAI.
- Diagnostics/OTEL: treat normal early model stream cleanup as a completed model call instead of exporting a misleading `StreamAbandoned` error span. Thanks @vincentkoc.
- Gateway/pairing: stop corrupt or unreadable device/node pairing stores from being treated as empty state, preserving `paired.json` for repair instead of overwriting approved pairings. Fixes #71873. Thanks @iret77.
- ACP: keep `/acp` management commands, plus local `/status` and `/unfocus`, on the Gateway path inside ACP-bound threads so they are not consumed as ACP prompt text. Fixes #66298. Thanks @kindomLee.

View File

@@ -582,6 +582,28 @@ describe("runWithModelFallback", () => {
});
});
it("classifies non-GPT incomplete terminal errors for configured fallback", () => {
const runResult: EmbeddedPiRunResult = {
payloads: [
{ text: "⚠️ Agent couldn't generate a response. Please try again.", isError: true },
],
meta: {
durationMs: 1,
},
};
expect(
classifyEmbeddedPiRunResultForModelFallback({
provider: "anthropic",
model: "claude-opus-4.7",
result: runResult,
}),
).toMatchObject({
code: "incomplete_result",
reason: "format",
});
});
it("keeps aborted harness-classified GPT-5 runs out of fallback", () => {
const runResult: EmbeddedPiRunResult = {
payloads: [],

View File

@@ -0,0 +1,57 @@
// Structural view of an assistant turn: only the fields the zero-usage
// empty-stop check reads, each typed `unknown` so loosely-typed provider
// messages can be passed without casting.
type EmptyAssistantTurnLike = {
content?: unknown;
stopReason?: unknown;
usage?: unknown;
};
// Accepted spellings of usage-counter fields. `total`, `totalTokens`, and
// `total_tokens` are alternate names for the aggregate count (consumed in
// that priority order by hasZeroTokenUsageSnapshot).
type UsageFieldMap = {
input?: unknown;
output?: unknown;
cacheRead?: unknown;
cacheWrite?: unknown;
total?: unknown;
totalTokens?: unknown;
total_tokens?: unknown;
};
// Upstream badlogic/pi-mono should normalize Anthropic zero-token empty `stop`
// turns before OpenClaw sees them. Downstream: openclaw/openclaw#71880.
/** Narrows `value` to a finite number; yields `undefined` for anything else. */
function readFiniteTokenCount(value: unknown): number | undefined {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  return value;
}

/** Type guard: true exactly when `value` is the number 0. */
function isZero(value: number | undefined): value is 0 {
  return value === 0;
}

/**
 * True when a usage snapshot reports zero tokens across the board.
 *
 * With a usable aggregate (`total`, falling back to `totalTokens`, then
 * `total_tokens`): the aggregate must be 0 and every per-channel counter
 * must be absent or 0. Without one: at least one per-channel counter must
 * be present and every present counter must be 0. Non-numeric / non-finite
 * fields are treated as absent; non-object snapshots never qualify.
 */
export function hasZeroTokenUsageSnapshot(usage: unknown): boolean {
  if (usage === null || typeof usage !== "object") {
    return false;
  }
  const fields = usage as {
    input?: unknown;
    output?: unknown;
    cacheRead?: unknown;
    cacheWrite?: unknown;
    total?: unknown;
    totalTokens?: unknown;
    total_tokens?: unknown;
  };
  const channels = [fields.input, fields.output, fields.cacheRead, fields.cacheWrite].map(
    (value) => readFiniteTokenCount(value),
  );
  // Nullish fallback runs before the finite check, so e.g. `total: NaN`
  // does NOT fall through to `totalTokens` — it drops to the channel path.
  const total = readFiniteTokenCount(fields.total ?? fields.totalTokens ?? fields.total_tokens);
  if (total !== undefined) {
    return isZero(total) && channels.every((count) => count === undefined || isZero(count));
  }
  const present = channels.filter((count): count is number => count !== undefined);
  return present.length > 0 && present.every(isZero);
}
/**
 * Detects the false-success turn shape: stopReason `"stop"`, an empty
 * content array, and a zero-token usage snapshot — i.e. the provider
 * reported a normal stop while producing no content and counting no tokens.
 */
export function isZeroUsageEmptyStopAssistantTurn(message: EmptyAssistantTurnLike | null): boolean {
  if (!message || message.stopReason !== "stop") {
    return false;
  }
  const { content } = message;
  if (!Array.isArray(content) || content.length !== 0) {
    return false;
  }
  return hasZeroTokenUsageSnapshot(message.usage);
}

View File

@@ -7,6 +7,7 @@ const FALLBACK_TEXT = "[assistant turn failed before producing content]";
function bedrockAssistant(
content: unknown,
stopReason: "error" | "stop" | "toolUse" | "length" = "error",
usageOverrides: Record<string, number> = {},
): AgentMessage {
return {
role: "assistant",
@@ -21,6 +22,7 @@ function bedrockAssistant(
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
...usageOverrides,
},
stopReason,
timestamp: 0,
@@ -60,19 +62,28 @@ describe("normalizeAssistantReplayContent", () => {
expect(repaired.content).toEqual([{ type: "text", text: FALLBACK_TEXT }]);
});
it("preserves silent-reply turns (stopReason=stop, content=[]) untouched", () => {
it("preserves nonzero-usage silent-reply turns (stopReason=stop, content=[]) untouched", () => {
// run.empty-error-retry.test.ts treats `stopReason:"stop"` + `content:[]`
// as a legitimate NO_REPLY / silent-reply, NOT a crash. Substituting the
// failure sentinel here would inject a fabricated "[assistant turn failed
// before producing content]" into the next provider request and change
// model behavior even though no failure occurred.
const silentStop = bedrockAssistant([], "stop");
const silentStop = bedrockAssistant([], "stop", { input: 100, totalTokens: 100 });
const messages = [userMessage("hello"), silentStop];
const out = normalizeAssistantReplayContent(messages);
expect(out).toBe(messages);
expect(out[1]).toBe(silentStop);
});
it("converts zero-usage empty stop turns to a replay sentinel", () => {
const falseSuccessStop = bedrockAssistant([], "stop");
const messages = [userMessage("hello"), falseSuccessStop];
const out = normalizeAssistantReplayContent(messages);
expect(out).not.toBe(messages);
const repaired = out[1] as AgentMessage & { content: { type: string; text: string }[] };
expect(repaired.content).toEqual([{ type: "text", text: FALLBACK_TEXT }]);
});
it("preserves empty content with non-error stopReasons (toolUse, length) untouched", () => {
// Boundary lock: only `stopReason:"error"` should trip the sentinel
// substitution. `toolUse` and `length` are reachable in practice when a

View File

@@ -41,6 +41,7 @@ import {
type AssistantUsageSnapshot,
type UsageLike,
} from "../usage.js";
import { isZeroUsageEmptyStopAssistantTurn } from "./empty-assistant-turn.js";
import { dropThinkingBlocks, stripInvalidThinkingSignatures } from "./thinking.js";
const INTER_SESSION_PREFIX_BASE = "[Inter-session message]";
@@ -282,14 +283,16 @@ export function normalizeAssistantReplayContent(messages: AgentMessage[]): Agent
// failure statement in the next provider request and change model
// behavior even when no failure occurred.
//
// Only `stopReason: "error"` turns are the Bedrock-Converse replay
// poison this fix is scoped to: the provider rejects assistant
// messages with no ContentBlock, and the persisted error turn was
// never going to render anything useful to the model anyway. Leaving
// non-error empty-content turns untouched preserves silent-reply
// semantics on every other code path.
// `stopReason: "error"` turns are Bedrock-Converse replay poison:
// the provider rejects assistant messages with no ContentBlock, and
// the persisted error turn was never going to render anything useful
// to the model anyway. A zero-token `stop` turn is the same shape from
// the next run's perspective: the provider produced no billable prompt
// or completion and no content. Leaving other non-error empty-content
// turns untouched preserves silent-reply semantics on every other code
// path.
const stopReason = (message as { stopReason?: unknown }).stopReason;
if (stopReason === "error") {
if (stopReason === "error" || isZeroUsageEmptyStopAssistantTurn(message)) {
out.push({
...message,
content: [{ type: "text", text: STREAM_ERROR_FALLBACK_TEXT }],

View File

@@ -83,7 +83,7 @@ export function classifyEmbeddedPiRunResultForModelFallback(params: {
hasDirectlySentBlockReply?: boolean;
hasBlockReplyPipelineOutput?: boolean;
}): ModelFallbackResultClassification {
if (!isGpt5ModelId(params.model) || !isEmbeddedPiRunResult(params.result)) {
if (!isEmbeddedPiRunResult(params.result)) {
return null;
}
if (
@@ -108,6 +108,22 @@ export function classifyEmbeddedPiRunResultForModelFallback(params: {
}
const payloads = params.result.payloads ?? [];
const errorText = payloads
.filter((payload) => payload?.isError === true)
.map((payload) => (typeof payload.text === "string" ? payload.text : ""))
.join("\n");
if (EMPTY_TERMINAL_REPLY_RE.test(errorText)) {
return {
message: `${params.provider}/${params.model} ended with an incomplete terminal response`,
reason: "format",
code: "incomplete_result",
};
}
if (!isGpt5ModelId(params.model)) {
return null;
}
if (payloads.length === 0 && hasDeliberateSilentTerminalReply(params.result)) {
return null;
}
@@ -126,10 +142,6 @@ export function classifyEmbeddedPiRunResultForModelFallback(params: {
};
}
const errorText = payloads
.filter((payload) => payload?.isError === true)
.map((payload) => (typeof payload.text === "string" ? payload.text : ""))
.join("\n");
if (PLAN_ONLY_TERMINAL_REPLY_RE.test(errorText)) {
return {
message: `${params.provider}/${params.model} exhausted plan-only retries without taking action`,

View File

@@ -14,10 +14,9 @@ import type { EmbeddedRunAttemptResult } from "./run/types.js";
//
// Symptom: ollama/glm-5.1 occasionally ends a turn with stopReason="error" and
// zero output tokens after a successful tool-call sequence. The user sees no
// reply and has to nudge. The existing empty-response retry path is gated on
// the strict-agentic contract (gpt-5 only), so non-frontier models fell
// through to "incomplete turn detected". This suite locks in a narrower,
// model-agnostic resubmission.
// reply and has to nudge. This suite locks in a narrower model-agnostic
// resubmission for errored turns, separate from the visible-answer retry used
// for stopReason="stop" empty zero-token turns.
let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent;

View File

@@ -441,6 +441,60 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
expect(mockedLog.warn).toHaveBeenCalledWith(expect.stringContaining("empty response detected"));
});
it("retries zero-token empty Claude stop turns with a visible-answer continuation instruction", async () => {
mockedClassifyFailoverReason.mockReturnValue(null);
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
makeAttemptResult({
assistantTexts: [],
lastAssistant: {
role: "assistant",
stopReason: "stop",
provider: "anthropic",
model: "claude-opus-4.7",
content: [],
usage: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
},
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
}),
);
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
makeAttemptResult({
assistantTexts: ["Visible Claude answer."],
lastAssistant: {
role: "assistant",
stopReason: "stop",
provider: "anthropic",
model: "claude-opus-4.7",
content: [{ type: "text", text: "Visible Claude answer." }],
usage: {
input: 100,
output: 5,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 105,
},
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
}),
);
await runEmbeddedPiAgent({
...overflowBaseRunParams,
provider: "anthropic",
model: "claude-opus-4.7",
runId: "run-empty-zero-usage-claude-continuation",
});
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
const secondCall = mockedRunEmbeddedAttempt.mock.calls[1]?.[0] as { prompt?: string };
expect(secondCall.prompt).toContain(EMPTY_RESPONSE_RETRY_INSTRUCTION);
expect(mockedLog.warn).toHaveBeenCalledWith(expect.stringContaining("empty response detected"));
});
it("surfaces an error after exhausting empty-response retries", async () => {
mockedClassifyFailoverReason.mockReturnValue(null);
mockedRunEmbeddedAttempt.mockResolvedValue(

View File

@@ -617,9 +617,9 @@ export async function runEmbeddedPiAgent(
let timeoutCompactionAttempts = 0;
// Silent-error retry: non-strict-agentic models (e.g. ollama/glm-5.1) can
// end a turn with stopReason="error" + zero output tokens, producing no
// user-visible text. The existing empty-response retry is gated on
// isStrictAgenticSupportedProviderModel (gpt-5 only). This is an
// orthogonal, model-agnostic resubmission.
// user-visible text. This is an orthogonal, model-agnostic resubmission
// for errored turns; stopReason="stop" empty zero-token turns use the
// visible-answer retry instruction instead.
const MAX_EMPTY_ERROR_RETRIES = 3;
let emptyErrorRetries = 0;
const overloadFailoverBackoffMs = resolveOverloadFailoverBackoffMs(params.config);
@@ -2089,13 +2089,10 @@ export async function runEmbeddedPiAgent(
// ── silent-error retry ────────────────────────────────────────────
// Observed with ollama/glm-5.1: a turn can end with stopReason="error"
// and zero output tokens AND empty content after a successful
// tool-call sequence, producing no user-visible text at all. The
// existing empty-response retry path (resolveEmptyResponseRetryInstruction)
// is gated on the strict-agentic contract (gpt-5 only), so non-frontier
// models fall through to "incomplete turn detected" → silent gap
// until the user nudges. This is a narrower, model-agnostic
// resubmission: same prompt, same session transcript (tool results
// already captured), no instruction injection. Placed before the
// tool-call sequence, producing no user-visible text at all. This
// path is narrower than the empty-response continuation retry:
// same prompt, same session transcript (tool results already
// captured), no instruction injection. Placed before the
// incompleteTurnText return so it actually gets a chance to fire.
//
// Content-empty guard: a reasoning-only error (content has thinking

View File

@@ -7,6 +7,7 @@ import {
stripProviderPrefix,
} from "../../execution-contract.js";
import { isLikelyMutatingToolName } from "../../tool-mutation.js";
import { isZeroUsageEmptyStopAssistantTurn } from "../empty-assistant-turn.js";
import { assessLastAssistantMessage } from "../thinking.js";
import type { EmbeddedRunLivenessState } from "../types.js";
import type { EmbeddedRunAttemptResult } from "./types.js";
@@ -393,16 +394,6 @@ export function resolveEmptyResponseRetryInstruction(params: {
return null;
}
if (
!shouldApplyPlanningOnlyRetryGuard({
provider: params.provider,
modelId: params.modelId,
executionContract: params.executionContract,
})
) {
return null;
}
if (
!isEmptyResponseAssistantTurn({
payloadCount: params.payloadCount,
@@ -412,7 +403,20 @@ export function resolveEmptyResponseRetryInstruction(params: {
return null;
}
return EMPTY_RESPONSE_RETRY_INSTRUCTION;
if (
shouldApplyPlanningOnlyRetryGuard({
provider: params.provider,
modelId: params.modelId,
executionContract: params.executionContract,
}) ||
isZeroUsageEmptyStopAssistantTurn(
params.attempt.currentAttemptAssistant ?? params.attempt.lastAssistant ?? null,
)
) {
return EMPTY_RESPONSE_RETRY_INSTRUCTION;
}
return null;
}
function shouldApplyPlanningOnlyRetryGuard(params: {