diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2149a303e78..aa7636f0a07 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -63,6 +63,9 @@ Docs: https://docs.openclaw.ai
 
 ### Fixes
 
+- Agents/Claude: treat zero-token empty `stop` turns as failed provider output,
+  retry once, repair replay, and allow configured model fallback instead of
+  preserving them as successful silent replies. Fixes #71880. Thanks @MagnaAI.
 - Diagnostics/OTEL: treat normal early model stream cleanup as a completed model call instead of exporting a misleading `StreamAbandoned` error span. Thanks @vincentkoc.
 - Gateway/pairing: stop corrupt or unreadable device/node pairing stores from being treated as empty state, preserving `paired.json` for repair instead of overwriting approved pairings. Fixes #71873. Thanks @iret77.
 - ACP: keep `/acp` management commands, plus local `/status` and `/unfocus`, on the Gateway path inside ACP-bound threads so they are not consumed as ACP prompt text. Fixes #66298. Thanks @kindomLee.
diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts
index e9be6319e88..f9ea9a8e045 100644
--- a/src/agents/model-fallback.test.ts
+++ b/src/agents/model-fallback.test.ts
@@ -582,6 +582,28 @@ describe("runWithModelFallback", () => {
     });
   });
 
+  it("classifies non-GPT incomplete terminal errors for configured fallback", () => {
+    const runResult: EmbeddedPiRunResult = {
+      payloads: [
+        { text: "⚠️ Agent couldn't generate a response. Please try again.", isError: true },
+      ],
+      meta: {
+        durationMs: 1,
+      },
+    };
+
+    expect(
+      classifyEmbeddedPiRunResultForModelFallback({
+        provider: "anthropic",
+        model: "claude-opus-4.7",
+        result: runResult,
+      }),
+    ).toMatchObject({
+      code: "incomplete_result",
+      reason: "format",
+    });
+  });
+
   it("keeps aborted harness-classified GPT-5 runs out of fallback", () => {
     const runResult: EmbeddedPiRunResult = {
       payloads: [],
diff --git a/src/agents/pi-embedded-runner/empty-assistant-turn.ts b/src/agents/pi-embedded-runner/empty-assistant-turn.ts
new file mode 100644
index 00000000000..6235aec7bb5
--- /dev/null
+++ b/src/agents/pi-embedded-runner/empty-assistant-turn.ts
@@ -0,0 +1,57 @@
+type EmptyAssistantTurnLike = {
+  content?: unknown; // matched only when it is an empty array
+  stopReason?: unknown; // matched only when it equals "stop"
+  usage?: unknown; // evaluated by hasZeroTokenUsageSnapshot
+};
+
+type UsageFieldMap = {
+  input?: unknown; // component counts; each is read through readFiniteTokenCount
+  output?: unknown;
+  cacheRead?: unknown;
+  cacheWrite?: unknown;
+  total?: unknown; // aliases for the total, consulted in order: total, totalTokens, total_tokens
+  totalTokens?: unknown;
+  total_tokens?: unknown;
+};
+
+// Upstream badlogic/pi-mono should normalize Anthropic zero-token empty `stop`
+// turns before OpenClaw sees them. Downstream: openclaw/openclaw#71880.
+function readFiniteTokenCount(value: unknown): number | undefined { // finite numbers pass through unchanged
+  return typeof value === "number" && Number.isFinite(value) ? value : undefined;
+} // non-numbers, NaN, and ±Infinity all map to undefined
+
+function isZero(value: number | undefined): value is 0 { // type guard used with Array.prototype.every
+  return value === 0; // undefined is rejected, so a missing count never passes
+}
+
+/** True when `usage` reports at least one finite token count and every reported count is zero. */
+export function hasZeroTokenUsageSnapshot(usage: unknown): boolean {
+  if (!usage || typeof usage !== "object") {
+    return false;
+  }
+  const typed = usage as UsageFieldMap;
+  const components = [typed.input, typed.output, typed.cacheRead, typed.cacheWrite]
+    .map(readFiniteTokenCount)
+    .filter((value): value is number => value !== undefined);
+  // Use the first total alias that holds a finite number, so a malformed
+  // `total` cannot hide a valid `totalTokens` / `total_tokens`.
+  const total = [typed.total, typed.totalTokens, typed.total_tokens]
+    .map(readFiniteTokenCount)
+    .find((value) => value !== undefined);
+  if (total !== undefined) {
+    // A reported total must be zero and no component may contradict it.
+    return total === 0 && components.every(isZero);
+  }
+  // No usable total: require at least one reported component, all zero.
+  return components.length > 0 && components.every(isZero);
+}
+
+/** True only for a `stop` turn whose content is an empty array and whose usage is all zero. */
+export function isZeroUsageEmptyStopAssistantTurn(message: EmptyAssistantTurnLike | null): boolean {
+  if (!message || message.stopReason !== "stop") {
+    return false;
+  }
+  const { content } = message;
+  const hasNoContent = Array.isArray(content) && content.length === 0;
+  return hasNoContent && hasZeroTokenUsageSnapshot(message.usage);
+}
diff --git a/src/agents/pi-embedded-runner/replay-history.test.ts b/src/agents/pi-embedded-runner/replay-history.test.ts
index a294cb060a2..5942e9189d4 100644
--- a/src/agents/pi-embedded-runner/replay-history.test.ts
+++ b/src/agents/pi-embedded-runner/replay-history.test.ts
@@ -7,6 +7,7 @@ const FALLBACK_TEXT = "[assistant turn failed before producing content]";
 function bedrockAssistant(
   content: unknown,
   stopReason: "error" | "stop" | "toolUse" | "length" = "error",
+  usageOverrides: Record<string, number> = {},
 ): AgentMessage {
   return {
     role: "assistant",
@@ -21,6 +22,7 @@ function bedrockAssistant(
       cacheWrite: 0,
       totalTokens: 0,
       cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+      ...usageOverrides,
     },
     stopReason,
     timestamp: 0,
@@ -60,19 +62,28 @@ describe("normalizeAssistantReplayContent", () => {
     expect(repaired.content).toEqual([{ type: "text", text: FALLBACK_TEXT }]);
   });
 
-  it("preserves silent-reply turns (stopReason=stop, content=[]) untouched", () => {
+  it("preserves nonzero-usage silent-reply turns (stopReason=stop, content=[]) untouched", () => {
     // run.empty-error-retry.test.ts treats `stopReason:"stop"` + `content:[]`
     // as a legitimate NO_REPLY / silent-reply, NOT a crash. Substituting the
     // failure sentinel here would inject a fabricated "[assistant turn failed
     // before producing content]" into the next provider request and change
     // model behavior even though no failure occurred.
-    const silentStop = bedrockAssistant([], "stop");
+    const silentStop = bedrockAssistant([], "stop", { input: 100, totalTokens: 100 });
     const messages = [userMessage("hello"), silentStop];
     const out = normalizeAssistantReplayContent(messages);
     expect(out).toBe(messages);
     expect(out[1]).toBe(silentStop);
   });
 
+  it("converts zero-usage empty stop turns to a replay sentinel", () => {
+    const falseSuccessStop = bedrockAssistant([], "stop");
+    const messages = [userMessage("hello"), falseSuccessStop];
+    const out = normalizeAssistantReplayContent(messages);
+    expect(out).not.toBe(messages);
+    const repaired = out[1] as AgentMessage & { content: { type: string; text: string }[] };
+    expect(repaired.content).toEqual([{ type: "text", text: FALLBACK_TEXT }]);
+  });
+
   it("preserves empty content with non-error stopReasons (toolUse, length) untouched", () => {
     // Boundary lock: only `stopReason:"error"` should trip the sentinel
     // substitution. `toolUse` and `length` are reachable in practice when a
diff --git a/src/agents/pi-embedded-runner/replay-history.ts b/src/agents/pi-embedded-runner/replay-history.ts
index cc2676b9f77..7310161399b 100644
--- a/src/agents/pi-embedded-runner/replay-history.ts
+++ b/src/agents/pi-embedded-runner/replay-history.ts
@@ -41,6 +41,7 @@ import {
   type AssistantUsageSnapshot,
   type UsageLike,
 } from "../usage.js";
+import { isZeroUsageEmptyStopAssistantTurn } from "./empty-assistant-turn.js";
 import { dropThinkingBlocks, stripInvalidThinkingSignatures } from "./thinking.js";
 
 const INTER_SESSION_PREFIX_BASE = "[Inter-session message]";
@@ -282,14 +283,16 @@ export function normalizeAssistantReplayContent(messages: AgentMessage[]): Agent
       // failure statement in the next provider request and change model
       // behavior even when no failure occurred.
       //
-      // Only `stopReason: "error"` turns are the Bedrock-Converse replay
-      // poison this fix is scoped to: the provider rejects assistant
-      // messages with no ContentBlock, and the persisted error turn was
-      // never going to render anything useful to the model anyway. Leaving
-      // non-error empty-content turns untouched preserves silent-reply
-      // semantics on every other code path.
+      // `stopReason: "error"` turns are Bedrock-Converse replay poison:
+      // the provider rejects assistant messages with no ContentBlock, and
+      // the persisted error turn was never going to render anything useful
+      // to the model anyway. A zero-token `stop` turn is the same shape from
+      // the next run's perspective: the provider produced no billable prompt
+      // or completion and no content. Leaving other non-error empty-content
+      // turns untouched preserves silent-reply semantics on every other code
+      // path.
       const stopReason = (message as { stopReason?: unknown }).stopReason;
-      if (stopReason === "error") {
+      if (stopReason === "error" || isZeroUsageEmptyStopAssistantTurn(message)) {
         out.push({
           ...message,
           content: [{ type: "text", text: STREAM_ERROR_FALLBACK_TEXT }],
diff --git a/src/agents/pi-embedded-runner/result-fallback-classifier.ts b/src/agents/pi-embedded-runner/result-fallback-classifier.ts
index 9373aa7b91c..a202d7066f9 100644
--- a/src/agents/pi-embedded-runner/result-fallback-classifier.ts
+++ b/src/agents/pi-embedded-runner/result-fallback-classifier.ts
@@ -83,7 +83,7 @@ export function classifyEmbeddedPiRunResultForModelFallback(params: {
   hasDirectlySentBlockReply?: boolean;
   hasBlockReplyPipelineOutput?: boolean;
 }): ModelFallbackResultClassification {
-  if (!isGpt5ModelId(params.model) || !isEmbeddedPiRunResult(params.result)) {
+  if (!isEmbeddedPiRunResult(params.result)) {
     return null;
   }
   if (
@@ -108,6 +108,22 @@ export function classifyEmbeddedPiRunResultForModelFallback(params: {
   }
 
   const payloads = params.result.payloads ?? [];
+  const errorText = payloads
+    .filter((payload) => payload?.isError === true)
+    .map((payload) => (typeof payload.text === "string" ? payload.text : ""))
+    .join("\n");
+  if (EMPTY_TERMINAL_REPLY_RE.test(errorText)) {
+    return {
+      message: `${params.provider}/${params.model} ended with an incomplete terminal response`,
+      reason: "format",
+      code: "incomplete_result",
+    };
+  }
+
+  if (!isGpt5ModelId(params.model)) {
+    return null;
+  }
+
   if (payloads.length === 0 && hasDeliberateSilentTerminalReply(params.result)) {
     return null;
   }
@@ -126,10 +142,6 @@ export function classifyEmbeddedPiRunResultForModelFallback(params: {
     };
   }
 
-  const errorText = payloads
-    .filter((payload) => payload?.isError === true)
-    .map((payload) => (typeof payload.text === "string" ? payload.text : ""))
-    .join("\n");
   if (PLAN_ONLY_TERMINAL_REPLY_RE.test(errorText)) {
     return {
       message: `${params.provider}/${params.model} exhausted plan-only retries without taking action`,
diff --git a/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts b/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts
index a3c714bbb9d..b1cab8458e9 100644
--- a/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts
+++ b/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts
@@ -14,10 +14,9 @@ import type { EmbeddedRunAttemptResult } from "./run/types.js";
 //
 // Symptom: ollama/glm-5.1 occasionally ends a turn with stopReason="error" and
 // zero output tokens after a successful tool-call sequence. The user sees no
-// reply and has to nudge. The existing empty-response retry path is gated on
-// the strict-agentic contract (gpt-5 only), so non-frontier models fell
-// through to "incomplete turn detected". This suite locks in a narrower,
-// model-agnostic resubmission.
+// reply and has to nudge. This suite locks in a narrower model-agnostic
+// resubmission for errored turns, separate from the visible-answer retry used
+// for stopReason="stop" empty zero-token turns.
 
 let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent;
 
diff --git a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
index 88546d6a4c3..23054174fce 100644
--- a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
+++ b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
@@ -441,6 +441,60 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
     expect(mockedLog.warn).toHaveBeenCalledWith(expect.stringContaining("empty response detected"));
   });
 
+  it("retries zero-token empty Claude stop turns with a visible-answer continuation instruction", async () => {
+    mockedClassifyFailoverReason.mockReturnValue(null);
+    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
+      makeAttemptResult({
+        assistantTexts: [],
+        lastAssistant: {
+          role: "assistant",
+          stopReason: "stop",
+          provider: "anthropic",
+          model: "claude-opus-4.7",
+          content: [],
+          usage: {
+            input: 0,
+            output: 0,
+            cacheRead: 0,
+            cacheWrite: 0,
+            totalTokens: 0,
+          },
+        } as unknown as EmbeddedRunAttemptResult["lastAssistant"],
+      }),
+    );
+    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
+      makeAttemptResult({
+        assistantTexts: ["Visible Claude answer."],
+        lastAssistant: {
+          role: "assistant",
+          stopReason: "stop",
+          provider: "anthropic",
+          model: "claude-opus-4.7",
+          content: [{ type: "text", text: "Visible Claude answer." }],
+          usage: {
+            input: 100,
+            output: 5,
+            cacheRead: 0,
+            cacheWrite: 0,
+            totalTokens: 105,
+          },
+        } as unknown as EmbeddedRunAttemptResult["lastAssistant"],
+      }),
+    );
+
+    await runEmbeddedPiAgent({
+      ...overflowBaseRunParams,
+      provider: "anthropic",
+      model: "claude-opus-4.7",
+      runId: "run-empty-zero-usage-claude-continuation",
+    });
+
+    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
+    const secondCall = mockedRunEmbeddedAttempt.mock.calls[1]?.[0] as { prompt?: string };
+    expect(secondCall.prompt).toContain(EMPTY_RESPONSE_RETRY_INSTRUCTION);
+    expect(mockedLog.warn).toHaveBeenCalledWith(expect.stringContaining("empty response detected"));
+  });
+
   it("surfaces an error after exhausting empty-response retries", async () => {
     mockedClassifyFailoverReason.mockReturnValue(null);
     mockedRunEmbeddedAttempt.mockResolvedValue(
diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts
index 78305ab4b03..d1ff60bec63 100644
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -617,9 +617,9 @@ export async function runEmbeddedPiAgent(
       let timeoutCompactionAttempts = 0;
       // Silent-error retry: non-strict-agentic models (e.g. ollama/glm-5.1) can
       // end a turn with stopReason="error" + zero output tokens, producing no
-      // user-visible text. The existing empty-response retry is gated on
-      // isStrictAgenticSupportedProviderModel (gpt-5 only). This is an
-      // orthogonal, model-agnostic resubmission.
+      // user-visible text. This is an orthogonal, model-agnostic resubmission
+      // for errored turns; stopReason="stop" empty zero-token turns use the
+      // visible-answer retry instruction instead.
       const MAX_EMPTY_ERROR_RETRIES = 3;
       let emptyErrorRetries = 0;
       const overloadFailoverBackoffMs = resolveOverloadFailoverBackoffMs(params.config);
@@ -2089,13 +2089,10 @@ export async function runEmbeddedPiAgent(
           // ── silent-error retry ────────────────────────────────────────────
           // Observed with ollama/glm-5.1: a turn can end with stopReason="error"
           // and zero output tokens AND empty content after a successful
-          // tool-call sequence, producing no user-visible text at all. The
-          // existing empty-response retry path (resolveEmptyResponseRetryInstruction)
-          // is gated on the strict-agentic contract (gpt-5 only), so non-frontier
-          // models fall through to "incomplete turn detected" → silent gap
-          // until the user nudges. This is a narrower, model-agnostic
-          // resubmission: same prompt, same session transcript (tool results
-          // already captured), no instruction injection. Placed before the
+          // tool-call sequence, producing no user-visible text at all. This
+          // path is narrower than the empty-response continuation retry:
+          // same prompt, same session transcript (tool results already
+          // captured), no instruction injection. Placed before the
           // incompleteTurnText return so it actually gets a chance to fire.
           //
           // Content-empty guard: a reasoning-only error (content has thinking
diff --git a/src/agents/pi-embedded-runner/run/incomplete-turn.ts b/src/agents/pi-embedded-runner/run/incomplete-turn.ts
index 0dde8fc53e3..1038aa9b71e 100644
--- a/src/agents/pi-embedded-runner/run/incomplete-turn.ts
+++ b/src/agents/pi-embedded-runner/run/incomplete-turn.ts
@@ -7,6 +7,7 @@ import {
   stripProviderPrefix,
 } from "../../execution-contract.js";
 import { isLikelyMutatingToolName } from "../../tool-mutation.js";
+import { isZeroUsageEmptyStopAssistantTurn } from "../empty-assistant-turn.js";
 import { assessLastAssistantMessage } from "../thinking.js";
 import type { EmbeddedRunLivenessState } from "../types.js";
 import type { EmbeddedRunAttemptResult } from "./types.js";
@@ -393,16 +394,6 @@ export function resolveEmptyResponseRetryInstruction(params: {
     return null;
   }
 
-  if (
-    !shouldApplyPlanningOnlyRetryGuard({
-      provider: params.provider,
-      modelId: params.modelId,
-      executionContract: params.executionContract,
-    })
-  ) {
-    return null;
-  }
-
   if (
     !isEmptyResponseAssistantTurn({
       payloadCount: params.payloadCount,
@@ -412,7 +403,20 @@ export function resolveEmptyResponseRetryInstruction(params: {
     return null;
   }
 
-  return EMPTY_RESPONSE_RETRY_INSTRUCTION;
+  if (
+    shouldApplyPlanningOnlyRetryGuard({
+      provider: params.provider,
+      modelId: params.modelId,
+      executionContract: params.executionContract,
+    }) ||
+    isZeroUsageEmptyStopAssistantTurn(
+      params.attempt.currentAttemptAssistant ?? params.attempt.lastAssistant ?? null,
+    )
+  ) {
+    return EMPTY_RESPONSE_RETRY_INSTRUCTION;
+  }
+
+  return null;
 }
 
 function shouldApplyPlanningOnlyRetryGuard(params: {