From fa0d81ed1332c219226299d8bd2d4449ddcac909 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Mon, 27 Apr 2026 11:24:07 +0100
Subject: [PATCH] fix(agents): retry empty openai-compatible turns

---
 CHANGELOG.md                                  |  1 +
 docs/gateway/troubleshooting.md               |  2 ++
 .../run.incomplete-turn.test.ts               | 24 +++++++++++++++++++
 src/agents/pi-embedded-runner/run.ts          |  2 ++
 .../pi-embedded-runner/run/incomplete-turn.ts |  8 +++++++
 5 files changed, 37 insertions(+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5880db9da44..739ea4821b9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai
 ### Fixes
 
 - Channels/setup: treat bundled channel plugins as already bundled during `channels add` and onboarding, enabling them without writing redundant `plugins.load.paths` entries or path install records. Fixes #72740. Thanks @iCodePoet.
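+- Agents/OpenAI-compatible: retry replay-safe empty `stop` turns once for `openai-completions` endpoints, so transient empty local backend responses no longer surface as “Agent couldn't generate a response” when a continuation succeeds; the same API-based gate also makes reasoning-only turns from these endpoints eligible for the existing non-visible-turn continuation retry. Fixes #72751. Thanks @moooV252.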
 - Git hooks: skip ignored staged paths when formatting and restaging pre-commit files, so merge commits no longer abort when `.gitignore` newly ignores staged merged content. Fixes #72744. Thanks @100yenadmin.
 - Memory-core/dreaming: add a supported `dreaming.model` knob for Dream Diary narrative subagents, wired through phase config and the existing plugin subagent model-override trust gate. Refs #65963. Thanks @esqandil and @mjamiv.
 - Memory-core/dreaming: treat request-scoped narrative fallback as expected, skip session cleanup when no subagent run was created, and remove duplicate phase-level cleanup so fallback no longer emits warning noise. Fixes #67152. Thanks @jsompis.
diff --git a/docs/gateway/troubleshooting.md b/docs/gateway/troubleshooting.md
index ef2c6d6295e..7fca62f86a6 100644
--- a/docs/gateway/troubleshooting.md
+++ b/docs/gateway/troubleshooting.md
@@ -119,11 +119,13 @@ Look for:
 
 - direct tiny calls succeed, but OpenClaw runs fail only on larger prompts
 - backend errors about `messages[].content` expecting a string
+- intermittent `incomplete turn detected ... stopReason=stop payloads=0` warnings with an OpenAI-compatible local backend
 - backend crashes that appear only with larger prompt-token counts or full agent runtime prompts
 
 <AccordionGroup>
   <Accordion title="Common signatures">
     - `messages[...].content: invalid type: sequence, expected a string` → backend rejects structured Chat Completions content parts. Fix: set `models.providers.<provider>.models[].compat.requiresStringContent: true`.
+    - `incomplete turn detected ... stopReason=stop payloads=0` → the backend completed the Chat Completions request but returned no user-visible assistant text for that turn. OpenClaw retries replay-safe empty OpenAI-compatible turns once; persistent failures usually mean the backend is emitting empty/non-text content or suppressing final-answer text.
     - direct tiny requests succeed, but OpenClaw agent runs fail with backend/model crashes (for example Gemma on some `inferrs` builds) → OpenClaw transport is likely already correct; the backend is failing on the larger agent-runtime prompt shape.
     - failures shrink after disabling tools but do not disappear → tool schemas were part of the pressure, but the remaining issue is still upstream model/server capacity or a backend bug.
   </Accordion>
diff --git a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
index c0f1f77589d..29c2c72fa29 100644
--- a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
+++ b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
@@ -1080,6 +1080,30 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
     expect(retryInstruction).toBe(EMPTY_RESPONSE_RETRY_INSTRUCTION);
   });
 
+  it("retries generic empty OpenAI-compatible turns from custom endpoints", () => {
+    const retryInstruction = resolveEmptyResponseRetryInstruction({
+      provider: "llama-cpp-local",
+      modelId: "qwen3.6-27b",
+      modelApi: "openai-completions",
+      payloadCount: 0,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptResult({
+        assistantTexts: [],
+        lastAssistant: {
+          role: "assistant",
+          stopReason: "stop",
+          provider: "llama-cpp-local",
+          model: "qwen3.6-27b",
+          content: [],
+          usage: { input: 950, output: 103, totalTokens: 1053 },
+        } as unknown as EmbeddedRunAttemptResult["lastAssistant"],
+      }),
+    });
+
+    expect(retryInstruction).toBe(EMPTY_RESPONSE_RETRY_INSTRUCTION);
+  });
+
   it("does not retry clean zero-token Ollama stop turns", () => {
     const retryInstruction = resolveEmptyResponseRetryInstruction({
       provider: "ollama",
diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts
index a3370fa9e04..f8240826be7 100644
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -1980,6 +1980,7 @@ export async function runEmbeddedPiAgent(
             : resolveReasoningOnlyRetryInstruction({
                 provider: activeErrorContext.provider,
                 modelId: activeErrorContext.model,
+                modelApi: effectiveModel.api,
                 executionContract,
                 aborted,
                 timedOut,
@@ -1990,6 +1991,7 @@ export async function runEmbeddedPiAgent(
             : resolveEmptyResponseRetryInstruction({
                 provider: activeErrorContext.provider,
                 modelId: activeErrorContext.model,
+                modelApi: effectiveModel.api,
                 executionContract,
                 payloadCount,
                 aborted,
diff --git a/src/agents/pi-embedded-runner/run/incomplete-turn.ts b/src/agents/pi-embedded-runner/run/incomplete-turn.ts
index 112dfecd1c3..56f00c74851 100644
--- a/src/agents/pi-embedded-runner/run/incomplete-turn.ts
+++ b/src/agents/pi-embedded-runner/run/incomplete-turn.ts
@@ -506,6 +506,7 @@ export function shouldTreatEmptyAssistantReplyAsSilent(params: {
 export function resolveReasoningOnlyRetryInstruction(params: {
   provider?: string;
   modelId?: string;
+  modelApi?: string;
   executionContract?: string;
   aborted: boolean;
   timedOut: boolean;
@@ -519,6 +520,7 @@ export function resolveReasoningOnlyRetryInstruction(params: {
     !shouldApplyNonVisibleTurnRetryGuard({
       provider: params.provider,
       modelId: params.modelId,
+      modelApi: params.modelApi,
       executionContract: params.executionContract,
     })
   ) {
@@ -542,6 +544,7 @@ export function resolveReasoningOnlyRetryInstruction(params: {
 export function resolveEmptyResponseRetryInstruction(params: {
   provider?: string;
   modelId?: string;
+  modelApi?: string;
   executionContract?: string;
   payloadCount: number;
   aborted: boolean;
@@ -575,6 +578,7 @@ export function resolveEmptyResponseRetryInstruction(params: {
     shouldApplyNonVisibleTurnRetryGuard({
       provider: params.provider,
       modelId: params.modelId,
+      modelApi: params.modelApi,
       executionContract: params.executionContract,
     }) ||
     // Keep the generic zero-usage stop retry for providers that expose a
@@ -605,11 +609,15 @@ function shouldApplyPlanningOnlyRetryGuard(params: {
 function shouldApplyNonVisibleTurnRetryGuard(params: {
   provider?: string;
   modelId?: string;
+  modelApi?: string;
   executionContract?: string;
 }): boolean {
   if (shouldApplyPlanningOnlyRetryGuard(params)) {
     return true;
   }
+  if (params.modelApi === "openai-completions") {
+    return true;
+  }
   // Non-visible final turns are narrower than planning-only turns: there is no
   // user text to classify, just a replay-safe empty/thinking-only result. Ollama
   // gets this continuation guard without getting the planning-only or ack