fix(agents): retry empty openai-compatible turns

This commit is contained in:
Peter Steinberger
2026-04-27 11:24:07 +01:00
parent f820f89f14
commit fa0d81ed13
5 changed files with 37 additions and 0 deletions

View File

@@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Channels/setup: treat bundled channel plugins as already bundled during `channels add` and onboarding, enabling them without writing redundant `plugins.load.paths` entries or path install records. Fixes #72740. Thanks @iCodePoet.
- Agents/OpenAI-compatible: retry replay-safe empty `stop` turns once for `openai-completions` endpoints, so transient empty local backend responses no longer surface as “Agent couldn't generate a response” when a continuation succeeds. Fixes #72751. Thanks @moooV252.
- Git hooks: skip ignored staged paths when formatting and restaging pre-commit files, so merge commits no longer abort when `.gitignore` newly ignores staged merged content. Fixes #72744. Thanks @100yenadmin.
- Memory-core/dreaming: add a supported `dreaming.model` knob for Dream Diary narrative subagents, wired through phase config and the existing plugin subagent model-override trust gate. Refs #65963. Thanks @esqandil and @mjamiv.
- Memory-core/dreaming: treat request-scoped narrative fallback as expected, skip session cleanup when no subagent run was created, and remove duplicate phase-level cleanup so fallback no longer emits warning noise. Fixes #67152. Thanks @jsompis.

View File

@@ -119,11 +119,13 @@ Look for:
- direct tiny calls succeed, but OpenClaw runs fail only on larger prompts
- backend errors about `messages[].content` expecting a string
- intermittent `incomplete turn detected ... stopReason=stop payloads=0` warnings with an OpenAI-compatible local backend
- backend crashes that appear only with larger prompt-token counts or full agent runtime prompts
<AccordionGroup>
<Accordion title="Common signatures">
- `messages[...].content: invalid type: sequence, expected a string` → backend rejects structured Chat Completions content parts. Fix: set `models.providers.<provider>.models[].compat.requiresStringContent: true`.
- `incomplete turn detected ... stopReason=stop payloads=0` → the backend completed the Chat Completions request but returned no user-visible assistant text for that turn. OpenClaw retries replay-safe empty OpenAI-compatible turns once; persistent failures usually mean the backend is emitting empty/non-text content or suppressing final-answer text.
- direct tiny requests succeed, but OpenClaw agent runs fail with backend/model crashes (for example Gemma on some `inferrs` builds) → OpenClaw transport is likely already correct; the backend is failing on the larger agent-runtime prompt shape.
- failures shrink after disabling tools but do not disappear → tool schemas were part of the pressure, but the remaining issue is still upstream model/server capacity or a backend bug.
</Accordion>

View File

@@ -1080,6 +1080,30 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
expect(retryInstruction).toBe(EMPTY_RESPONSE_RETRY_INSTRUCTION);
});
it("retries generic empty OpenAI-compatible turns from custom endpoints", () => {
  // A replay-safe "stop" turn from a custom openai-completions backend:
  // the model finished cleanly but produced no user-visible content.
  const emptyStopTurn = {
    role: "assistant",
    stopReason: "stop",
    provider: "llama-cpp-local",
    model: "qwen3.6-27b",
    content: [],
    usage: { input: 950, output: 103, totalTokens: 1053 },
  } as unknown as EmbeddedRunAttemptResult["lastAssistant"];
  const instruction = resolveEmptyResponseRetryInstruction({
    provider: "llama-cpp-local",
    modelId: "qwen3.6-27b",
    modelApi: "openai-completions",
    payloadCount: 0,
    aborted: false,
    timedOut: false,
    attempt: makeAttemptResult({
      assistantTexts: [],
      lastAssistant: emptyStopTurn,
    }),
  });
  // Generic openai-completions endpoints get exactly one retry instruction.
  expect(instruction).toBe(EMPTY_RESPONSE_RETRY_INSTRUCTION);
});
it("does not retry clean zero-token Ollama stop turns", () => {
const retryInstruction = resolveEmptyResponseRetryInstruction({
provider: "ollama",

View File

@@ -1980,6 +1980,7 @@ export async function runEmbeddedPiAgent(
: resolveReasoningOnlyRetryInstruction({
provider: activeErrorContext.provider,
modelId: activeErrorContext.model,
modelApi: effectiveModel.api,
executionContract,
aborted,
timedOut,
@@ -1990,6 +1991,7 @@ export async function runEmbeddedPiAgent(
: resolveEmptyResponseRetryInstruction({
provider: activeErrorContext.provider,
modelId: activeErrorContext.model,
modelApi: effectiveModel.api,
executionContract,
payloadCount,
aborted,

View File

@@ -506,6 +506,7 @@ export function shouldTreatEmptyAssistantReplyAsSilent(params: {
export function resolveReasoningOnlyRetryInstruction(params: {
provider?: string;
modelId?: string;
modelApi?: string;
executionContract?: string;
aborted: boolean;
timedOut: boolean;
@@ -519,6 +520,7 @@ export function resolveReasoningOnlyRetryInstruction(params: {
!shouldApplyNonVisibleTurnRetryGuard({
provider: params.provider,
modelId: params.modelId,
modelApi: params.modelApi,
executionContract: params.executionContract,
})
) {
@@ -542,6 +544,7 @@ export function resolveReasoningOnlyRetryInstruction(params: {
export function resolveEmptyResponseRetryInstruction(params: {
provider?: string;
modelId?: string;
modelApi?: string;
executionContract?: string;
payloadCount: number;
aborted: boolean;
@@ -575,6 +578,7 @@ export function resolveEmptyResponseRetryInstruction(params: {
shouldApplyNonVisibleTurnRetryGuard({
provider: params.provider,
modelId: params.modelId,
modelApi: params.modelApi,
executionContract: params.executionContract,
}) ||
// Keep the generic zero-usage stop retry for providers that expose a
@@ -605,11 +609,15 @@ function shouldApplyPlanningOnlyRetryGuard(params: {
function shouldApplyNonVisibleTurnRetryGuard(params: {
provider?: string;
modelId?: string;
modelApi?: string;
executionContract?: string;
}): boolean {
if (shouldApplyPlanningOnlyRetryGuard(params)) {
return true;
}
if (params.modelApi === "openai-completions") {
return true;
}
// Non-visible final turns are narrower than planning-only turns: there is no
// user text to classify, just a replay-safe empty/thinking-only result. Ollama
// gets this continuation guard without getting the planning-only or ack