From fa0d81ed1332c219226299d8bd2d4449ddcac909 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 27 Apr 2026 11:24:07 +0100 Subject: [PATCH] fix(agents): retry empty openai-compatible turns --- CHANGELOG.md | 1 + docs/gateway/troubleshooting.md | 2 ++ .../run.incomplete-turn.test.ts | 24 +++++++++++++++++++ src/agents/pi-embedded-runner/run.ts | 2 ++ .../pi-embedded-runner/run/incomplete-turn.ts | 8 +++++++ 5 files changed, 37 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5880db9da44..739ea4821b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Channels/setup: treat bundled channel plugins as already bundled during `channels add` and onboarding, enabling them without writing redundant `plugins.load.paths` entries or path install records. Fixes #72740. Thanks @iCodePoet. +- Agents/OpenAI-compatible: retry replay-safe empty `stop` turns once for `openai-completions` endpoints, so transient empty local backend responses no longer surface as “Agent couldn't generate a response” when a continuation succeeds. Fixes #72751. Thanks @moooV252. - Git hooks: skip ignored staged paths when formatting and restaging pre-commit files, so merge commits no longer abort when `.gitignore` newly ignores staged merged content. Fixes #72744. Thanks @100yenadmin. - Memory-core/dreaming: add a supported `dreaming.model` knob for Dream Diary narrative subagents, wired through phase config and the existing plugin subagent model-override trust gate. Refs #65963. Thanks @esqandil and @mjamiv. - Memory-core/dreaming: treat request-scoped narrative fallback as expected, skip session cleanup when no subagent run was created, and remove duplicate phase-level cleanup so fallback no longer emits warning noise. Fixes #67152. Thanks @jsompis. 
diff --git a/docs/gateway/troubleshooting.md b/docs/gateway/troubleshooting.md index ef2c6d6295e..7fca62f86a6 100644 --- a/docs/gateway/troubleshooting.md +++ b/docs/gateway/troubleshooting.md @@ -119,11 +119,13 @@ Look for: - direct tiny calls succeed, but OpenClaw runs fail only on larger prompts - backend errors about `messages[].content` expecting a string +- intermittent `incomplete turn detected ... stopReason=stop payloads=0` warnings with an OpenAI-compatible local backend - backend crashes that appear only with larger prompt-token counts or full agent runtime prompts - `messages[...].content: invalid type: sequence, expected a string` → backend rejects structured Chat Completions content parts. Fix: set `models.providers.<provider>.models[].compat.requiresStringContent: true`. + - `incomplete turn detected ... stopReason=stop payloads=0` → the backend completed the Chat Completions request but returned no user-visible assistant text for that turn. OpenClaw retries replay-safe empty OpenAI-compatible turns once; persistent failures usually mean the backend is emitting empty/non-text content or suppressing final-answer text. - direct tiny requests succeed, but OpenClaw agent runs fail with backend/model crashes (for example Gemma on some `inferrs` builds) → OpenClaw transport is likely already correct; the backend is failing on the larger agent-runtime prompt shape. - failures shrink after disabling tools but do not disappear → tool schemas were part of the pressure, but the remaining issue is still upstream model/server capacity or a backend bug. 
diff --git a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts index c0f1f77589d..29c2c72fa29 100644 --- a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts +++ b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts @@ -1080,6 +1080,30 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => { expect(retryInstruction).toBe(EMPTY_RESPONSE_RETRY_INSTRUCTION); }); + it("retries generic empty OpenAI-compatible turns from custom endpoints", () => { + const retryInstruction = resolveEmptyResponseRetryInstruction({ + provider: "llama-cpp-local", + modelId: "qwen3.6-27b", + modelApi: "openai-completions", + payloadCount: 0, + aborted: false, + timedOut: false, + attempt: makeAttemptResult({ + assistantTexts: [], + lastAssistant: { + role: "assistant", + stopReason: "stop", + provider: "llama-cpp-local", + model: "qwen3.6-27b", + content: [], + usage: { input: 950, output: 103, totalTokens: 1053 }, + } as unknown as EmbeddedRunAttemptResult["lastAssistant"], + }), + }); + + expect(retryInstruction).toBe(EMPTY_RESPONSE_RETRY_INSTRUCTION); + }); + it("does not retry clean zero-token Ollama stop turns", () => { const retryInstruction = resolveEmptyResponseRetryInstruction({ provider: "ollama", diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index a3370fa9e04..f8240826be7 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -1980,6 +1980,7 @@ export async function runEmbeddedPiAgent( : resolveReasoningOnlyRetryInstruction({ provider: activeErrorContext.provider, modelId: activeErrorContext.model, + modelApi: effectiveModel.api, executionContract, aborted, timedOut, @@ -1990,6 +1991,7 @@ export async function runEmbeddedPiAgent( : resolveEmptyResponseRetryInstruction({ provider: activeErrorContext.provider, modelId: activeErrorContext.model, + modelApi: effectiveModel.api, executionContract, payloadCount, 
aborted, diff --git a/src/agents/pi-embedded-runner/run/incomplete-turn.ts b/src/agents/pi-embedded-runner/run/incomplete-turn.ts index 112dfecd1c3..56f00c74851 100644 --- a/src/agents/pi-embedded-runner/run/incomplete-turn.ts +++ b/src/agents/pi-embedded-runner/run/incomplete-turn.ts @@ -506,6 +506,7 @@ export function shouldTreatEmptyAssistantReplyAsSilent(params: { export function resolveReasoningOnlyRetryInstruction(params: { provider?: string; modelId?: string; + modelApi?: string; executionContract?: string; aborted: boolean; timedOut: boolean; @@ -519,6 +520,7 @@ export function resolveReasoningOnlyRetryInstruction(params: { !shouldApplyNonVisibleTurnRetryGuard({ provider: params.provider, modelId: params.modelId, + modelApi: params.modelApi, executionContract: params.executionContract, }) ) { @@ -542,6 +544,7 @@ export function resolveReasoningOnlyRetryInstruction(params: { export function resolveEmptyResponseRetryInstruction(params: { provider?: string; modelId?: string; + modelApi?: string; executionContract?: string; payloadCount: number; aborted: boolean; @@ -575,6 +578,7 @@ export function resolveEmptyResponseRetryInstruction(params: { shouldApplyNonVisibleTurnRetryGuard({ provider: params.provider, modelId: params.modelId, + modelApi: params.modelApi, executionContract: params.executionContract, }) || // Keep the generic zero-usage stop retry for providers that expose a @@ -605,11 +609,15 @@ function shouldApplyPlanningOnlyRetryGuard(params: { function shouldApplyNonVisibleTurnRetryGuard(params: { provider?: string; modelId?: string; + modelApi?: string; executionContract?: string; }): boolean { if (shouldApplyPlanningOnlyRetryGuard(params)) { return true; } + if (params.modelApi === "openai-completions") { + return true; + } // Non-visible final turns are narrower than planning-only turns: there is no // user text to classify, just a replay-safe empty/thinking-only result. Ollama // gets this continuation guard without getting the planning-only or ack