From fd9d32f02264679abb495808614991d1650e786d Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Mon, 27 Apr 2026 11:41:31 +0100
Subject: [PATCH] fix(agents): retry empty compatible turns

---
 CHANGELOG.md                                  |  2 +-
 docs/cli/agent.md                             |  2 +
 .../run.incomplete-turn.test.ts               | 80 +++++++++++++++++++
 .../pi-embedded-runner/run/incomplete-turn.ts |  2 +-
 src/cli/program/register.agent.test.ts        | 14 ++++
 src/cli/program/register.agent.ts             |  1 +
 src/commands/agent-via-gateway.test.ts        | 15 ++++
 src/commands/agent-via-gateway.ts             |  2 +
 8 files changed, 116 insertions(+), 2 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5b775dca202..484055d8ba6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,7 +26,7 @@ Docs: https://docs.openclaw.ai
 - Channels/setup: treat bundled channel plugins as already bundled during `channels add` and onboarding, enabling them without writing redundant `plugins.load.paths` entries or path install records. Fixes #72740. Thanks @iCodePoet.
 - WhatsApp: honor gateway `HTTPS_PROXY` / `HTTP_PROXY` env vars for QR-login WebSocket connections, while respecting `NO_PROXY`, so proxied networks no longer fall back to direct `mmg.whatsapp.net` connections that time out with 408. Fixes #72547; supersedes #72692. Thanks @mebusw and @SymbolStar.
 - Bonjour: default mDNS advertisements to the system hostname when it is DNS-safe, avoiding `openclaw.local` probing conflicts and Gateway restart loops on hosts such as `Lobster` or `ubuntu`. Fixes #72355 and #72689; supersedes #72694. Thanks @mscheuerlein-bot, @gcusms, @moyuwuhen601, @pavan987, @zml-0912, @hhq365, and @SymbolStar.
-- Agents/OpenAI-compatible: retry replay-safe empty `stop` turns once for `openai-completions` endpoints, so transient empty local backend responses no longer surface as “Agent couldn't generate a response” when a continuation succeeds. Fixes #72751. Thanks @moooV252.
+- Agents/OpenAI-compatible: retry replay-safe empty `stop` turns once for `openai-completions` endpoints, so transient empty local backend responses no longer surface as “Agent couldn't generate a response” when a continuation succeeds. Also restore `openclaw agent --model` for one-shot CLI runs. Fixes #72751. Thanks @moooV252.
 - Git hooks: skip ignored staged paths when formatting and restaging pre-commit files, so merge commits no longer abort when `.gitignore` newly ignores staged merged content. Fixes #72744. Thanks @100yenadmin.
 - Memory-core/dreaming: add a supported `dreaming.model` knob for Dream Diary narrative subagents, wired through phase config and the existing plugin subagent model-override trust gate. Refs #65963. Thanks @esqandil and @mjamiv.
 - Memory-core/dreaming: treat request-scoped narrative fallback as expected, skip session cleanup when no subagent run was created, and remove duplicate phase-level cleanup so fallback no longer emits warning noise. Fixes #67152. Thanks @jsompis.
diff --git a/docs/cli/agent.md b/docs/cli/agent.md
index e0119c082a1..f8e4bd8d5d4 100644
--- a/docs/cli/agent.md
+++ b/docs/cli/agent.md
@@ -26,6 +26,7 @@ Related:
 - `-t, --to <dest>`: recipient used to derive the session key
 - `--session-id <id>`: explicit session id
 - `--agent <id>`: agent id; overrides routing bindings
+- `--model <id>`: model override for this run (`provider/model` or model id)
 - `--thinking <level>`: agent thinking level (`off`, `minimal`, `low`, `medium`, `high`, plus provider-supported custom levels such as `xhigh`, `adaptive`, or `max`)
 - `--verbose <on|off>`: persist verbose level for the session
 - `--channel <channel>`: delivery channel; omit to use the main session channel
@@ -42,6 +43,7 @@ Related:
 ```bash
 openclaw agent --to +15555550123 --message "status update" --deliver
 openclaw agent --agent ops --message "Summarize logs"
+openclaw agent --agent ops --model openai/gpt-5.4 --message "Summarize logs"
 openclaw agent --session-id 1234 --message "Summarize inbox" --thinking medium
 openclaw agent --to +15555550123 --message "Trace logs" --verbose on --json
 openclaw agent --agent ops --message "Generate report" --deliver --reply-channel slack --reply-to "#reports"
diff --git a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
index 29c2c72fa29..677804d33bb 100644
--- a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
+++ b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
@@ -642,6 +642,62 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
     expect(mockedLog.warn).toHaveBeenCalledWith(expect.stringContaining("empty response detected"));
   });
 
+  it("retries empty openai-compatible stop turns even when the backend reports output tokens", async () => {
+    mockedClassifyFailoverReason.mockReturnValue(null);
+    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
+      makeAttemptResult({
+        assistantTexts: [],
+        lastAssistant: {
+          role: "assistant",
+          api: "openai-completions",
+          stopReason: "stop",
+          provider: "llamacpp",
+          model: "qwen3.6-27b",
+          content: [],
+          usage: {
+            input: 512,
+            output: 103,
+            cacheRead: 0,
+            cacheWrite: 0,
+            totalTokens: 615,
+          },
+        } as unknown as EmbeddedRunAttemptResult["lastAssistant"],
+      }),
+    );
+    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
+      makeAttemptResult({
+        assistantTexts: ["Visible local answer."],
+        lastAssistant: {
+          role: "assistant",
+          api: "openai-completions",
+          stopReason: "stop",
+          provider: "llamacpp",
+          model: "qwen3.6-27b",
+          content: [{ type: "text", text: "Visible local answer." }],
+          usage: {
+            input: 640,
+            output: 5,
+            cacheRead: 0,
+            cacheWrite: 0,
+            totalTokens: 645,
+          },
+        } as unknown as EmbeddedRunAttemptResult["lastAssistant"],
+      }),
+    );
+
+    await runEmbeddedPiAgent({
+      ...overflowBaseRunParams,
+      provider: "llamacpp",
+      model: "qwen3.6-27b",
+      runId: "run-empty-openai-compatible-stop-continuation",
+    });
+
+    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
+    const secondCall = mockedRunEmbeddedAttempt.mock.calls[1]?.[0] as { prompt?: string };
+    expect(secondCall.prompt).toContain(EMPTY_RESPONSE_RETRY_INSTRUCTION);
+    expect(mockedLog.warn).toHaveBeenCalledWith(expect.stringContaining("empty response detected"));
+  });
+
   it("surfaces an error after exhausting empty-response retries", async () => {
     mockedClassifyFailoverReason.mockReturnValue(null);
     mockedRunEmbeddedAttempt.mockResolvedValue(
@@ -1426,6 +1482,30 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
     expect(retryInstruction).toBeNull();
   });
 
+  it("detects empty openai-compatible stop turns with non-zero output usage", () => {
+    const retryInstruction = resolveEmptyResponseRetryInstruction({
+      provider: "llamacpp",
+      modelId: "qwen3.6-27b",
+      modelApi: "openai-completions",
+      payloadCount: 0,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptResult({
+        assistantTexts: [],
+        lastAssistant: {
+          role: "assistant",
+          stopReason: "stop",
+          provider: "llamacpp",
+          model: "qwen3.6-27b",
+          content: [],
+          usage: { input: 512, output: 103, totalTokens: 615 },
+        } as unknown as EmbeddedRunAttemptResult["lastAssistant"],
+      }),
+    });
+
+    expect(retryInstruction).toBe(EMPTY_RESPONSE_RETRY_INSTRUCTION);
+  });
+
   it("detects generic empty GPT turns without visible text", () => {
     const retryInstruction = resolveEmptyResponseRetryInstruction({
       provider: "openai",
diff --git a/src/agents/pi-embedded-runner/run/incomplete-turn.ts b/src/agents/pi-embedded-runner/run/incomplete-turn.ts
index 56f00c74851..5bcf46f6123 100644
--- a/src/agents/pi-embedded-runner/run/incomplete-turn.ts
+++ b/src/agents/pi-embedded-runner/run/incomplete-turn.ts
@@ -615,7 +615,7 @@ function shouldApplyNonVisibleTurnRetryGuard(params: {
   if (shouldApplyPlanningOnlyRetryGuard(params)) {
     return true;
   }
-  if (params.modelApi === "openai-completions") {
+  if (normalizeLowercaseStringOrEmpty(params.modelApi ?? "") === "openai-completions") {
     return true;
   }
   // Non-visible final turns are narrower than planning-only turns: there is no
diff --git a/src/cli/program/register.agent.test.ts b/src/cli/program/register.agent.test.ts
index f6bc70373d9..0979050ad4a 100644
--- a/src/cli/program/register.agent.test.ts
+++ b/src/cli/program/register.agent.test.ts
@@ -109,6 +109,20 @@ describe("registerAgentCommands", () => {
     );
   });
 
+  it("accepts a model override for one-shot agent runs", async () => {
+    await runCli(["agent", "--message", "hi", "--agent", "ops", "--model", "openai/gpt-5.4"]);
+
+    expect(agentCliCommandMock).toHaveBeenCalledWith(
+      expect.objectContaining({
+        message: "hi",
+        agent: "ops",
+        model: "openai/gpt-5.4",
+      }),
+      runtime,
+      { deps: true },
+    );
+  });
+
   it("runs agents add and computes hasFlags based on explicit options", async () => {
     await runCli(["agents", "add", "alpha"]);
     expect(agentsAddCommandMock).toHaveBeenNthCalledWith(
diff --git a/src/cli/program/register.agent.ts b/src/cli/program/register.agent.ts
index 9c5227351c3..6addea2479c 100644
--- a/src/cli/program/register.agent.ts
+++ b/src/cli/program/register.agent.ts
@@ -28,6 +28,7 @@ export function registerAgentCommands(program: Command, args: { agentChannelOpti
     .option("-t, --to <number>", "Recipient number in E.164 used to derive the session key")
     .option("--session-id <id>", "Use an explicit session id")
     .option("--agent <id>", "Agent id (overrides routing bindings)")
+    .option("--model <id>", "Model override for this run (provider/model or model id)")
     .option(
       "--thinking <level>",
       "Thinking level: off | minimal | low | medium | high | xhigh | adaptive | max where supported",
diff --git a/src/commands/agent-via-gateway.test.ts b/src/commands/agent-via-gateway.test.ts
index 02e99f17930..80221e59420 100644
--- a/src/commands/agent-via-gateway.test.ts
+++ b/src/commands/agent-via-gateway.test.ts
@@ -127,6 +127,21 @@ describe("agentCliCommand", () => {
     });
   });
 
+  it("passes model overrides through gateway requests", async () => {
+    await withTempStore(async () => {
+      mockGatewaySuccessReply();
+
+      await agentCliCommand({ message: "hi", to: "+1555", model: "ollama/qwen3.5:9b" }, runtime);
+
+      expect(callGateway).toHaveBeenCalledTimes(1);
+      expect(callGateway.mock.calls[0]?.[0]).toMatchObject({
+        params: {
+          model: "ollama/qwen3.5:9b",
+        },
+      });
+    });
+  });
+
   it("routes diagnostics to stderr before JSON gateway execution", async () => {
     await withTempStore(async () => {
       const response = {
diff --git a/src/commands/agent-via-gateway.ts b/src/commands/agent-via-gateway.ts
index a993e7341bd..3428b1b2b93 100644
--- a/src/commands/agent-via-gateway.ts
+++ b/src/commands/agent-via-gateway.ts
@@ -36,6 +36,7 @@ const NO_GATEWAY_TIMEOUT_MS = 2_147_000_000;
 export type AgentCliOpts = {
   message: string;
   agent?: string;
+  model?: string;
   to?: string;
   sessionId?: string;
   thinking?: string;
@@ -140,6 +141,7 @@ export async function agentViaGatewayCommand(opts: AgentCliOpts, runtime: Runtim
         params: {
           message: body,
           agentId,
+          model: opts.model,
           to: opts.to,
           replyTo: opts.replyTo,
           sessionId: opts.sessionId,