From 5ac1fee4de70f90db459cdd18339770f0c8de659 Mon Sep 17 00:00:00 2001
From: VACInc <3279061+VACInc@users.noreply.github.com>
Date: Tue, 28 Apr 2026 11:39:12 -0400
Subject: [PATCH] fix(cli): route infer thinking through simple completions

---
 docs/cli/infer.md                            |  1 +
 src/agents/simple-completion-runtime.test.ts | 81 +++++++++++++++++++-
 src/agents/simple-completion-runtime.ts      | 34 ++++++--
 src/cli/capability-cli.test.ts               |  2 +-
 4 files changed, 111 insertions(+), 7 deletions(-)

diff --git a/docs/cli/infer.md b/docs/cli/infer.md
index ac11066c296..c0adcfc57fc 100644
--- a/docs/cli/infer.md
+++ b/docs/cli/infer.md
@@ -137,6 +137,7 @@ This table maps common inference tasks to the corresponding infer command.
 - `model run --file` rejects non-image inputs. Use `infer audio transcribe` for audio files and `infer video describe` for video files.
 - `model run --gateway` exercises Gateway routing, saved auth, provider selection, and the embedded runtime, but still runs as a raw model probe: it sends the supplied prompt and any image attachments without prior session transcript, bootstrap/AGENTS context, context-engine assembly, tools, or bundled MCP servers.
 - `model run --gateway --model <provider/model>` requires a trusted operator gateway credential because the request asks the Gateway to run a one-off provider/model override.
+- Local `model run --thinking` uses the lean provider-completion path; provider-specific levels are mapped to the closest portable simple-completion level (`adaptive` is sent as `medium`, `max` is sent as `xhigh`), and `off` omits the reasoning option from the request.
 
 ## Model
 
diff --git a/src/agents/simple-completion-runtime.test.ts b/src/agents/simple-completion-runtime.test.ts
index b2c0a214687..fc7c0e3c515 100644
--- a/src/agents/simple-completion-runtime.test.ts
+++ b/src/agents/simple-completion-runtime.test.ts
@@ -14,7 +14,7 @@ const hoisted = vi.hoisted(() => ({
 }));
 
 vi.mock("@mariozechner/pi-ai", () => ({
-  complete: hoisted.completeMock,
+  completeSimple: hoisted.completeMock,
 }));
 
 vi.mock("./pi-embedded-runner/model.js", () => ({
@@ -516,4 +516,83 @@ describe("completeWithPreparedSimpleCompletionModel", () => {
       },
     );
   });
+
+  it("normalizes OpenClaw-only thinking levels before using pi-ai simple completion", async () => {
+    const model = {
+      provider: "openai",
+      id: "gpt-5.4",
+      name: "gpt-5.4",
+      api: "openai-responses",
+      reasoning: true,
+      input: ["text"],
+      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+      contextWindow: 128000,
+      maxTokens: 4096,
+    } satisfies Model<"openai-responses">;
+
+    await completeWithPreparedSimpleCompletionModel({
+      model,
+      auth: {
+        apiKey: "sk-test",
+        source: "env:OPENAI_API_KEY",
+        mode: "api-key",
+      },
+      context: {
+        messages: [{ role: "user", content: "pong", timestamp: 1 }],
+      },
+      options: {
+        reasoning: "max",
+      },
+    });
+
+    expect(hoisted.completeMock).toHaveBeenCalledWith(
+      model,
+      {
+        messages: [{ role: "user", content: "pong", timestamp: 1 }],
+      },
+      {
+        reasoning: "xhigh",
+        apiKey: "sk-test",
+      },
+    );
+  });
+
+  it("omits reasoning for local simple completion when thinking is off", async () => {
+    const model = {
+      provider: "openai",
+      id: "gpt-5.4",
+      name: "gpt-5.4",
+      api: "openai-responses",
+      reasoning: true,
+      input: ["text"],
+      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+      contextWindow: 128000,
+      maxTokens: 4096,
+    } satisfies Model<"openai-responses">;
+
+    await completeWithPreparedSimpleCompletionModel({
+      model,
+      auth: {
+        apiKey: "sk-test",
+        source: "env:OPENAI_API_KEY",
+        mode: "api-key",
+      },
+      context: {
+        messages: [{ role: "user", content: "pong", timestamp: 1 }],
+      },
+      options: {
+        reasoning: "off",
+      },
+    });
+
+    expect(hoisted.completeMock).toHaveBeenCalledWith(
+      model,
+      {
+        messages: [{ role: "user", content: "pong", timestamp: 1 }],
+      },
+      {
+        apiKey: "sk-test",
+      },
+    );
+  });
 });
diff --git a/src/agents/simple-completion-runtime.ts b/src/agents/simple-completion-runtime.ts
index 5c1c6fde029..b877577c359 100644
--- a/src/agents/simple-completion-runtime.ts
+++ b/src/agents/simple-completion-runtime.ts
@@ -1,4 +1,9 @@
-import { complete, type Api, type Model } from "@mariozechner/pi-ai";
+import {
+  completeSimple,
+  type Api,
+  type Model,
+  type ThinkingLevel as SimpleCompletionThinkingLevel,
+} from "@mariozechner/pi-ai";
 import type { ThinkLevel } from "../auto-reply/thinking.js";
 import type { OpenClawConfig } from "../config/types.openclaw.js";
 import { formatErrorMessage } from "../infra/errors.js";
@@ -33,7 +38,7 @@ type AllowedMissingApiKeyMode = ResolvedProviderAuth["mode"];
 export type SimpleCompletionModelOptions = {
   maxTokens?: number;
   temperature?: number;
-  reasoning?: ThinkLevel;
+  reasoning?: ThinkLevel | SimpleCompletionThinkingLevel;
   signal?: AbortSignal;
 };
 
@@ -280,13 +285,32 @@ export async function prepareSimpleCompletionModelForAgent(params: {
 export async function completeWithPreparedSimpleCompletionModel(params: {
   model: Model<Api>;
   auth: ResolvedProviderAuth;
-  context: Parameters<typeof complete>[1];
+  context: Parameters<typeof completeSimple>[1];
   cfg?: OpenClawConfig;
   options?: SimpleCompletionModelOptions;
 }) {
   const completionModel = prepareModelForSimpleCompletion({ model: params.model, cfg: params.cfg });
-  return await complete(completionModel, params.context, {
-    ...params.options,
+  const { reasoning: rawReasoning, ...options } = params.options ?? {};
+  const reasoning = normalizeSimpleCompletionReasoning(rawReasoning);
+  return await completeSimple(completionModel, params.context, {
+    ...options,
+    ...(reasoning ? { reasoning } : {}),
     apiKey: params.auth.apiKey,
   });
 }
+
+function normalizeSimpleCompletionReasoning(
+  reasoning: SimpleCompletionModelOptions["reasoning"],
+): SimpleCompletionThinkingLevel | undefined {
+  switch (reasoning) {
+    case undefined:
+    case "off":
+      return undefined;
+    case "adaptive":
+      return "medium";
+    case "max":
+      return "xhigh";
+    default:
+      return reasoning;
+  }
+}
diff --git a/src/cli/capability-cli.test.ts b/src/cli/capability-cli.test.ts
index f91556dd81d..cc099861849 100644
--- a/src/cli/capability-cli.test.ts
+++ b/src/cli/capability-cli.test.ts
@@ -978,7 +978,7 @@ describe("capability cli", () => {
           "--prompt",
           "hello",
           "--thinking",
-          "turbo-lobster",
+          "turbo-mode",
           "--json",
         ],
       }),