From 223c4cf46c02ff745693307e376b19284b528e8d Mon Sep 17 00:00:00 2001 From: VACInc <3279061+VACInc@users.noreply.github.com> Date: Tue, 28 Apr 2026 11:32:57 -0400 Subject: [PATCH] feat(cli): add thinking override to infer model run --- docs/cli/infer.md | 2 + src/agents/simple-completion-runtime.ts | 2 + src/cli/capability-cli.test.ts | 69 +++++++++++++++++++++++++ src/cli/capability-cli.ts | 23 +++++++++ 4 files changed, 96 insertions(+) diff --git a/docs/cli/infer.md b/docs/cli/infer.md index 9e506396669..ac11066c296 100644 --- a/docs/cli/infer.md +++ b/docs/cli/infer.md @@ -126,6 +126,7 @@ This table maps common inference tasks to the corresponding infer command. - `openclaw infer ...` is the primary CLI surface for these workflows. - Use `--json` when the output will be consumed by another command or script. - Use `--provider` or `--model provider/model` when a specific backend is required. +- Use `model run --thinking <level>` to pass a one-shot thinking/reasoning level (`off`, `minimal`, `low`, `medium`, `high`, `adaptive`, `xhigh`, or `max`) while keeping the run raw. - For `image describe`, `audio transcribe`, and `video describe`, `--model` must use the form `<provider>/<model>`. - For `image describe`, an explicit `--model` runs that provider/model directly. The model must be image-capable in the model catalog or provider config. `codex/<model>` runs a bounded Codex app-server image-understanding turn; `openai-codex/<model>` uses the OpenAI Codex OAuth provider path. - Stateless execution commands default to local. @@ -145,6 +146,7 @@ Use `model` for provider-backed text inference and model/provider inspection. 
openclaw infer model run --prompt "Reply with exactly: smoke-ok" --json openclaw infer model run --prompt "Summarize this changelog entry" --model openai/gpt-5.4 --json openclaw infer model run --prompt "Describe this image in one sentence" --file ./photo.jpg --model google/gemini-2.5-flash --json +openclaw infer model run --prompt "Use more reasoning here" --thinking high --json openclaw infer model providers --json openclaw infer model inspect --name gpt-5.5 --json ``` diff --git a/src/agents/simple-completion-runtime.ts b/src/agents/simple-completion-runtime.ts index 4c1d2906bd7..5c1c6fde029 100644 --- a/src/agents/simple-completion-runtime.ts +++ b/src/agents/simple-completion-runtime.ts @@ -1,4 +1,5 @@ import { complete, type Api, type Model } from "@mariozechner/pi-ai"; +import type { ThinkLevel } from "../auto-reply/thinking.js"; import type { OpenClawConfig } from "../config/types.openclaw.js"; import { formatErrorMessage } from "../infra/errors.js"; import { prepareProviderRuntimeAuth } from "../plugins/provider-runtime.runtime.js"; @@ -32,6 +33,7 @@ type AllowedMissingApiKeyMode = ResolvedProviderAuth["mode"]; export type SimpleCompletionModelOptions = { maxTokens?: number; temperature?: number; + reasoning?: ThinkLevel; signal?: AbortSignal; }; diff --git a/src/cli/capability-cli.test.ts b/src/cli/capability-cli.test.ts index edb2b81c94f..f91556dd81d 100644 --- a/src/cli/capability-cli.test.ts +++ b/src/cli/capability-cli.test.ts @@ -573,6 +573,21 @@ describe("capability cli", () => { ); }); + it("passes thinking overrides to local model probes", async () => { + await runRegisteredCli({ + register: registerCapabilityCli as (program: Command) => void, + argv: ["capability", "model", "run", "--prompt", "hello", "--thinking", "high", "--json"], + }); + + expect(mocks.completeWithPreparedSimpleCompletionModel).toHaveBeenCalledWith( + expect.objectContaining({ + options: expect.objectContaining({ + reasoning: "high", + }), + }), + ); + }); + it("passes image 
files to gateway model probes as attachments", async () => { const tempInput = path.join(os.tmpdir(), `openclaw-model-run-gateway-image-${Date.now()}.png`); await fs.writeFile(tempInput, Buffer.from(PNG_1X1_BASE64, "base64")); @@ -924,6 +939,60 @@ describe("capability cli", () => { expectModelRunDispatch("local", "custom/MyModel@work"); }); + it("passes thinking overrides to gateway model probes", async () => { + await runRegisteredCli({ + register: registerCapabilityCli as (program: Command) => void, + argv: [ + "capability", + "model", + "run", + "--prompt", + "hello", + "--gateway", + "--thinking", + "high", + "--json", + ], + }); + + expect(mocks.callGateway).toHaveBeenCalledWith( + expect.objectContaining({ + method: "agent", + params: expect.objectContaining({ + thinking: "high", + modelRun: true, + promptMode: "none", + }), + }), + ); + }); + + it("rejects invalid model run thinking overrides before dispatch", async () => { + await expect( + runRegisteredCli({ + register: registerCapabilityCli as (program: Command) => void, + argv: [ + "capability", + "model", + "run", + "--prompt", + "hello", + "--thinking", + "turbo-lobster", + "--json", + ], + }), + ).rejects.toThrow("exit 1"); + + expect(mocks.runtime.error).toHaveBeenCalledWith( + expect.stringContaining("Invalid thinking level."), + ); + expect(mocks.prepareSimpleCompletionModelForAgent).not.toHaveBeenCalled(); + expect(mocks.completeWithPreparedSimpleCompletionModel).not.toHaveBeenCalled(); + expect(mocks.callGateway).not.toHaveBeenCalled(); + expect(mocks.runtime.writeJson).not.toHaveBeenCalled(); + }); + it("rejects empty model run prompts before gateway dispatch", async () => { await expect( runRegisteredCli({ diff --git a/src/cli/capability-cli.ts b/src/cli/capability-cli.ts index ee84dd17c3a..0b6f2eceb7d 100644 --- a/src/cli/capability-cli.ts +++ b/src/cli/capability-cli.ts @@ -18,6 +18,7 @@ import { completeWithPreparedSimpleCompletionModel, prepareSimpleCompletionModelForAgent, } from 
"../agents/simple-completion-runtime.js"; +import { normalizeThinkLevel, type ThinkLevel } from "../auto-reply/thinking.js"; import { getRuntimeConfig } from "../config/config.js"; import { resolveAgentModelPrimaryValue } from "../config/model-input.js"; import type { OpenClawConfig } from "../config/types.openclaw.js"; @@ -650,10 +651,27 @@ async function readModelRunImageFiles(files: string[] | undefined): Promise<…> .option("--prompt <text>", "Prompt text") .option("--file <path>", "Image file", collectOption, []) .option("--model <model>", "Model override") + .option("--thinking <level>", "Thinking level override") .option("--local", "Force local execution", false) .option("--gateway", "Force gateway execution", false) .option("--json", "Output JSON", false) .action(async (opts) => { await runCommandWithRuntime(defaultRuntime, async () => { const prompt = requireModelRunPrompt(opts.prompt); + const thinking = normalizeModelRunThinking(opts.thinking); const transport = resolveTransport({ local: Boolean(opts.local), gateway: Boolean(opts.gateway), @@ -1667,6 +1689,7 @@ export function registerCapabilityCli(program: Command) { prompt, files: opts.file as string[] | undefined, model: opts.model as string | undefined, + thinking, transport, }); emitJsonOrText(defaultRuntime, Boolean(opts.json), result, formatEnvelopeForText);