feat(cli): add thinking override to infer model run

This commit is contained in:
VACInc
2026-04-28 11:32:57 -04:00
committed by Peter Steinberger
parent cfb0c34ff6
commit 223c4cf46c
4 changed files with 96 additions and 0 deletions

View File

@@ -126,6 +126,7 @@ This table maps common inference tasks to the corresponding infer command.
- `openclaw infer ...` is the primary CLI surface for these workflows.
- Use `--json` when the output will be consumed by another command or script.
- Use `--provider` or `--model provider/model` when a specific backend is required.
- Use `model run --thinking <level>` to pass a one-shot thinking/reasoning level (`off`, `minimal`, `low`, `medium`, `high`, `adaptive`, `xhigh`, or `max`) while keeping the run raw.
- For `image describe`, `audio transcribe`, and `video describe`, `--model` must use the form `<provider/model>`.
- For `image describe`, an explicit `--model` runs that provider/model directly. The model must be image-capable in the model catalog or provider config. `codex/<model>` runs a bounded Codex app-server image-understanding turn; `openai-codex/<model>` uses the OpenAI Codex OAuth provider path.
- Stateless execution commands default to local.
@@ -145,6 +146,7 @@ Use `model` for provider-backed text inference and model/provider inspection.
openclaw infer model run --prompt "Reply with exactly: smoke-ok" --json
openclaw infer model run --prompt "Summarize this changelog entry" --model openai/gpt-5.4 --json
openclaw infer model run --prompt "Describe this image in one sentence" --file ./photo.jpg --model google/gemini-2.5-flash --json
openclaw infer model run --prompt "Use more reasoning here" --thinking high --json
openclaw infer model providers --json
openclaw infer model inspect --name gpt-5.5 --json
```

View File

@@ -1,4 +1,5 @@
import { complete, type Api, type Model } from "@mariozechner/pi-ai";
import type { ThinkLevel } from "../auto-reply/thinking.js";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import { formatErrorMessage } from "../infra/errors.js";
import { prepareProviderRuntimeAuth } from "../plugins/provider-runtime.runtime.js";
@@ -32,6 +33,7 @@ type AllowedMissingApiKeyMode = ResolvedProviderAuth["mode"];
// Optional per-call tuning for a simple completion run; every field may be omitted.
export type SimpleCompletionModelOptions = {
  // Upper bound on generated tokens for this call; provider default applies when unset.
  maxTokens?: number;
  // Sampling temperature forwarded to the provider; provider default applies when unset.
  temperature?: number;
  // One-shot thinking/reasoning level override for this completion (see ThinkLevel).
  reasoning?: ThinkLevel;
  // Abort signal allowing the caller to cancel the in-flight completion.
  signal?: AbortSignal;
};

View File

@@ -573,6 +573,21 @@ describe("capability cli", () => {
);
});
it("passes thinking overrides to local model probes", async () => {
  // Drive the CLI with a one-shot --thinking flag on a local model run.
  const argv = ["capability", "model", "run", "--prompt", "hello", "--thinking", "high", "--json"];
  await runRegisteredCli({
    register: registerCapabilityCli as (program: Command) => void,
    argv,
  });
  // The local completion path must receive the override as `reasoning`.
  const expectedOptions = expect.objectContaining({ reasoning: "high" });
  expect(mocks.completeWithPreparedSimpleCompletionModel).toHaveBeenCalledWith(
    expect.objectContaining({ options: expectedOptions }),
  );
});
it("passes image files to gateway model probes as attachments", async () => {
const tempInput = path.join(os.tmpdir(), `openclaw-model-run-gateway-image-${Date.now()}.png`);
await fs.writeFile(tempInput, Buffer.from(PNG_1X1_BASE64, "base64"));
@@ -924,6 +939,60 @@ describe("capability cli", () => {
expectModelRunDispatch("local", "custom/MyModel@work");
});
it("passes thinking overrides to gateway model probes", async () => {
  // Same --thinking flag, but execution forced through the gateway transport.
  const argv = [
    "capability",
    "model",
    "run",
    "--prompt",
    "hello",
    "--gateway",
    "--thinking",
    "high",
    "--json",
  ];
  await runRegisteredCli({
    register: registerCapabilityCli as (program: Command) => void,
    argv,
  });
  // Gateway dispatch carries the override inside the agent params.
  const expectedParams = expect.objectContaining({
    thinking: "high",
    modelRun: true,
    promptMode: "none",
  });
  expect(mocks.callGateway).toHaveBeenCalledWith(
    expect.objectContaining({ method: "agent", params: expectedParams }),
  );
});
it("rejects invalid model run thinking overrides before dispatch", async () => {
  // An unknown thinking level must fail fast (exit 1) before any dispatch happens.
  const argv = [
    "capability",
    "model",
    "run",
    "--prompt",
    "hello",
    "--thinking",
    "turbo-lobster",
    "--json",
  ];
  await expect(
    runRegisteredCli({
      register: registerCapabilityCli as (program: Command) => void,
      argv,
    }),
  ).rejects.toThrow("exit 1");
  expect(mocks.runtime.error).toHaveBeenCalledWith(
    expect.stringContaining("Invalid thinking level."),
  );
  // No local prep/completion, gateway call, or JSON output should have occurred.
  const untouched = [
    mocks.prepareSimpleCompletionModelForAgent,
    mocks.completeWithPreparedSimpleCompletionModel,
    mocks.callGateway,
    mocks.runtime.writeJson,
  ];
  for (const mock of untouched) {
    expect(mock).not.toHaveBeenCalled();
  }
});
it("rejects empty model run prompts before gateway dispatch", async () => {
await expect(
runRegisteredCli({

View File

@@ -18,6 +18,7 @@ import {
completeWithPreparedSimpleCompletionModel,
prepareSimpleCompletionModelForAgent,
} from "../agents/simple-completion-runtime.js";
import { normalizeThinkLevel, type ThinkLevel } from "../auto-reply/thinking.js";
import { getRuntimeConfig } from "../config/config.js";
import { resolveAgentModelPrimaryValue } from "../config/model-input.js";
import type { OpenClawConfig } from "../config/types.openclaw.js";
@@ -650,10 +651,27 @@ async function readModelRunImageFiles(files: string[] | undefined): Promise<Mode
);
}
/**
 * Validates the optional `--thinking` CLI value and converts it to a ThinkLevel.
 *
 * Returns `undefined` when the flag was not given at all. Throws when the value
 * is not a string, or when it does not normalize to a known thinking level.
 */
function normalizeModelRunThinking(value: unknown): ThinkLevel | undefined {
  // Flag absent: no override requested.
  if (value === undefined) {
    return undefined;
  }
  // Commander should always hand us a string here; guard against programmatic misuse.
  if (typeof value !== "string") {
    throw new Error("--thinking must be a string.");
  }
  const level = normalizeThinkLevel(value);
  if (level) {
    return level;
  }
  throw new Error(
    "Invalid thinking level. Use one of: off, minimal, low, medium, high, adaptive, xhigh, max.",
  );
}
async function runModelRun(params: {
prompt: string;
files?: string[];
model?: string;
thinking?: ThinkLevel;
transport: CapabilityTransport;
}) {
const cfg = getRuntimeConfig();
@@ -715,6 +733,7 @@ async function runModelRun(params: {
typeof prepared.model.maxTokens === "number" && Number.isFinite(prepared.model.maxTokens)
? prepared.model.maxTokens
: undefined,
...(params.thinking ? { reasoning: params.thinking } : {}),
},
});
const text = collectModelRunText(result.content);
@@ -783,6 +802,7 @@ async function runModelRun(params: {
: undefined,
provider,
model,
...(params.thinking ? { thinking: params.thinking } : {}),
modelRun: true,
promptMode: "none",
cleanupBundleMcpOnRunEnd: true,
@@ -1651,12 +1671,14 @@ export function registerCapabilityCli(program: Command) {
.requiredOption("--prompt <text>", "Prompt text")
.option("--file <path>", "Image file", collectOption, [])
.option("--model <provider/model>", "Model override")
.option("--thinking <level>", "Thinking level override")
.option("--local", "Force local execution", false)
.option("--gateway", "Force gateway execution", false)
.option("--json", "Output JSON", false)
.action(async (opts) => {
await runCommandWithRuntime(defaultRuntime, async () => {
const prompt = requireModelRunPrompt(opts.prompt);
const thinking = normalizeModelRunThinking(opts.thinking);
const transport = resolveTransport({
local: Boolean(opts.local),
gateway: Boolean(opts.gateway),
@@ -1667,6 +1689,7 @@ export function registerCapabilityCli(program: Command) {
prompt,
files: opts.file as string[] | undefined,
model: opts.model as string | undefined,
thinking,
transport,
});
emitJsonOrText(defaultRuntime, Boolean(opts.json), result, formatEnvelopeForText);