fix(cli): route infer thinking through simple completions

2026-05-15 19:10:44 +00:00 · 2026-04-28 11:39:12 -04:00
parent 223c4cf46c
commit 5ac1fee4de
4 changed files with 111 additions and 7 deletions
--- a/docs/cli/infer.md
+++ b/docs/cli/infer.md
@@ -137,6 +137,7 @@ This table maps common inference tasks to the corresponding infer command.
 - `model run --file` rejects non-image inputs. Use `infer audio transcribe` for audio files and `infer video describe` for video files.
 - `model run --gateway` exercises Gateway routing, saved auth, provider selection, and the embedded runtime, but still runs as a raw model probe: it sends the supplied prompt and any image attachments without prior session transcript, bootstrap/AGENTS context, context-engine assembly, tools, or bundled MCP servers.
 - `model run --gateway --model <provider/model>` requires a trusted operator gateway credential because the request asks the Gateway to run a one-off provider/model override.
+- Local `model run --thinking` uses the lean provider-completion path; provider-specific levels are mapped to the closest portable simple-completion level (`adaptive` becomes `medium`, `max` becomes `xhigh`), and `off` omits the reasoning option entirely.

 ## Model

--- a/src/agents/simple-completion-runtime.test.ts
+++ b/src/agents/simple-completion-runtime.test.ts
@@ -14,7 +14,7 @@ const hoisted = vi.hoisted(() => ({
 }));

 vi.mock("@mariozechner/pi-ai", () => ({
-  complete: hoisted.completeMock,
+  completeSimple: hoisted.completeMock,
 }));

 vi.mock("./pi-embedded-runner/model.js", () => ({
@@ -516,4 +516,83 @@ describe("completeWithPreparedSimpleCompletionModel", () => {
      },
    );
  });
+
+  it("normalizes OpenClaw-only thinking levels before using pi-ai simple completion", async () => {
+    const model = {
+      provider: "openai",
+      id: "gpt-5.4",
+      name: "gpt-5.4",
+      api: "openai-responses",
+      reasoning: true,
+      input: ["text"],
+      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+      contextWindow: 128000,
+      maxTokens: 4096,
+    } satisfies Model<"openai-responses">;
+
+    await completeWithPreparedSimpleCompletionModel({
+      model,
+      auth: {
+        apiKey: "sk-test",
+        source: "env:OPENAI_API_KEY",
+        mode: "api-key",
+      },
+      context: {
+        messages: [{ role: "user", content: "pong", timestamp: 1 }],
+      },
+      options: {
+        reasoning: "max",
+      },
+    });
+
+    expect(hoisted.completeMock).toHaveBeenCalledWith(
+      model,
+      {
+        messages: [{ role: "user", content: "pong", timestamp: 1 }],
+      },
+      {
+        reasoning: "xhigh",
+        apiKey: "sk-test",
+      },
+    );
+  });
+
+  it("omits reasoning for local simple completion when thinking is off", async () => {
+    const model = {
+      provider: "openai",
+      id: "gpt-5.4",
+      name: "gpt-5.4",
+      api: "openai-responses",
+      reasoning: true,
+      input: ["text"],
+      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+      contextWindow: 128000,
+      maxTokens: 4096,
+    } satisfies Model<"openai-responses">;
+
+    await completeWithPreparedSimpleCompletionModel({
+      model,
+      auth: {
+        apiKey: "sk-test",
+        source: "env:OPENAI_API_KEY",
+        mode: "api-key",
+      },
+      context: {
+        messages: [{ role: "user", content: "pong", timestamp: 1 }],
+      },
+      options: {
+        reasoning: "off",
+      },
+    });
+
+    expect(hoisted.completeMock).toHaveBeenCalledWith(
+      model,
+      {
+        messages: [{ role: "user", content: "pong", timestamp: 1 }],
+      },
+      {
+        apiKey: "sk-test",
+      },
+    );
+  });
 });
--- a/src/agents/simple-completion-runtime.ts
+++ b/src/agents/simple-completion-runtime.ts
@@ -1,4 +1,9 @@
-import { complete, type Api, type Model } from "@mariozechner/pi-ai";
+import {
+  completeSimple,
+  type Api,
+  type Model,
+  type ThinkingLevel as SimpleCompletionThinkingLevel,
+} from "@mariozechner/pi-ai";
 import type { ThinkLevel } from "../auto-reply/thinking.js";
 import type { OpenClawConfig } from "../config/types.openclaw.js";
 import { formatErrorMessage } from "../infra/errors.js";
@@ -33,7 +38,7 @@ type AllowedMissingApiKeyMode = ResolvedProviderAuth["mode"];
 export type SimpleCompletionModelOptions = {
  maxTokens?: number;
  temperature?: number;
-  reasoning?: ThinkLevel;
+  reasoning?: ThinkLevel | SimpleCompletionThinkingLevel;
  signal?: AbortSignal;
 };

@@ -280,13 +285,32 @@ export async function prepareSimpleCompletionModelForAgent(params: {
 export async function completeWithPreparedSimpleCompletionModel(params: {
  model: Model<Api>;
  auth: ResolvedProviderAuth;
-  context: Parameters<typeof complete>[1];
+  context: Parameters<typeof completeSimple>[1];
  cfg?: OpenClawConfig;
  options?: SimpleCompletionModelOptions;
 }) {
  const completionModel = prepareModelForSimpleCompletion({ model: params.model, cfg: params.cfg });
-  return await complete(completionModel, params.context, {
-    ...params.options,
+  const { reasoning: rawReasoning, ...options } = params.options ?? {};
+  const reasoning = normalizeSimpleCompletionReasoning(rawReasoning);
+  return await completeSimple(completionModel, params.context, {
+    ...options,
+    ...(reasoning ? { reasoning } : {}),
    apiKey: params.auth.apiKey,
  });
 }
+
+function normalizeSimpleCompletionReasoning(
+  reasoning: SimpleCompletionModelOptions["reasoning"],
+): SimpleCompletionThinkingLevel | undefined {
+  switch (reasoning) {
+    case undefined:
+    case "off":
+      return undefined;
+    case "adaptive":
+      return "medium";
+    case "max":
+      return "xhigh";
+    default:
+      return reasoning;
+  }
+}
--- a/src/cli/capability-cli.test.ts
+++ b/src/cli/capability-cli.test.ts
@@ -978,7 +978,7 @@ describe("capability cli", () => {
          "--prompt",
          "hello",
          "--thinking",
-          "turbo-lobster",
+          "turbo-mode",
          "--json",
        ],
      }),