mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-15 19:10:44 +00:00
fix(cli): route infer thinking through simple completions
This commit is contained in:
committed by
Peter Steinberger
parent
223c4cf46c
commit
5ac1fee4de
@@ -137,6 +137,7 @@ This table maps common inference tasks to the corresponding infer command.
|
||||
- `model run --file` rejects non-image inputs. Use `infer audio transcribe` for audio files and `infer video describe` for video files.
|
||||
- `model run --gateway` exercises Gateway routing, saved auth, provider selection, and the embedded runtime, but still runs as a raw model probe: it sends the supplied prompt and any image attachments without prior session transcript, bootstrap/AGENTS context, context-engine assembly, tools, or bundled MCP servers.
|
||||
- `model run --gateway --model <provider/model>` requires a trusted operator gateway credential because the request asks the Gateway to run a one-off provider/model override.
|
||||
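- Local `model run --thinking` uses the lean provider-completion path; provider-specific levels such as `adaptive` and `max` are mapped to the closest portable simple-completion level (`adaptive` becomes `medium`, `max` becomes `xhigh`), and `off` omits the reasoning option entirely.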
|
||||
|
||||
## Model
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ const hoisted = vi.hoisted(() => ({
|
||||
}));
|
||||
|
||||
vi.mock("@mariozechner/pi-ai", () => ({
|
||||
complete: hoisted.completeMock,
|
||||
completeSimple: hoisted.completeMock,
|
||||
}));
|
||||
|
||||
vi.mock("./pi-embedded-runner/model.js", () => ({
|
||||
@@ -516,4 +516,83 @@ describe("completeWithPreparedSimpleCompletionModel", () => {
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
it("normalizes OpenClaw-only thinking levels before using pi-ai simple completion", async () => {
|
||||
const model = {
|
||||
provider: "openai",
|
||||
id: "gpt-5.4",
|
||||
name: "gpt-5.4",
|
||||
api: "openai-responses",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 128000,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-responses">;
|
||||
|
||||
await completeWithPreparedSimpleCompletionModel({
|
||||
model,
|
||||
auth: {
|
||||
apiKey: "sk-test",
|
||||
source: "env:OPENAI_API_KEY",
|
||||
mode: "api-key",
|
||||
},
|
||||
context: {
|
||||
messages: [{ role: "user", content: "pong", timestamp: 1 }],
|
||||
},
|
||||
options: {
|
||||
reasoning: "max",
|
||||
},
|
||||
});
|
||||
|
||||
expect(hoisted.completeMock).toHaveBeenCalledWith(
|
||||
model,
|
||||
{
|
||||
messages: [{ role: "user", content: "pong", timestamp: 1 }],
|
||||
},
|
||||
{
|
||||
reasoning: "xhigh",
|
||||
apiKey: "sk-test",
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
it("omits reasoning for local simple completion when thinking is off", async () => {
|
||||
const model = {
|
||||
provider: "openai",
|
||||
id: "gpt-5.4",
|
||||
name: "gpt-5.4",
|
||||
api: "openai-responses",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 128000,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-responses">;
|
||||
|
||||
await completeWithPreparedSimpleCompletionModel({
|
||||
model,
|
||||
auth: {
|
||||
apiKey: "sk-test",
|
||||
source: "env:OPENAI_API_KEY",
|
||||
mode: "api-key",
|
||||
},
|
||||
context: {
|
||||
messages: [{ role: "user", content: "pong", timestamp: 1 }],
|
||||
},
|
||||
options: {
|
||||
reasoning: "off",
|
||||
},
|
||||
});
|
||||
|
||||
expect(hoisted.completeMock).toHaveBeenCalledWith(
|
||||
model,
|
||||
{
|
||||
messages: [{ role: "user", content: "pong", timestamp: 1 }],
|
||||
},
|
||||
{
|
||||
apiKey: "sk-test",
|
||||
},
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,4 +1,9 @@
|
||||
import { complete, type Api, type Model } from "@mariozechner/pi-ai";
|
||||
import {
|
||||
completeSimple,
|
||||
type Api,
|
||||
type Model,
|
||||
type ThinkingLevel as SimpleCompletionThinkingLevel,
|
||||
} from "@mariozechner/pi-ai";
|
||||
import type { ThinkLevel } from "../auto-reply/thinking.js";
|
||||
import type { OpenClawConfig } from "../config/types.openclaw.js";
|
||||
import { formatErrorMessage } from "../infra/errors.js";
|
||||
@@ -33,7 +38,7 @@ type AllowedMissingApiKeyMode = ResolvedProviderAuth["mode"];
|
||||
export type SimpleCompletionModelOptions = {
|
||||
maxTokens?: number;
|
||||
temperature?: number;
|
||||
reasoning?: ThinkLevel;
|
||||
reasoning?: ThinkLevel | SimpleCompletionThinkingLevel;
|
||||
signal?: AbortSignal;
|
||||
};
|
||||
|
||||
@@ -280,13 +285,32 @@ export async function prepareSimpleCompletionModelForAgent(params: {
|
||||
export async function completeWithPreparedSimpleCompletionModel(params: {
|
||||
model: Model<Api>;
|
||||
auth: ResolvedProviderAuth;
|
||||
context: Parameters<typeof complete>[1];
|
||||
context: Parameters<typeof completeSimple>[1];
|
||||
cfg?: OpenClawConfig;
|
||||
options?: SimpleCompletionModelOptions;
|
||||
}) {
|
||||
const completionModel = prepareModelForSimpleCompletion({ model: params.model, cfg: params.cfg });
|
||||
return await complete(completionModel, params.context, {
|
||||
...params.options,
|
||||
const { reasoning: rawReasoning, ...options } = params.options ?? {};
|
||||
const reasoning = normalizeSimpleCompletionReasoning(rawReasoning);
|
||||
return await completeSimple(completionModel, params.context, {
|
||||
...options,
|
||||
...(reasoning ? { reasoning } : {}),
|
||||
apiKey: params.auth.apiKey,
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Translates a caller-supplied reasoning level into one accepted by the
 * simple-completion API.
 *
 * Mapping performed by the switch below:
 * - `undefined` and `"off"` yield `undefined`, so the caller can leave the
 *   reasoning option out of the request altogether.
 * - `"adaptive"` becomes `"medium"`.
 * - `"max"` becomes `"xhigh"`.
 * - Every other value is returned unchanged.
 *
 * @param reasoning - The requested level from `SimpleCompletionModelOptions`.
 * @returns The normalized level, or `undefined` when no reasoning option
 *   should be sent.
 */
function normalizeSimpleCompletionReasoning(
|
||||
reasoning: SimpleCompletionModelOptions["reasoning"],
|
||||
): SimpleCompletionThinkingLevel | undefined {
|
||||
switch (reasoning) {
|
||||
case undefined:
|
||||
case "off":
|
||||
return undefined;
|
||||
case "adaptive":
|
||||
return "medium";
|
||||
case "max":
|
||||
return "xhigh";
|
||||
default:
|
||||
return reasoning;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -978,7 +978,7 @@ describe("capability cli", () => {
|
||||
"--prompt",
|
||||
"hello",
|
||||
"--thinking",
|
||||
"turbo-lobster",
|
||||
"turbo-mode",
|
||||
"--json",
|
||||
],
|
||||
}),
|
||||
|
||||
Reference in New Issue
Block a user