From 93d5cd10151fcf802c9e430ef68cbfa812b40fb6 Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Wed, 29 Apr 2026 11:35:03 +0100
Subject: [PATCH] fix: honor configured xhigh thinking compat (#74273)

* fix: honor configured xhigh thinking compat

* test: update agent command model selection mock
---
 CHANGELOG.md                                  |   1 +
 docs/gateway/config-agents.md                 |   1 +
 docs/gateway/local-models.md                  |  34 ++++
 docs/tools/thinking.md                        |   2 +
 .../agent-command.live-model-switch.test.ts   | 148 ++++++++++++++----
 src/agents/agent-command.ts                   |  20 ++-
 src/agents/model-catalog.ts                   |   4 +-
 src/agents/model-catalog.types.ts             |   3 +
 src/agents/model-selection-shared.ts          |   6 +
 src/agents/model-selection.test.ts            |   4 +
 src/auto-reply/commands-registry.test.ts      |  34 ++++
 src/auto-reply/thinking.shared.ts             |   3 +
 src/auto-reply/thinking.test.ts               |  40 +++++
 src/auto-reply/thinking.ts                    |  20 ++-
 src/commands/agent-command.test-mocks.ts      |   1 +
 src/gateway/session-utils.test.ts             |  25 +++
 src/gateway/sessions-patch.test.ts            |  29 ++++
 src/plugins/runtime/index.test.ts             |  34 ++++
 src/plugins/runtime/runtime-agent.ts          |  21 ++-
 19 files changed, 392 insertions(+), 38 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1d4b564690b..692bb00fe42 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,6 +24,7 @@ Docs: https://docs.openclaw.ai
 - Browser/gateway: ignore Playwright dialog-close races from `Page.handleJavaScriptDialog` so browser automation no longer crashes the Gateway when a dialog disappears before Playwright accepts it. (#40067) Thanks @randyjtw.
 - Cron/Gateway: defer missed isolated agent-turn catch-up out of the channel startup window, so overdue cron work cannot starve Discord or Telegram while providers connect after a restart. Thanks @vincentkoc.
 - Heartbeat/cron: defer heartbeat turns while cron work is active or queued, add opt-in `heartbeat.skipWhenBusy` for subagent/nested lane pressure, and retry busy skips without advancing the schedule so local Ollama hosts do not run heartbeat and cron prompts concurrently. Fixes #50773. Thanks @scottgl9.
+- Agents/thinking: honor configured model `compat.supportedReasoningEfforts` entries that include `xhigh`, so custom OpenAI-compatible provider refs expose and validate `/think xhigh` consistently across command menus, Gateway sessions, agent CLI, and `llm-task`. Carries forward #48904. Thanks @Milchstrassse and @wufunc.
 - Plugins/runtime-deps: prune stale `openclaw-unknown-*` bundled runtime dependency roots during Gateway startup while keeping recent or locked roots, so old staging debris cannot keep growing across restarts. Thanks @vincentkoc.
 - Ollama: compose caller abort signals with guarded-fetch timeouts for native `/api/chat` streams, so `/stop` and early cancellation still interrupt local Ollama requests that also carry provider timeout budgets. Refs #74133. Thanks @obviyus.
 - Doctor/TTS: migrate legacy `messages.tts.enabled`, agent TTS, channel TTS, and voice-call plugin TTS toggles to `auto` mode during `openclaw doctor --fix`, matching the documented TTS config contract. Thanks @vincentkoc.
diff --git a/docs/gateway/config-agents.md b/docs/gateway/config-agents.md
index fe310945c0c..f6b730f1b9a 100644
--- a/docs/gateway/config-agents.md
+++ b/docs/gateway/config-agents.md
@@ -375,6 +375,7 @@ Time format in system prompt. Default: `auto` (OS preference).
 - `params` merge precedence (config): `agents.defaults.params` (global base) is overridden by `agents.defaults.models["provider/model"].params` (per-model), then `agents.list[].params` (matching agent id) overrides by key. See [Prompt Caching](/reference/prompt-caching) for details.
 - `params.extra_body`/`params.extraBody`: advanced pass-through JSON merged into `api: "openai-completions"` request bodies for OpenAI-compatible proxies. If it collides with generated request keys, the extra body wins; non-native completions routes still strip OpenAI-only `store` afterward.
 - `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, the bundled vLLM plugin automatically sends `enable_thinking: false` and `force_nonempty_content: true`; explicit `chat_template_kwargs` override generated defaults, and `extra_body.chat_template_kwargs` still has final precedence. For vLLM Qwen thinking controls, set `params.qwenThinkingFormat` to `"chat-template"` or `"top-level"` on that model entry.
+- `compat.supportedReasoningEfforts`: per-model OpenAI-compatible reasoning effort list. Include `"xhigh"` for custom endpoints that truly accept it; OpenClaw then exposes `/think xhigh` in command menus, Gateway session rows, session patch validation, agent CLI validation, and `llm-task` validation for that configured provider/model. Use `compat.reasoningEffortMap` when the backend wants a provider-specific value for a canonical level.
 - `params.preserveThinking`: Z.AI-only opt-in for preserved thinking. When enabled and thinking is on, OpenClaw sends `thinking.clear_thinking: false` and replays prior `reasoning_content`; see [Z.AI thinking and preserved thinking](/providers/zai#thinking-and-preserved-thinking).
 - `agentRuntime`: default low-level agent runtime policy. Omitted id defaults to OpenClaw Pi. Use `id: "pi"` to force the built-in PI harness, `id: "auto"` to let registered plugin harnesses claim supported models, a registered harness id such as `id: "codex"`, or a supported CLI backend alias such as `id: "claude-cli"`. Set `fallback: "none"` to disable automatic PI fallback. Explicit plugin runtimes such as `codex` fail closed by default unless you set `fallback: "pi"` in the same override scope. Keep model refs canonical as `provider/model`; select Codex, Claude CLI, Gemini CLI, and other execution backends through runtime config instead of legacy runtime provider prefixes. See [Agent runtimes](/concepts/agent-runtimes) for how this differs from provider/model selection.
 - Config writers that mutate these fields (for example `/models set`, `/models set-image`, and fallback add/remove commands) save canonical object form and preserve existing fallback lists when possible.
diff --git a/docs/gateway/local-models.md b/docs/gateway/local-models.md
index 0cdba312d0c..d8f36c1b76b 100644
--- a/docs/gateway/local-models.md
+++ b/docs/gateway/local-models.md
@@ -245,6 +245,40 @@ Compatibility notes for stricter OpenAI-compatible backends:
   openclaw config set agents.defaults.models '{"local/my-local-model":{"params":{"extra_body":{"tool_choice":"required"}}}}' --strict-json --merge
   ```
 
+- If a custom OpenAI-compatible model accepts OpenAI reasoning efforts beyond
+  the built-in profile, declare them on the model compat block. Adding `"xhigh"`
+  here makes `/think xhigh`, session pickers, Gateway validation, and `llm-task`
+  validation expose the level for that configured provider/model ref:
+
+  ```json5
+  {
+    models: {
+      providers: {
+        local: {
+          baseUrl: "http://127.0.0.1:8000/v1",
+          apiKey: "sk-local",
+          api: "openai-responses",
+          models: [
+            {
+              id: "gpt-5.4",
+              name: "GPT 5.4 via local proxy",
+              reasoning: true,
+              input: ["text"],
+              cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+              contextWindow: 196608,
+              maxTokens: 8192,
+              compat: {
+                supportedReasoningEfforts: ["low", "medium", "high", "xhigh"],
+                reasoningEffortMap: { xhigh: "xhigh" },
+              },
+            },
+          ],
+        },
+      },
+    },
+  }
+  ```
+
 - Some smaller or stricter local backends are unstable with OpenClaw's full
   agent-runtime prompt shape, especially when tool schemas are included. First
   verify the provider path with the lean local probe:
diff --git a/docs/tools/thinking.md b/docs/tools/thinking.md
index 7699bcbbb9f..254093a8a50 100644
--- a/docs/tools/thinking.md
+++ b/docs/tools/thinking.md
@@ -28,6 +28,7 @@ title: "Thinking levels"
 - Anthropic Claude Opus 4.7 also exposes `/think max`; it maps to the same provider-owned max effort path.
 - Ollama thinking-capable models expose `/think low|medium|high|max`; `max` maps to native `think: "high"` because Ollama's native API accepts `low`, `medium`, and `high` effort strings.
 - OpenAI GPT models map `/think` through model-specific Responses API effort support. `/think off` sends `reasoning.effort: "none"` only when the target model supports it; otherwise OpenClaw omits the disabled reasoning payload instead of sending an unsupported value.
+  - Custom OpenAI-compatible catalog entries can opt into `/think xhigh` by setting `models.providers.<provider>.models[].compat.supportedReasoningEfforts` to include `"xhigh"`. This uses the same compat metadata that maps outbound OpenAI reasoning effort payloads, so menus, session validation, agent CLI, and `llm-task` agree with transport behavior.
 - Stale configured OpenRouter Hunter Alpha refs skip proxy reasoning injection because that retired route could return final answer text through reasoning fields.
 - Google Gemini maps `/think adaptive` to Gemini's provider-owned dynamic thinking. Gemini 3 requests omit a fixed `thinkingLevel`, while Gemini 2.5 requests send `thinkingBudget: -1`; fixed levels still map to the closest Gemini `thinkingLevel` or budget for that model family.
 - MiniMax (`minimax/*`) on the Anthropic-compatible streaming path defaults to `thinking: { type: "disabled" }` unless you explicitly set thinking in model params or request params. This avoids leaked `reasoning_content` deltas from MiniMax's non-native Anthropic stream format.
@@ -126,5 +127,6 @@ Malformed local-model reasoning tags are handled conservatively. Closed `
 - Provider plugins that proxy Claude models should reuse `resolveClaudeThinkingProfile(modelId)` from `openclaw/plugin-sdk/provider-model-shared` so direct Anthropic and proxy catalogs stay aligned.
 - Each profile level has a stored canonical `id` (`off`, `minimal`, `low`, `medium`, `high`, `xhigh`, `adaptive`, or `max`) and may include a display `label`. Binary providers use `{ id: "low", label: "on" }`.
 - Tool plugins that need to validate an explicit thinking override should use `api.runtime.agent.resolveThinkingPolicy({ provider, model })` plus `api.runtime.agent.normalizeThinkingLevel(...)`; they should not keep their own provider/model level lists.
+- Tool plugins with access to configured custom model metadata can pass `catalog` into `resolveThinkingPolicy` so `compat.supportedReasoningEfforts` opt-ins are reflected in plugin-side validation.
 - Published legacy hooks (`supportsXHighThinking`, `isBinaryThinking`, and `resolveDefaultThinkingLevel`) remain as compatibility adapters, but new custom level sets should use `resolveThinkingProfile`.
 - Gateway rows/defaults expose `thinkingLevels`, `thinkingOptions`, and `thinkingDefault` so ACP/chat clients render the same profile ids and labels that runtime validation uses.
diff --git a/src/agents/agent-command.live-model-switch.test.ts b/src/agents/agent-command.live-model-switch.test.ts
index c6c89e7b9d7..38ad3238603 100644
--- a/src/agents/agent-command.live-model-switch.test.ts
+++ b/src/agents/agent-command.live-model-switch.test.ts
@@ -3,6 +3,18 @@ import { INTERNAL_RUNTIME_CONTEXT_BEGIN, INTERNAL_RUNTIME_CONTEXT_END } from "./
 import { LiveSessionModelSwitchError } from "./live-model-switch-error.js";
 
 const state = vi.hoisted(() => ({
+  defaultRuntimeConfig: {
+    agents: {
+      defaults: {
+        models: {
+          "anthropic/claude": {},
+          "openai/claude": {},
+          "openai/gpt-5.4": {},
+        },
+      },
+    },
+  },
+  runtimeConfigMock: undefined as unknown,
   acpResolveSessionMock: vi.fn((..._args: unknown[]): unknown => null),
   acpRunTurnMock: vi.fn((..._args: unknown[]): unknown => undefined),
   buildAcpResultMock: vi.fn(),
@@ -22,6 +34,8 @@ const state = vi.hoisted(() => ({
   trajectoryRecordEventMock: vi.fn(),
   trajectoryFlushMock: vi.fn(async () => undefined),
   clearSessionAuthProfileOverrideMock: vi.fn(),
+  isThinkingLevelSupportedMock: vi.fn((_args: unknown) => true),
+  resolveThinkingDefaultMock: vi.fn((_args: unknown) => "low"),
   authProfileStoreMock: { profiles: {} } as { profiles: Record<string, unknown> },
   sessionEntryMock: undefined as unknown,
   sessionStoreMock: undefined as unknown,
@@ -109,7 +123,7 @@ vi.mock("../auto-reply/thinking.js", () => ({
   formatXHighModelHint: () => "model-x",
   normalizeThinkLevel: (v?: string) => v || undefined,
   normalizeVerboseLevel: (v?: string) => v || undefined,
-  isThinkingLevelSupported: () => true,
+  isThinkingLevelSupported: (args: unknown) => state.isThinkingLevelSupportedMock(args),
   resolveSupportedThinkingLevel: ({ level }: { level?: string }) => level,
   supportsXHighThinking: () => false,
 }));
@@ -134,39 +148,18 @@ vi.mock("../cli/deps.js", () => ({
 }));
 
 vi.mock("../config/io.js", () => ({
-  getRuntimeConfig: () => ({
-    agents: {
-      defaults: {
-        models: {
-          "anthropic/claude": {},
-          "openai/claude": {},
-          "openai/gpt-5.4": {},
-        },
-      },
-    },
-  }),
+  getRuntimeConfig: () => state.runtimeConfigMock ?? state.defaultRuntimeConfig,
   readConfigFileSnapshotForWrite: async () => ({
     snapshot: { valid: false },
   }),
 }));
 
 vi.mock("./agent-runtime-config.js", () => {
-  const cfg = {
-    agents: {
-      defaults: {
-        models: {
-          "anthropic/claude": {},
-          "openai/claude": {},
-          "openai/gpt-5.4": {},
-        },
-      },
-    },
-  };
   return {
     resolveAgentRuntimeConfig: async () => ({
-      loadedRaw: cfg,
-      sourceConfig: cfg,
-      cfg,
+      loadedRaw: state.runtimeConfigMock ?? state.defaultRuntimeConfig,
+      sourceConfig: state.runtimeConfigMock ?? state.defaultRuntimeConfig,
+      cfg: state.runtimeConfigMock ?? state.defaultRuntimeConfig,
     }),
   };
 });
@@ -311,12 +304,50 @@ vi.mock("./model-selection.js", () => ({
     allowedCatalog: [],
     allowAny: false,
   }),
+  buildConfiguredModelCatalog: ({ cfg }: { cfg?: unknown }) => {
+    const providers = (cfg as { models?: { providers?: Record<string, { models?: unknown[] }> } })
+      ?.models?.providers;
+    if (!providers) {
+      return [];
+    }
+    return Object.entries(providers).flatMap(([provider, entry]) =>
+      Array.isArray(entry?.models)
+        ? entry.models
+            .filter(
+              (model): model is Record<string, unknown> => !!model && typeof model === "object",
+            )
+            .map((model) => {
+              const id = typeof model.id === "string" ? model.id : "";
+              return {
+                provider,
+                id,
+                name: typeof model.name === "string" ? model.name : id,
+                reasoning: typeof model.reasoning === "boolean" ? model.reasoning : undefined,
+                compat: model.compat,
+              };
+            })
+            .filter((model) => model.id)
+        : [],
+    );
+  },
   modelKey: (p: string, m: string) => `${p}/${m}`,
   normalizeModelRef: (p: string, m: string) => ({ provider: p, model: m }),
   parseModelRef: (m: string, p: string) => ({ provider: p, model: m }),
-  resolveConfiguredModelRef: () => ({ provider: "anthropic", model: "claude" }),
-  resolveDefaultModelForAgent: () => ({ provider: "anthropic", model: "claude" }),
-  resolveThinkingDefault: () => "low",
+  resolveConfiguredModelRef: ({ cfg }: { cfg?: unknown }) => {
+    const raw = (cfg as { agents?: { defaults?: { model?: string | { primary?: string } } } })
+      ?.agents?.defaults?.model;
+    const primary = typeof raw === "string" ? raw : raw?.primary;
+    const [provider, ...modelParts] = (primary ?? "anthropic/claude").split("/");
+    return { provider, model: modelParts.join("/") || "claude" };
+  },
+  resolveDefaultModelForAgent: ({ cfg }: { cfg?: unknown }) => {
+    const raw = (cfg as { agents?: { defaults?: { model?: string | { primary?: string } } } })
+      ?.agents?.defaults?.model;
+    const primary = typeof raw === "string" ? raw : raw?.primary;
+    const [provider, ...modelParts] = (primary ?? "anthropic/claude").split("/");
"anthropic/claude").split("/"); + return { provider, model: modelParts.join("/") || "claude" }; + }, + resolveThinkingDefault: (args: unknown) => state.resolveThinkingDefaultMock(args), })); vi.mock("./provider-auth-aliases.js", () => ({ @@ -446,6 +477,9 @@ describe("agentCommand – LiveSessionModelSwitchError retry", () => { state.resolveAcpAgentPolicyErrorMock.mockReturnValue(null); state.resolveAcpDispatchPolicyErrorMock.mockReturnValue(null); state.resolveAcpExplicitTurnPolicyErrorMock.mockReturnValue(null); + state.runtimeConfigMock = undefined; + state.isThinkingLevelSupportedMock.mockReturnValue(true); + state.resolveThinkingDefaultMock.mockReturnValue("low"); state.acpRunTurnMock.mockImplementation(async (params: unknown) => { const onEvent = (params as { onEvent?: (event: unknown) => void }).onEvent; onEvent?.({ type: "text_delta", stream: "output", text: "done" }); @@ -506,6 +540,62 @@ describe("agentCommand – LiveSessionModelSwitchError retry", () => { expect(lifecycleEndCalls.length).toBeGreaterThanOrEqual(1); }); + it("validates explicit thinking against configured model compat without an allowlist", async () => { + state.runtimeConfigMock = { + agents: { + defaults: { + model: { primary: "gmn/gpt-5.4" }, + }, + }, + models: { + providers: { + gmn: { + models: [ + { + id: "gpt-5.4", + name: "GPT 5.4 via GMN", + reasoning: true, + compat: { supportedReasoningEfforts: ["low", "medium", "high", "xhigh"] }, + }, + ], + }, + }, + }, + }; + state.runWithModelFallbackMock.mockImplementation(async (params: FallbackRunnerParams) => { + const result = await params.run(params.provider, params.model); + return { + result, + provider: params.provider, + model: params.model, + attempts: [], + }; + }); + state.runAgentAttemptMock.mockResolvedValue(makeSuccessResult("gmn", "gpt-5.4")); + + await agentCommand({ + message: "hello", + to: "+1234567890", + senderIsOwner: true, + thinking: "xhigh", + }); + + expect(state.isThinkingLevelSupportedMock).toHaveBeenCalledWith( + expect.objectContaining({ + provider: "gmn", + model: "gpt-5.4", + level: "xhigh", + catalog: [ + expect.objectContaining({ + provider: "gmn", + id: "gpt-5.4", + compat: { supportedReasoningEfforts: ["low", "medium", "high", "xhigh"] }, + }), + ], + }), + ); + }); + it("records fallback steps to the session trajectory runtime", async () => { state.runWithModelFallbackMock.mockImplementation(async (params: FallbackRunnerParams) => { await params.onFallbackStep?.({ diff --git a/src/agents/agent-command.ts b/src/agents/agent-command.ts index 53329eb7874..b6931954e52 100644 --- a/src/agents/agent-command.ts +++ b/src/agents/agent-command.ts @@ -54,6 +54,7 @@ import { loadModelCatalog } from "./model-catalog.js"; import { runWithModelFallback } from "./model-fallback.js"; import { buildAllowedModelSet, + buildConfiguredModelCatalog, modelKey, normalizeModelRef, parseModelRef, @@ -298,7 +299,13 @@ async function prepareAgentCommandExecution( defaultProvider: DEFAULT_PROVIDER, defaultModel: DEFAULT_MODEL, }); - const thinkingLevelsHint = formatThinkingLevels(configuredModel.provider, configuredModel.model); + const configuredThinkingCatalog = buildConfiguredModelCatalog({ cfg }); + const thinkingLevelsHint = formatThinkingLevels( + configuredModel.provider, + configuredModel.model, + ", ", + configuredThinkingCatalog.length > 0 ? 
configuredThinkingCatalog : undefined, + ); const thinkOverride = normalizeThinkLevel(opts.thinking); const thinkOnce = normalizeThinkLevel(opts.thinkingOnce); @@ -388,6 +395,7 @@ async function prepareAgentCommandExecution( body, transcriptBody, cfg, + configuredThinkingCatalog, normalizedSpawned, agentCfg, thinkOverride, @@ -424,6 +432,7 @@ async function agentCommandInternal( body, transcriptBody, cfg, + configuredThinkingCatalog, normalizedSpawned, agentCfg, thinkOverride, @@ -818,17 +827,18 @@ async function agentCommandInternal( } } + const catalogForThinking = + modelCatalog ?? + (allowedModelCatalog.length > 0 ? allowedModelCatalog : configuredThinkingCatalog); + const thinkingCatalog = catalogForThinking.length > 0 ? catalogForThinking : undefined; if (!resolvedThinkLevel) { - const catalogForThinking = modelCatalog ?? allowedModelCatalog; resolvedThinkLevel = resolveThinkingDefault({ cfg, provider, model, - catalog: catalogForThinking.length > 0 ? catalogForThinking : undefined, + catalog: thinkingCatalog, }); } - const catalogForThinking = modelCatalog ?? allowedModelCatalog; - const thinkingCatalog = catalogForThinking.length > 0 ? catalogForThinking : undefined; if ( !isThinkingLevelSupported({ provider, diff --git a/src/agents/model-catalog.ts b/src/agents/model-catalog.ts index b30bb4e4993..d256255f695 100644 --- a/src/agents/model-catalog.ts +++ b/src/agents/model-catalog.ts @@ -30,6 +30,7 @@ type DiscoveredModel = { contextWindow?: number; reasoning?: boolean; input?: ModelInputType[]; + compat?: ModelCatalogEntry["compat"]; }; type PiSdkModule = typeof import("./pi-model-discovery-runtime.js"); @@ -187,7 +188,8 @@ export async function loadModelCatalog(params?: { : undefined; const reasoning = typeof entry?.reasoning === "boolean" ? entry.reasoning : undefined; const input = Array.isArray(entry?.input) ? entry.input : undefined; - models.push({ id, name, provider, contextWindow, reasoning, input }); + const compat = entry?.compat && typeof entry.compat === "object" ? entry.compat : undefined; + models.push({ id, name, provider, contextWindow, reasoning, input, compat }); } const supplemental = await augmentModelCatalogWithProviderPlugins({ config: cfg, diff --git a/src/agents/model-catalog.types.ts b/src/agents/model-catalog.types.ts index 0c54d405695..aeb6aada5c5 100644 --- a/src/agents/model-catalog.types.ts +++ b/src/agents/model-catalog.types.ts @@ -1,3 +1,5 @@ +import type { ModelCompatConfig } from "../config/types.models.js"; + export type ModelInputType = "text" | "image" | "audio" | "video" | "document"; export type ModelCatalogEntry = { @@ -8,4 +10,5 @@ export type ModelCatalogEntry = { contextWindow?: number; reasoning?: boolean; input?: ModelInputType[]; + compat?: ModelCompatConfig; }; diff --git a/src/agents/model-selection-shared.ts b/src/agents/model-selection-shared.ts index 93a79a412c7..aa89107a164 100644 --- a/src/agents/model-selection-shared.ts +++ b/src/agents/model-selection-shared.ts @@ -405,6 +405,7 @@ function applyModelCatalogMetadata(params: { const nextContextWindow = configuredEntry?.contextWindow ?? params.entry.contextWindow; const nextReasoning = configuredEntry?.reasoning ?? params.entry.reasoning; const nextInput = configuredEntry?.input ?? params.entry.input; + const nextCompat = configuredEntry?.compat ?? params.entry.compat; return { ...params.entry, @@ -413,6 +414,7 @@ function applyModelCatalogMetadata(params: { ...(nextContextWindow !== undefined ? { contextWindow: nextContextWindow } : {}), ...(nextReasoning !== undefined ? 
     ...(nextInput ? { input: nextInput } : {}),
+    ...(nextCompat ? { compat: nextCompat } : {}),
   };
 }
 
@@ -426,6 +428,7 @@ function buildSyntheticAllowedCatalogEntry(params: {
   const nextContextWindow = configuredEntry?.contextWindow;
   const nextReasoning = configuredEntry?.reasoning;
   const nextInput = configuredEntry?.input;
+  const nextCompat = configuredEntry?.compat;
 
   return {
     id: params.parsed.model,
@@ -435,6 +438,7 @@ function buildSyntheticAllowedCatalogEntry(params: {
     ...(nextContextWindow !== undefined ? { contextWindow: nextContextWindow } : {}),
     ...(nextReasoning !== undefined ? { reasoning: nextReasoning } : {}),
     ...(nextInput ? { input: nextInput } : {}),
+    ...(nextCompat ? { compat: nextCompat } : {}),
   };
 }
 
@@ -788,6 +792,7 @@ export function buildConfiguredModelCatalog(params: { cfg: OpenClawConfig }): Mo
       : undefined;
     const reasoning = typeof model?.reasoning === "boolean" ? model.reasoning : undefined;
     const input = Array.isArray(model?.input) ? model.input : undefined;
+    const compat = model?.compat && typeof model.compat === "object" ? model.compat : undefined;
     catalog.push({
       provider: providerId,
       id,
@@ -795,6 +800,7 @@ export function buildConfiguredModelCatalog(params: { cfg: OpenClawConfig }): Mo
       contextWindow,
       reasoning,
      input,
+      compat,
     });
   }
 }
diff --git a/src/agents/model-selection.test.ts b/src/agents/model-selection.test.ts
index 765cb34deb4..4133b08df0f 100644
--- a/src/agents/model-selection.test.ts
+++ b/src/agents/model-selection.test.ts
@@ -629,6 +629,7 @@ describe("model-selection", () => {
             id: "gpt-test-z",
             name: "Configured GPT Test Z",
             contextWindow: 64_000,
+            compat: { supportedReasoningEfforts: ["low", "medium", "high", "xhigh"] },
           },
         ],
       },
@@ -650,6 +651,7 @@ describe("model-selection", () => {
         name: "Configured GPT Test Z",
         alias: "GPT Test Z Alias",
         contextWindow: 64_000,
+        compat: { supportedReasoningEfforts: ["low", "medium", "high", "xhigh"] },
       },
     ]);
   });
@@ -707,6 +709,7 @@ describe("model-selection", () => {
             name: "Kimi K2.5 (Configured)",
             contextWindow: 32_000,
             reasoning: true,
+            compat: { supportedReasoningEfforts: ["low", "medium", "high", "xhigh"] },
           },
         ],
       },
@@ -729,6 +732,7 @@ describe("model-selection", () => {
         alias: "Kimi K2.5 (NVIDIA)",
         contextWindow: 32_000,
         reasoning: true,
+        compat: { supportedReasoningEfforts: ["low", "medium", "high", "xhigh"] },
       },
     ]);
   });
diff --git a/src/auto-reply/commands-registry.test.ts b/src/auto-reply/commands-registry.test.ts
index ab29d16342d..e66496f2244 100644
--- a/src/auto-reply/commands-registry.test.ts
+++ b/src/auto-reply/commands-registry.test.ts
@@ -576,6 +576,40 @@ describe("commands registry args", () => {
     );
   });
 
+  it("uses configured model compat for /think arg menus", () => {
+    const command = findCommandByNativeName("think");
+    expect(command).toBeTruthy();
+    if (!command) {
+      return;
+    }
+
+    const menu = resolveCommandArgMenu({
+      command,
+      args: undefined,
+      cfg: {
+        models: {
+          providers: {
+            gmn: {
+              models: [
+                {
+                  id: "gpt-5.4",
+                  name: "GPT 5.4 via GMN",
+                  reasoning: true,
+                  compat: { supportedReasoningEfforts: ["low", "medium", "high", "xhigh"] },
+                },
+              ],
+            },
+          },
+        },
+      } as never,
+      provider: "gmn",
+      model: "gpt-5.4",
+    });
+
+    expect(menu?.choices.map((choice) => choice.value)).toContain("xhigh");
+    expect(formatCommandArgMenuTitle({ command, menu: menu! })).toContain("xhigh");
+  });
+
   it("does not show menus when args were provided as raw text only", () => {
     const command = createUsageModeCommand("none", "on or off");
diff --git a/src/auto-reply/thinking.shared.ts b/src/auto-reply/thinking.shared.ts
index b94c09ad1f4..932a8910073 100644
--- a/src/auto-reply/thinking.shared.ts
+++ b/src/auto-reply/thinking.shared.ts
@@ -26,6 +26,9 @@ export type ThinkingCatalogEntry = {
   provider: string;
   id: string;
   reasoning?: boolean;
+  compat?: {
+    supportedReasoningEfforts?: readonly string[] | null;
+  } | null;
 };
 
 export const BASE_THINKING_LEVELS: ThinkLevel[] = ["off", "minimal", "low", "medium", "high"];
diff --git a/src/auto-reply/thinking.test.ts b/src/auto-reply/thinking.test.ts
index ed32e6de8e8..7b486b02bcd 100644
--- a/src/auto-reply/thinking.test.ts
+++ b/src/auto-reply/thinking.test.ts
@@ -205,6 +205,46 @@ describe("listThinkingLevels", () => {
     ).toBe("max");
   });
 
+  it("uses catalog compat reasoning efforts to expose xhigh for configured custom models", () => {
+    const catalog = [
+      {
+        provider: "gmn",
+        id: "gpt-5.4",
+        name: "GPT 5.4 via GMN",
+        reasoning: true,
+        compat: { supportedReasoningEfforts: ["low", "medium", "high", "xhigh"] },
+      },
+    ];
+
+    expect(listThinkingLevels("gmn", "gpt-5.4", catalog)).toContain("xhigh");
+    expect(formatThinkingLevels("gmn", "gpt-5.4", ", ", catalog)).toBe(
+      "off, minimal, low, medium, high, xhigh",
+    );
+    expect(
+      isThinkingLevelSupported({
+        provider: "gmn",
+        model: "gpt-5.4",
+        level: "xhigh",
+        catalog,
+      }),
+    ).toBe(true);
+  });
+
+  it("does not let catalog xhigh compat override binary thinking providers", () => {
+    providerRuntimeMocks.resolveProviderBinaryThinking.mockReturnValue(true);
+    const catalog = [
+      {
+        provider: "zai",
+        id: "glm-4.7",
+        name: "GLM 4.7",
+        compat: { supportedReasoningEfforts: ["xhigh"] },
+      },
+    ];
+
+    expect(listThinkingLevels("zai", "glm-4.7", catalog)).toEqual(["off", "low"]);
+    expect(listThinkingLevelLabels("zai", "glm-4.7", catalog)).toEqual(["off", "on"]);
+  });
+
   it("maps stale unsupported levels to the largest profile level", () => {
     providerRuntimeMocks.resolveProviderThinkingProfile.mockReturnValue({
       levels: [{ id: "off" }, { id: "high" }],
diff --git a/src/auto-reply/thinking.ts b/src/auto-reply/thinking.ts
index c33f21fa64a..6bf736131b2 100644
--- a/src/auto-reply/thinking.ts
+++ b/src/auto-reply/thinking.ts
@@ -68,7 +68,21 @@ function resolveThinkingPolicyContext(params: {
   const candidate = params.catalog?.find(
     (entry) => normalizeProviderId(entry.provider) === normalizedProvider && entry.id === modelId,
   );
-  return { normalizedProvider, modelId, modelKey, reasoning: candidate?.reasoning };
+  return {
+    normalizedProvider,
+    modelId,
+    modelKey,
+    reasoning: candidate?.reasoning,
+    compat: candidate?.compat,
+  };
+}
+
+function catalogSupportsXHigh(compat: ThinkingCatalogEntry["compat"]): boolean {
+  const efforts = compat?.supportedReasoningEfforts;
+  if (!Array.isArray(efforts)) {
+    return false;
+  }
+  return efforts.some((effort) => normalizeThinkLevel(effort) === "xhigh");
 }
 
 function normalizeProfileLevel(
@@ -170,11 +184,15 @@ export function resolveThinkingProfile(params: {
     binaryDecision === true
       ? buildBinaryThinkingProfile(defaultLevel)
       : buildBaseThinkingProfile(defaultLevel);
+  if (binaryDecision !== true && catalogSupportsXHigh(context.compat)) {
+    appendProfileLevel(profile, "xhigh");
+  }
   const policyContext = {
     provider: context.normalizedProvider,
     modelId: context.modelKey || context.modelId,
   };
   if (
+    binaryDecision !== true &&
     resolveProviderXHighThinking({
       provider: context.normalizedProvider,
       context: policyContext,
diff --git a/src/commands/agent-command.test-mocks.ts b/src/commands/agent-command.test-mocks.ts
index a71a2c0516b..d57d27e9fe7 100644
--- a/src/commands/agent-command.test-mocks.ts
+++ b/src/commands/agent-command.test-mocks.ts
@@ -130,6 +130,7 @@ vi.mock("../agents/model-selection.js", () => {
       allowAny: Object.keys(modelConfig).length === 0,
     };
   }),
+  buildConfiguredModelCatalog: vi.fn(() => []),
   isCliProvider: vi.fn(() => false),
   modelKey,
   normalizeModelRef,
diff --git a/src/gateway/session-utils.test.ts b/src/gateway/session-utils.test.ts
index 4a30e6f3445..7408faec29d 100644
--- a/src/gateway/session-utils.test.ts
+++ b/src/gateway/session-utils.test.ts
@@ -200,6 +200,31 @@ describe("gateway session utils", () => {
     expect(row.thinkingDefault).toBe("medium");
   });
 
+  test("session defaults and rows expose xhigh from configured catalog compat", () => {
+    const cfg = createModelDefaultsConfig({ primary: "gmn/gpt-5.4" });
+    const catalog = [
+      {
+        provider: "gmn",
+        id: "gpt-5.4",
+        name: "GPT 5.4 via GMN",
+        reasoning: true,
+        compat: { supportedReasoningEfforts: ["low", "medium", "high", "xhigh"] },
+      },
+    ];
+
+    const defaults = getSessionDefaults(cfg, catalog);
+    const row = buildGatewaySessionRow({
+      cfg,
+      storePath: "",
+      store: {},
+      key: "main",
+      modelCatalog: catalog,
+    });
+
+    expect(defaults.thinkingLevels?.map((level) => level.id)).toContain("xhigh");
+    expect(row.thinkingLevels?.map((level) => level.id)).toContain("xhigh");
+  });
+
   test("session defaults use configured thinking default", () => {
     const defaults = getSessionDefaults({
       agents: {
diff --git a/src/gateway/sessions-patch.test.ts b/src/gateway/sessions-patch.test.ts
index efbad9838e8..498b813f41a 100644
--- a/src/gateway/sessions-patch.test.ts
+++ b/src/gateway/sessions-patch.test.ts
@@ -384,6 +384,35 @@ describe("gateway sessions patch", () => {
     expect(entry.thinkingLevel).toBe("medium");
   });
 
+  test("accepts xhigh thinking patches from configured catalog compat", async () => {
+    const entry = expectPatchOk(
+      await runPatch({
+        cfg: {
+          agents: {
+            defaults: {
+              model: { primary: "gmn/gpt-5.4" },
+            },
+          },
+        } as OpenClawConfig,
+        patch: {
+          key: MAIN_SESSION_KEY,
+          thinkingLevel: "xhigh",
+        },
+        loadGatewayModelCatalog: async () => [
+          {
+            provider: "gmn",
+            id: "gpt-5.4",
+            name: "GPT 5.4 via GMN",
+            reasoning: true,
+            compat: { supportedReasoningEfforts: ["low", "medium", "high", "xhigh"] },
+          },
+        ],
+      }),
+    );
+
+    expect(entry.thinkingLevel).toBe("xhigh");
+  });
+
   test("sets spawnedBy for ACP sessions", async () => {
     const entry = expectPatchOk(
       await runPatch({
diff --git a/src/plugins/runtime/index.test.ts b/src/plugins/runtime/index.test.ts
index ae1dcaa5ce9..3fe08ae231c 100644
--- a/src/plugins/runtime/index.test.ts
+++ b/src/plugins/runtime/index.test.ts
@@ -1,5 +1,10 @@
 import { beforeEach, describe, expect, it, vi } from "vitest";
 import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../../agents/defaults.js";
+import {
+  resetConfigRuntimeState,
+  setRuntimeConfigSnapshot,
+  type OpenClawConfig,
+} from "../../config/config.js";
 import { onAgentEvent } from "../../infra/agent-events.js";
 import { requestHeartbeatNow } from "../../infra/heartbeat-wake.js";
 import * as execModule from "../../process/exec.js";
@@ -102,6 +107,7 @@ function expectRunCommandOutcome(params: {
 describe("plugin runtime command execution", () => {
   beforeEach(() => {
     vi.restoreAllMocks();
+    resetConfigRuntimeState();
     clearGatewaySubagentRuntime();
   });
 
@@ -157,6 +163,34 @@
     expectRuntimeValue(readValue, expected);
   });
 
+  it("resolves thinking policy with configured model compat from runtime config", () => {
+    setRuntimeConfigSnapshot({
+      models: {
+        providers: {
+          gmn: {
+            baseUrl: "https://gmn.example.com/v1",
+            models: [
+              {
+                id: "gpt-5.4",
+                name: "GPT 5.4 via GMN",
+                reasoning: true,
+                compat: { supportedReasoningEfforts: ["low", "medium", "high", "xhigh"] },
+              },
+            ],
+          },
+        },
+      },
+    } as unknown as OpenClawConfig);
+
+    const runtime = createPluginRuntime();
+    const policy = runtime.agent.resolveThinkingPolicy({
+      provider: "gmn",
+      model: "gpt-5.4",
+    });
+
+    expect(policy.levels.map((level) => level.id)).toContain("xhigh");
+  });
+
   it.each([
     {
       name: "exposes runtime.mediaUnderstanding helpers and keeps stt as an alias",
diff --git a/src/plugins/runtime/runtime-agent.ts b/src/plugins/runtime/runtime-agent.ts
index 78a6d165d1a..9a1f1a44fc1 100644
--- a/src/plugins/runtime/runtime-agent.ts
+++ b/src/plugins/runtime/runtime-agent.ts
@@ -1,10 +1,14 @@
 import { resolveAgentDir, resolveAgentWorkspaceDir } from "../../agents/agent-scope.js";
 import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../../agents/defaults.js";
 import { resolveAgentIdentity } from "../../agents/identity.js";
-import { resolveThinkingDefault } from "../../agents/model-selection.js";
+import {
+  buildConfiguredModelCatalog,
+  resolveThinkingDefault,
+} from "../../agents/model-selection.js";
 import { resolveAgentTimeoutMs } from "../../agents/timeout.js";
 import { ensureAgentWorkspace } from "../../agents/workspace.js";
 import { normalizeThinkLevel, resolveThinkingProfile } from "../../auto-reply/thinking.js";
+import { getRuntimeConfig } from "../../config/config.js";
 import { resolveSessionFilePath, resolveStorePath } from "../../config/sessions/paths.js";
 import { loadSessionStore, saveSessionStore } from "../../config/sessions/store.js";
 import { createLazyRuntimeMethod, createLazyRuntimeModule } from "../../shared/lazy-runtime.js";
@@ -15,6 +19,16 @@ const loadEmbeddedPiRuntime = createLazyRuntimeModule(
   () => import("./runtime-embedded-pi.runtime.js"),
 );
 
+function resolveRuntimeThinkingCatalog(
+  params: Parameters<typeof resolveThinkingProfile>[0],
+) {
+  if (params.catalog) {
+    return params.catalog;
+  }
+  const configuredCatalog = buildConfiguredModelCatalog({ cfg: getRuntimeConfig() });
+  return configuredCatalog.length > 0 ? configuredCatalog : undefined;
+}
+
 export function createRuntimeAgent(): PluginRuntime["agent"] {
   const agentRuntime = {
     defaults: {
@@ -27,7 +41,10 @@ export function createRuntimeAgent(): PluginRuntime["agent"] {
     resolveThinkingDefault,
     normalizeThinkingLevel: normalizeThinkLevel,
     resolveThinkingPolicy: (params) => {
-      const profile = resolveThinkingProfile(params);
+      const profile = resolveThinkingProfile({
+        ...params,
+        catalog: resolveRuntimeThinkingCatalog(params),
+      });
       const policy: Omit<
         ReturnType<typeof resolveThinkingProfile>,
         "defaultLevel"