From 554f93a999006d70be50bdbbf4cbbb8fd323efd8 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 25 Apr 2026 01:27:26 +0100 Subject: [PATCH] fix(providers): keep minimax chat models text-only --- CHANGELOG.md | 1 + docs/concepts/model-providers.md | 2 +- docs/gateway/config-tools.md | 2 +- docs/providers/minimax.md | 15 +++++------ extensions/minimax/model-definitions.test.ts | 12 ++++----- extensions/minimax/model-definitions.ts | 4 +-- extensions/minimax/onboard.test.ts | 7 +++++ src/agents/tools/image-tool.test.ts | 27 ++++++++++++++++++++ 8 files changed, 50 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4a077b46af..23546ef7624 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ Docs: https://docs.openclaw.ai - Control UI/Codex harness: emit native Codex app-server assistant and lifecycle completion events so live webchat runs stop spinning without needing a transcript reload fallback. (#70815) Thanks @lesaai. - Agents/sessions: persist the runtime-resolved context budget from embedded agent runs, so Codex GPT-5.5 sessions keep the catalog/runtime context cap instead of falling back to the generic 200k status value. Fixes #71294. Thanks @tud0r. - Agents/tools: fail runs before model submission when explicit tool allowlists resolve to no callable tools, preventing text-only hallucinated tool results for missing tools such as plugin commands that were not registered. Fixes #71292. +- Providers/MiniMax: keep M2.7 chat model metadata text-only so image tool requests route through `MiniMax-VL-01` instead of the Anthropic-compatible chat endpoint. Fixes #71296. Thanks @ilker-cevikkaya. - Discord/replies: run `message_sending` plugin hooks for Discord reply delivery, including DM targets, so plugins can transform or cancel outbound Discord replies consistently with other channels. Fixes #59350. (#71094) Thanks @wei840222. - Control UI/commands: carry provider-owned thinking option ids/labels in session rows and defaults so fresh sessions show and accept dynamic modes such as `adaptive`, `xhigh`, and `max`. Fixes #71269. Thanks @Young-Khalil. - Image generation: make explicit `model=` overrides exact-only so failed `openai/gpt-image-2` requests no longer fall through to Gemini or other configured providers, and update `image_generate list` to mention OpenAI Codex OAuth as valid auth for `openai/gpt-image-2`. Fixes #71290 and #71231. Thanks @Young-Khalil and @steipete. diff --git a/docs/concepts/model-providers.md b/docs/concepts/model-providers.md index 584f1ad40d0..91fdda73da8 100644 --- a/docs/concepts/model-providers.md +++ b/docs/concepts/model-providers.md @@ -266,7 +266,7 @@ Quirks worth knowing: - **OpenRouter** applies its app-attribution headers and Anthropic `cache_control` markers only on verified `openrouter.ai` routes. As a proxy-style OpenAI-compatible path, it skips native-OpenAI-only shaping (`serviceTier`, Responses `store`, prompt-cache hints, OpenAI reasoning-compat). Gemini-backed refs keep proxy-Gemini thought-signature sanitation only. - **Kilo Gateway** Gemini-backed refs follow the same proxy-Gemini sanitation path; `kilocode/kilo/auto` and other proxy-reasoning-unsupported refs skip proxy reasoning injection. -- **MiniMax** API-key onboarding writes explicit M2.7 model definitions with `input: ["text", "image"]`; the bundled catalog keeps chat refs text-only until that config is materialized. +- **MiniMax** API-key onboarding writes explicit text-only M2.7 chat model definitions; image understanding stays on the plugin-owned `MiniMax-VL-01` media provider. - **xAI** uses the xAI Responses path. `/fast` or `params.fastMode: true` rewrites `grok-3`, `grok-3-mini`, `grok-4`, and `grok-4-0709` to their `*-fast` variants. `tool_stream` defaults on; disable via `agents.defaults.models["xai/"].params.tool_stream=false`. - **Cerebras** GLM models use `zai-glm-4.7` / `zai-glm-4.6`; OpenAI-compatible base URL is `https://api.cerebras.ai/v1`. diff --git a/docs/gateway/config-tools.md b/docs/gateway/config-tools.md index f027520937c..8882c363c92 100644 --- a/docs/gateway/config-tools.md +++ b/docs/gateway/config-tools.md @@ -628,7 +628,7 @@ Base URL should omit `/v1` (Anthropic client appends it). Shortcut: `openclaw on id: "MiniMax-M2.7", name: "MiniMax M2.7", reasoning: true, - input: ["text", "image"], + input: ["text"], cost: { input: 0.3, output: 1.2, cacheRead: 0.06, cacheWrite: 0.375 }, contextWindow: 204800, maxTokens: 131072, diff --git a/docs/providers/minimax.md b/docs/providers/minimax.md index fc0162b05a6..0fa206022d7 100644 --- a/docs/providers/minimax.md +++ b/docs/providers/minimax.md @@ -144,7 +144,7 @@ Choose your preferred auth method and follow the setup steps. id: "MiniMax-M2.7", name: "MiniMax M2.7", reasoning: true, - input: ["text", "image"], + input: ["text"], cost: { input: 0.3, output: 1.2, cacheRead: 0.06, cacheWrite: 0.375 }, contextWindow: 204800, maxTokens: 131072, @@ -153,7 +153,7 @@ Choose your preferred auth method and follow the setup steps. id: "MiniMax-M2.7-highspeed", name: "MiniMax M2.7 Highspeed", reasoning: true, - input: ["text", "image"], + input: ["text"], cost: { input: 0.6, output: 2.4, cacheRead: 0.06, cacheWrite: 0.375 }, contextWindow: 204800, maxTokens: 131072, @@ -237,11 +237,8 @@ the bundled `minimax-portal` auth path instead. When onboarding or API-key setup writes explicit `models.providers.minimax` entries, OpenClaw materializes `MiniMax-M2.7` and -`MiniMax-M2.7-highspeed` with `input: ["text", "image"]`. - -The built-in bundled MiniMax text catalog itself stays text-only metadata until -that explicit provider config exists. Image understanding is exposed separately -through the plugin-owned `MiniMax-VL-01` media provider. +`MiniMax-M2.7-highspeed` as text-only chat models. Image understanding is +exposed separately through the plugin-owned `MiniMax-VL-01` media provider. See [Image Generation](/tools/image-generation) for shared tool parameters, provider selection, and failover behavior. @@ -398,8 +395,8 @@ See [MiniMax Search](/tools/minimax-search) for full web search configuration an - OAuth setup: `minimax-portal/` - Default chat model: `MiniMax-M2.7` - Alternate chat model: `MiniMax-M2.7-highspeed` -- Onboarding and direct API-key setup write explicit model definitions with `input: ["text", "image"]` for both M2.7 variants -- The bundled provider catalog currently exposes the chat refs as text-only metadata until explicit MiniMax provider config exists +- Onboarding and direct API-key setup write text-only model definitions for both M2.7 variants +- Image understanding uses the plugin-owned `MiniMax-VL-01` media provider - Update pricing values in `models.json` if you need exact cost tracking - Use `openclaw models list` to confirm the current provider id, then switch with `openclaw models set minimax/MiniMax-M2.7` or `openclaw models set minimax-portal/MiniMax-M2.7` diff --git a/extensions/minimax/model-definitions.test.ts b/extensions/minimax/model-definitions.test.ts index a0e4867b72d..b6600208ea1 100644 --- a/extensions/minimax/model-definitions.test.ts +++ b/extensions/minimax/model-definitions.test.ts @@ -38,7 +38,7 @@ describe("minimax model definitions", () => { id: "MiniMax-M2.7", name: "MiniMax M2.7", reasoning: true, - input: ["text", "image"], // M2.7 supports images + input: ["text"], }); }); @@ -62,7 +62,7 @@ describe("minimax model definitions", () => { expect(model.cost).toEqual(MINIMAX_API_COST); expect(model.contextWindow).toBe(DEFAULT_MINIMAX_CONTEXT_WINDOW); expect(model.maxTokens).toBe(DEFAULT_MINIMAX_MAX_TOKENS); - expect(model.input).toEqual(["text", "image"]); + expect(model.input).toEqual(["text"]); }); it("falls back to generated name for unknown model id", () => { @@ -71,14 +71,14 @@ describe("minimax model definitions", () => { expect(model.reasoning).toBe(false); }); - it("M2.7 model includes image input", () => { + it("keeps M2.7 text-only on the Anthropic-compatible chat path", () => { const model = buildMinimaxApiModelDefinition("MiniMax-M2.7"); - expect(model.input).toEqual(["text", "image"]); + expect(model.input).toEqual(["text"]); }); - it("M2.7-highspeed model includes image input", () => { + it("keeps M2.7-highspeed text-only on the Anthropic-compatible chat path", () => { const model = buildMinimaxApiModelDefinition("MiniMax-M2.7-highspeed"); - expect(model.input).toEqual(["text", "image"]); + expect(model.input).toEqual(["text"]); expect(model.cost).toEqual(MINIMAX_API_HIGHSPEED_COST); }); diff --git a/extensions/minimax/model-definitions.ts b/extensions/minimax/model-definitions.ts index 26d31a70968..2ce490c10cb 100644 --- a/extensions/minimax/model-definitions.ts +++ b/extensions/minimax/model-definitions.ts @@ -70,13 +70,11 @@ export function buildMinimaxModelDefinition(params: { maxTokens: number; }): ModelDefinitionConfig { const catalog = MINIMAX_TEXT_MODEL_CATALOG[params.id as MinimaxCatalogId]; - // MiniMax-M2.7 supports image input - const isImageCapable = params.id === "MiniMax-M2.7" || params.id.startsWith("MiniMax-M2.7-"); return { id: params.id, name: params.name ?? catalog?.name ?? `MiniMax ${params.id}`, reasoning: params.reasoning ?? catalog?.reasoning ?? false, - input: isImageCapable ? ["text", "image"] : ["text"], + input: ["text"], cost: params.cost, contextWindow: params.contextWindow, maxTokens: params.maxTokens, diff --git a/extensions/minimax/onboard.test.ts b/extensions/minimax/onboard.test.ts index 143c0cc8092..0bdcfa5f249 100644 --- a/extensions/minimax/onboard.test.ts +++ b/extensions/minimax/onboard.test.ts @@ -21,6 +21,13 @@ describe("minimax onboard", () => { expect(cfg.models?.providers?.minimax?.models[0]?.reasoning).toBe(true); }); + it("keeps MiniMax chat models text-only so image tools use MiniMax-VL-01", () => { + const cfg = applyMinimaxApiConfig({}, "MiniMax-M2.7-highspeed"); + expect(cfg.models?.providers?.minimax?.models).toEqual([ + expect.objectContaining({ id: "MiniMax-M2.7-highspeed", input: ["text"] }), + ]); + }); + it("preserves existing model params when adding alias", () => { const cfg = applyMinimaxApiConfig( { diff --git a/src/agents/tools/image-tool.test.ts b/src/agents/tools/image-tool.test.ts index a55fdafa675..0b446ef92be 100644 --- a/src/agents/tools/image-tool.test.ts +++ b/src/agents/tools/image-tool.test.ts @@ -638,6 +638,33 @@ describe("image tool implicit imageModel config", () => { }); }); + it("does not treat configured MiniMax M2.7 chat metadata as the image model", async () => { + await withTempAgentDir(async (agentDir) => { + vi.stubEnv("MINIMAX_API_KEY", "minimax-test"); + vi.stubEnv("OPENAI_API_KEY", "openai-test"); + vi.stubEnv("ANTHROPIC_API_KEY", "anthropic-test"); + const cfg: OpenClawConfig = { + agents: { defaults: { model: { primary: "minimax/MiniMax-M2.7" } } }, + models: { + mode: "merge", + providers: { + minimax: { + baseUrl: "https://api.minimax.io/anthropic", + apiKey: "${MINIMAX_API_KEY}", + api: "anthropic-messages", + models: [makeModelDefinition("MiniMax-M2.7", ["text"])], + }, + }, + }, + }; + expect(resolveImageModelConfigForTool({ cfg, agentDir })).toEqual({ + ...createDefaultImageFallbackExpectation("minimax/MiniMax-VL-01"), + fallbacks: ["openai/gpt-5.4-mini", "anthropic/claude-opus-4-6"], + }); + expect(createImageTool({ config: cfg, agentDir })).not.toBeNull(); + }); + }); + it("pairs minimax-portal primary with MiniMax-VL-01 (and fallbacks) when auth exists", async () => { await withTempAgentDir(async (agentDir) => { await writeAuthProfiles(agentDir, {