From 7564af24e6a6c0b1332cdc84d775ac5e31937deb Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Mon, 27 Apr 2026 05:46:46 +0100
Subject: [PATCH] fix(providers): preserve configured model input modalities

---
 CHANGELOG.md                            |  1 +
 src/agents/models-config.merge.test.ts  | 39 +++++++++++++++++++++++++-
 src/agents/models-config.merge.ts       |  2 +-
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4606b77a90c..40a2e0c6cda 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -51,6 +51,7 @@ Docs: https://docs.openclaw.ai
 - Providers/Ollama: scope synthetic local auth and embedding bearer headers to declared Ollama host boundaries so cloud keys are not sent to local/self-hosted embedding endpoints and remote/cloud Ollama endpoints no longer receive the `ollama-local` marker as if it were a real token. Supersedes #69261 and #69857; refs #43945. Thanks @hyspacex, @maxramsay, and @Meli73.
 - Providers/Ollama: resolve custom-named local Ollama providers such as `ollama-remote` through the Ollama synthetic-auth hook so subagents no longer miss `ollama-local` auth and silently fall back to cloud models. Fixes #43945. Thanks @Meli73 and @maxramsay.
 - Providers/Ollama: add provider-scoped model request timeouts, thread them through guarded fetch connect/header/body/abort handling, and document `params.keep_alive` for cold local models so first-turn Ollama loads no longer require global agent timeout changes. Fixes #64541 and #68796; supersedes #65143 and #66511. Thanks @LittleJakub, @Juankcba, @uninhibite-scholar, and @yfge.
+- Providers/Ollama: preserve explicit configured model input modalities when merging discovered provider metadata so custom vision models keep image support instead of silently dropping attachments. Fixes #39690; carries forward #39785. Thanks @Skrblik and @Mriris.
 - Providers/PDF/Ollama: add bounded network timeouts for Ollama model pulls and native Anthropic/Gemini PDF analysis requests so unresponsive provider endpoints no longer hang sessions indefinitely. Fixes #54142; supersedes #54144 and #54145. Thanks @jinduwang1001-max and @arkyu2077.
 - Memory/doctor: treat Ollama memory embeddings as key-optional so `openclaw doctor` no longer warns about a missing API key when the gateway reports embeddings are ready. Fixes #46584. Thanks @fengly78.
 - Agents/Ollama: apply provider-owned replay turn normalization to native Ollama chat so Cloud models no longer reject non-alternating replay history in agent/Gateway runs. Fixes #71697. Thanks @ismael-81.
diff --git a/src/agents/models-config.merge.test.ts b/src/agents/models-config.merge.test.ts
index b76928c6d5e..fa4aabfd048 100644
--- a/src/agents/models-config.merge.test.ts
+++ b/src/agents/models-config.merge.test.ts
@@ -79,7 +79,6 @@ describe("models-config merge helpers", () => {
           {
             id: "gpt-5.4",
             name: "GPT-5.4",
-            input: ["image"],
             reasoning: false,
             cost: { input: 123, output: 456, cacheRead: 0, cacheWrite: 0 },
             contextWindow: 2_000_000,
@@ -101,6 +100,44 @@
     ]);
   });
 
+  it("preserves explicit input modality overrides when implicit metadata has the same model id", async () => {
+    const merged = mergeProviderModels(
+      {
+        api: "ollama",
+        models: [
+          {
+            id: "qwen3-vl:latest",
+            name: "Qwen3 VL",
+            input: ["text"],
+            reasoning: true,
+            contextWindow: 128_000,
+            maxTokens: 8192,
+          },
+        ],
+      } as ProviderConfig,
+      {
+        api: "ollama",
+        models: [
+          {
+            id: "qwen3-vl:latest",
+            name: "Qwen3 VL",
+            input: ["text", "image"],
+            contextWindow: 128_000,
+            maxTokens: 8192,
+          },
+        ],
+      } as ProviderConfig,
+    );
+
+    expect(merged.models?.[0]).toEqual(
+      expect.objectContaining({
+        id: "qwen3-vl:latest",
+        input: ["text", "image"],
+        reasoning: true,
+      }),
+    );
+  });
+
   it("merges explicit providers onto trimmed keys", async () => {
     const merged = mergeProviders({
       explicit: {
diff --git a/src/agents/models-config.merge.ts b/src/agents/models-config.merge.ts
index fac281b5c26..c925b2d7463 100644
--- a/src/agents/models-config.merge.ts
+++ b/src/agents/models-config.merge.ts
@@ -101,7 +101,7 @@ export function mergeProviderModels(
       {},
       explicitModel,
       {
-        input: implicitModel.input,
+        input: "input" in explicitModel ? explicitModel.input : implicitModel.input,
         reasoning: `reasoning` in explicitModel ? explicitModel.reasoning : implicitModel.reasoning,
       },
       contextWindow === undefined ? {} : { contextWindow },
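
Note for reviewers (illustration only, not part of the patch): the sketch below shows the precedence rule the changed line in src/agents/models-config.merge.ts establishes. The `ModelConfig` type and `mergeModel` helper are simplified, hypothetical stand-ins rather than the project's real types or the real `mergeProviderModels`; the point is only that an explicitly configured `input` array now wins over discovered metadata, while fields the explicit config omits still fall back to the discovered values.

// Minimal sketch under simplified assumptions; not the actual openclaw implementation.
type ModelConfig = {
  id: string;
  name?: string;
  input?: string[]; // input modalities, e.g. ["text", "image"]
  reasoning?: boolean;
  contextWindow?: number;
  maxTokens?: number;
};

// Toy merge mirroring the patched precedence: explicit (user-configured) values
// win for `input` and `reasoning` when those keys are present; otherwise the
// implicit (discovered) metadata fills the gap.
function mergeModel(explicitModel: ModelConfig, implicitModel: ModelConfig): ModelConfig {
  return {
    ...implicitModel,
    ...explicitModel,
    input: "input" in explicitModel ? explicitModel.input : implicitModel.input,
    reasoning:
      "reasoning" in explicitModel ? explicitModel.reasoning : implicitModel.reasoning,
  };
}

// A configured vision model keeps ["text", "image"] even if discovery only
// reports ["text"] for the same model id.
const merged = mergeModel(
  { id: "qwen3-vl:latest", input: ["text", "image"] },
  { id: "qwen3-vl:latest", input: ["text"], reasoning: true, contextWindow: 128_000 },
);
console.log(merged.input); // ["text", "image"]
console.log(merged.reasoning); // true (falls back to discovered metadata)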