From 5acec7f79b7c0461e7c0f3ada3add5bdcd09072a Mon Sep 17 00:00:00 2001 From: Pablo Nunez Date: Tue, 3 Feb 2026 01:41:57 +0100 Subject: [PATCH] fix: wire agents.defaults.imageModel into media understanding auto-discovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit resolveAutoEntries only checked a hardcoded list of providers (openai, anthropic, google, minimax) when looking for an image model. agents.defaults.imageModel was never consulted by the media understanding pipeline — it was only wired into the explicit `image` tool. Add resolveImageModelFromAgentDefaults that reads the imageModel config (primary + fallbacks) and inserts it into the auto-discovery chain before the hardcoded provider list. runProviderEntry already falls back to describeImageWithModel (via pi-ai) for providers not in the media understanding registry, so no additional provider registration is needed. Co-Authored-By: Claude Sonnet 4.5 (cherry picked from commit b381029ede72a57ef6d12d9413c98fa29501b797) --- src/media-understanding/runner.ts | 44 +++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/media-understanding/runner.ts b/src/media-understanding/runner.ts index 51aa8f3593f..bbe23a7b000 100644 --- a/src/media-understanding/runner.ts +++ b/src/media-understanding/runner.ts @@ -391,6 +391,44 @@ async function resolveKeyEntry(params: { return null; } +function resolveImageModelFromAgentDefaults(cfg: OpenClawConfig): MediaUnderstandingModelConfig[] { + const imageModel = cfg.agents?.defaults?.imageModel as + | { primary?: string; fallbacks?: string[] } + | string + | undefined; + if (!imageModel) { + return []; + } + const refs: string[] = []; + if (typeof imageModel === "string") { + if (imageModel.trim()) { + refs.push(imageModel.trim()); + } + } else { + if (imageModel.primary?.trim()) { + refs.push(imageModel.primary.trim()); + } + for (const fb of imageModel.fallbacks ?? []) { + if (fb?.trim()) { + refs.push(fb.trim()); + } + } + } + const entries: MediaUnderstandingModelConfig[] = []; + for (const ref of refs) { + const slashIdx = ref.indexOf("/"); + if (slashIdx <= 0 || slashIdx >= ref.length - 1) { + continue; + } + entries.push({ + type: "provider", + provider: ref.slice(0, slashIdx), + model: ref.slice(slashIdx + 1), + }); + } + return entries; +} + async function resolveAutoEntries(params: { cfg: OpenClawConfig; agentDir?: string; @@ -408,6 +446,12 @@ async function resolveAutoEntries(params: { return [localAudio]; } } + if (params.capability === "image") { + const imageModelEntries = resolveImageModelFromAgentDefaults(params.cfg); + if (imageModelEntries.length > 0) { + return imageModelEntries; + } + } const gemini = await resolveGeminiCliEntry(params.capability); if (gemini) { return [gemini];