fix: guard media image auto model resolution

This commit is contained in:
Shakker
2026-04-01 20:11:48 +01:00
committed by Peter Steinberger
parent 4b79ae7ad8
commit 50069bcb59
4 changed files with 197 additions and 10 deletions

View File

@@ -35,4 +35,10 @@ describe("DEFAULT_IMAGE_MODELS", () => {
it("includes the MiniMax portal vision default", () => {
  // The portal provider id must have its own default entry.
  const portalDefault = DEFAULT_IMAGE_MODELS["minimax-portal"];
  expect(portalDefault).toBe("MiniMax-VL-01");
});
it("includes bundled image-provider defaults beyond the core provider set", () => {
  // Provider id -> expected default image model, checked in declaration order.
  const expectedDefaults = [
    ["openai-codex", "gpt-5.4"],
    ["moonshot", "kimi-k2.5"],
    ["openrouter", "auto"],
  ] as const;
  for (const [providerId, expectedModel] of expectedDefaults) {
    expect(DEFAULT_IMAGE_MODELS[providerId]).toBe(expectedModel);
  }
});
});

View File

@@ -52,10 +52,13 @@ export const AUTO_IMAGE_KEY_PROVIDERS = [
// Provider ids for video, typed as a readonly tuple of literals.
// NOTE(review): presumably the providers tried when auto-selecting a video
// provider from available API keys — confirm against the callers.
export const AUTO_VIDEO_KEY_PROVIDERS = ["google", "moonshot"] as const;
/**
 * Default image model id for each provider id.
 *
 * Keys are provider ids and values are model ids. The `openrouter` entry uses
 * the literal id "auto" rather than a concrete model name.
 * NOTE(review): lookups index this object directly by provider id, so callers
 * should expect `undefined` for providers without an entry even though the
 * declared value type is `string`.
 */
export const DEFAULT_IMAGE_MODELS: Record<string, string> = {
openai: "gpt-5-mini",
"openai-codex": "gpt-5.4",
anthropic: "claude-opus-4-6",
google: "gemini-3-flash-preview",
minimax: "MiniMax-VL-01",
"minimax-portal": "MiniMax-VL-01",
moonshot: "kimi-k2.5",
openrouter: "auto",
zai: "glm-4.6v",
};
// Five times the `MB` constant.
// NOTE(review): name suggests a cap on buffered CLI output — confirm at the use site.
export const CLI_OUTPUT_MAX_BUFFER = 5 * MB;

View File

@@ -8,6 +8,7 @@ import {
loadModelCatalog,
modelSupportsVision,
} from "../agents/model-catalog.js";
import { findNormalizedProviderValue } from "../agents/provider-id.js";
import type { MsgContext } from "../auto-reply/templating.js";
import type { OpenClawConfig } from "../config/config.js";
import {
@@ -102,6 +103,68 @@ function resolveConfiguredKeyProviderOrder(params: {
return [...new Set([...configuredProviders, ...params.fallbackProviders])];
}
/**
 * Looks up the provider's configured models and returns the id of the first
 * one that declares image input.
 *
 * @param params.cfg - Config whose `models.providers` map is searched.
 * @param params.providerId - Provider key, matched through
 *   `findNormalizedProviderValue`.
 * @returns The trimmed id of the first configured model that has a non-blank
 *   id and lists `"image"` in its `input`, or `undefined` when there is none.
 */
function resolveConfiguredImageModelId(params: {
  cfg: OpenClawConfig;
  providerId: string;
}): string | undefined {
  type ProviderWithModels = {
    models?: Array<{
      id?: string;
      input?: string[];
    }>;
  };
  const { cfg, providerId } = params;
  const provider = findNormalizedProviderValue(cfg.models?.providers, providerId) as
    | ProviderWithModels
    | undefined;
  const models = provider?.models;
  if (models == null) {
    return undefined;
  }
  const imageModel = models.find((candidate) => {
    // Entries without a usable id can never be selected, whatever their inputs.
    if (!candidate?.id?.trim()) {
      return false;
    }
    return Boolean(candidate.input?.includes("image"));
  });
  return imageModel?.id?.trim() || undefined;
}
/**
 * Chooses an image-capable model id for a provider from a loaded model catalog.
 *
 * Only entries whose normalized provider equals `providerId` and that pass
 * `modelSupportsVision` are considered. Among those, an entry whose id is
 * "auto" (case-insensitive, ignoring surrounding whitespace) wins; otherwise
 * the first match in catalog order is used.
 *
 * @param params.providerId - Provider id compared against each entry's
 *   normalized provider.
 * @param params.catalog - Catalog as returned by `loadModelCatalog`.
 * @returns The trimmed model id, or `undefined` when nothing matches or the
 *   chosen id is blank.
 */
function resolveCatalogImageModelId(params: {
  providerId: string;
  catalog: Awaited<ReturnType<typeof loadModelCatalog>>;
}): string | undefined {
  const { providerId, catalog } = params;
  const visionEntries = catalog.filter((candidate) => {
    if (normalizeMediaProviderId(candidate.provider) !== providerId) {
      return false;
    }
    return modelSupportsVision(candidate);
  });
  // With no matches both lookups are undefined and the chain below yields undefined.
  const preferred =
    visionEntries.find((candidate) => candidate.id.trim().toLowerCase() === "auto") ??
    visionEntries[0];
  return preferred?.id.trim() || undefined;
}
/**
 * Resolves the image model id to use for a provider during auto selection.
 *
 * Sources are tried in order and the first non-empty result wins:
 * 1. the explicitly supplied model (trimmed),
 * 2. the first image-capable model configured for the provider,
 * 3. the provider's own entry in `DEFAULT_IMAGE_MODELS`,
 * 4. an image-capable entry from the model catalog.
 *
 * The catalog is only loaded when the first three sources yield nothing.
 *
 * @param params.cfg - Config used for the configured-model lookup and passed to
 *   `loadModelCatalog`.
 * @param params.providerId - Provider id to resolve a model for.
 * @param params.explicitModel - Optional caller-chosen model; blank values are ignored.
 * @returns The resolved model id, or `undefined` when no source provides one.
 */
async function resolveAutoImageModelId(params: {
  cfg: OpenClawConfig;
  providerId: string;
  explicitModel?: string;
}): Promise<string | undefined> {
  const explicit = params.explicitModel?.trim();
  if (explicit) {
    return explicit;
  }
  const configuredModel = resolveConfiguredImageModelId(params);
  if (configuredModel) {
    return configuredModel;
  }
  // Provider ids can come from user config. A bare index lookup would resolve
  // ids such as "constructor" or "__proto__" to inherited Object.prototype
  // members, which are truthy and would be returned as a model id. Only own
  // entries of the defaults table count.
  const hasDefault = Object.prototype.hasOwnProperty.call(
    DEFAULT_IMAGE_MODELS,
    params.providerId,
  );
  const defaultModel = hasDefault ? DEFAULT_IMAGE_MODELS[params.providerId] : undefined;
  if (defaultModel) {
    return defaultModel;
  }
  const catalog = await loadModelCatalog({ config: params.cfg });
  return resolveCatalogImageModelId({
    providerId: params.providerId,
    catalog,
  });
}
export function buildProviderRegistry(
overrides?: Record<string, MediaUnderstandingProvider>,
cfg?: OpenClawConfig,
@@ -390,7 +453,14 @@ async function resolveKeyEntry(params: {
) {
return null;
}
return { type: "provider" as const, provider: providerId, model };
const resolvedModel =
capability === "image"
? await resolveAutoImageModelId({ cfg, providerId, explicitModel: model })
: model;
if (capability === "image" && !resolvedModel) {
return null;
}
return { type: "provider" as const, provider: providerId, model: resolvedModel };
};
if (capability === "image") {
@@ -407,8 +477,7 @@ async function resolveKeyEntry(params: {
capability,
fallbackProviders: AUTO_IMAGE_KEY_PROVIDERS,
})) {
const model = DEFAULT_IMAGE_MODELS[providerId];
const entry = await checkProvider(providerId, model);
const entry = await checkProvider(providerId);
if (entry) {
return entry;
}
@@ -533,11 +602,8 @@ export async function resolveAutoImageModel(params: {
return null;
}
const provider = entry.provider;
if (!provider) {
return null;
}
const model = entry.model ?? DEFAULT_IMAGE_MODELS[provider];
if (!model) {
const model = entry.model?.trim();
if (!provider || !model) {
return null;
}
return { provider, model };
@@ -599,10 +665,21 @@ async function resolveActiveModelEntry(params: {
if (!hasAuth) {
return null;
}
const model =
params.capability === "image"
? await resolveAutoImageModelId({
cfg: params.cfg,
providerId,
explicitModel: params.activeModel?.model,
})
: params.activeModel?.model;
if (params.capability === "image" && !model) {
return null;
}
return {
type: "provider",
provider: providerId,
model: params.activeModel?.model,
model,
};
}

View File

@@ -11,8 +11,9 @@ import { loadPluginManifestRegistry } from "../plugins/manifest-registry.js";
import { createEmptyPluginRegistry } from "../plugins/registry.js";
import { setActivePluginRegistry } from "../plugins/runtime.js";
import { createMediaAttachmentCache, normalizeMediaAttachments } from "./runner.attachments.js";
import { withMediaFixture } from "./runner.test-utils.js";
const catalog = [
const baseCatalog = [
{
id: "gpt-4.1",
name: "GPT-4.1",
@@ -20,6 +21,7 @@ const catalog = [
input: ["text", "image"] as const,
},
];
let catalog = [...baseCatalog];
const loadModelCatalog = vi.hoisted(() => vi.fn(async () => catalog));
@@ -85,6 +87,7 @@ describe("runCapability image skip", () => {
});
beforeEach(() => {
catalog = [...baseCatalog];
loadModelCatalog.mockClear();
setActivePluginRegistry(createEmptyPluginRegistry());
vi.unstubAllEnvs();
@@ -150,4 +153,102 @@ describe("runCapability image skip", () => {
vi.unstubAllEnvs();
}
});
it("auto-selects configured OpenRouter image providers with a resolved model", async () => {
  // OpenRouter has an API key but an empty model list, so the model id cannot
  // come from the provider's configured models.
  const cfg = {
    models: {
      providers: {
        openrouter: {
          apiKey: "test-openrouter-key", // pragma: allowlist secret
          models: [],
        },
      },
    },
  } as unknown as OpenClawConfig;
  // Model id the stub provider was called with; stays undefined if never invoked.
  let observedModel: string | undefined;

  await withMediaFixture(
    {
      filePrefix: "openclaw-image-openrouter",
      extension: "png",
      mediaType: "image/png",
      fileContents: Buffer.from("image"),
    },
    async ({ ctx, media, cache }) => {
      const { decision, outputs } = await runCapability({
        capability: "image",
        cfg,
        ctx,
        attachments: cache,
        media,
        agentDir: "/tmp",
        providerRegistry: new Map([
          [
            "openrouter",
            {
              id: "openrouter",
              capabilities: ["image"],
              // Echo the requested model back so the test can assert on it.
              describeImage: async (req) => {
                observedModel = req.model;
                return { text: "openrouter ok", model: req.model };
              },
            },
          ],
        ]),
      });

      expect(decision.outcome).toBe("success");
      const firstOutput = outputs[0];
      expect(firstOutput?.provider).toBe("openrouter");
      expect(firstOutput?.model).toBe("auto");
      expect(firstOutput?.text).toBe("openrouter ok");
      expect(observedModel).toBe("auto");
    },
  );
});
it("skips configured image providers without an auto-resolvable model", async () => {
  // "custom-image" has an API key but no configured models, and the test
  // expects that no model can be resolved for it.
  const cfg = {
    models: {
      providers: {
        "custom-image": {
          apiKey: "test-custom-key", // pragma: allowlist secret
          models: [],
        },
      },
    },
  } as unknown as OpenClawConfig;

  await withMediaFixture(
    {
      filePrefix: "openclaw-image-custom-skip",
      extension: "png",
      mediaType: "image/png",
      fileContents: Buffer.from("image"),
    },
    async ({ ctx, media, cache }) => {
      const { outputs, decision } = await runCapability({
        capability: "image",
        cfg,
        ctx,
        attachments: cache,
        media,
        agentDir: "/tmp",
        providerRegistry: new Map([
          [
            "custom-image",
            {
              id: "custom-image",
              capabilities: ["image"],
              describeImage: async () => ({ text: "custom ok" }),
            },
          ],
        ]),
      });

      // The provider must be skipped outright: no outputs and no recorded attempts.
      expect(outputs).toHaveLength(0);
      expect(decision.outcome).toBe("skipped");
      expect(decision.attachments).toEqual([{ attachmentIndex: 0, attempts: [] }]);
    },
  );
});
});