mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 16:40:49 +00:00
fix: honor explicit media image model routing
This commit is contained in:
@@ -17,6 +17,7 @@ Docs: https://docs.openclaw.ai
|
|||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
|
|
||||||
|
- Media understanding: honor explicit image-model configuration before native-vision skips, including `agents.defaults.imageModel`, `tools.media.image.models`, and provider image defaults such as MiniMax VL when the active chat model is text-only. Fixes #47614, #63722, #69171.
|
||||||
- Codex/media understanding: support `codex/*` image models through bounded Codex app-server image turns, while keeping `openai-codex/*` on the OpenAI Codex OAuth route and validating app-server responses against generated protocol contracts. Fixes #70201.
|
- Codex/media understanding: support `codex/*` image models through bounded Codex app-server image turns, while keeping `openai-codex/*` on the OpenAI Codex OAuth route and validating app-server responses against generated protocol contracts. Fixes #70201.
|
||||||
- Providers/OpenAI Codex: synthesize the `openai-codex/gpt-5.5` OAuth model row when Codex catalog discovery omits it, so cron and subagent runs do not fail with `Unknown model` while the account is authenticated.
|
- Providers/OpenAI Codex: synthesize the `openai-codex/gpt-5.5` OAuth model row when Codex catalog discovery omits it, so cron and subagent runs do not fail with `Unknown model` while the account is authenticated.
|
||||||
- Models/CLI: keep `openclaw models list` read-only while still showing eligible configured-provider rows, so listing models no longer rewrites per-agent `models.json`. (#70847) Thanks @shakkernerd.
|
- Models/CLI: keep `openclaw models list` read-only while still showing eligible configured-provider rows, so listing models no longer rewrites per-agent `models.json`. (#70847) Thanks @shakkernerd.
|
||||||
|
|||||||
@@ -85,6 +85,15 @@ function resolveConfiguredImageModelId(params: {
|
|||||||
cfg: OpenClawConfig;
|
cfg: OpenClawConfig;
|
||||||
providerId: string;
|
providerId: string;
|
||||||
}): string | undefined {
|
}): string | undefined {
|
||||||
|
const configured = resolveConfiguredImageModel(params);
|
||||||
|
const id = configured?.id?.trim();
|
||||||
|
return id || undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
function resolveConfiguredImageModel(params: {
|
||||||
|
cfg: OpenClawConfig;
|
||||||
|
providerId: string;
|
||||||
|
}): { id?: string; input?: string[] } | undefined {
|
||||||
const providerCfg = findNormalizedProviderValue(
|
const providerCfg = findNormalizedProviderValue(
|
||||||
params.cfg.models?.providers,
|
params.cfg.models?.providers,
|
||||||
params.providerId,
|
params.providerId,
|
||||||
@@ -96,12 +105,10 @@ function resolveConfiguredImageModelId(params: {
|
|||||||
}>;
|
}>;
|
||||||
}
|
}
|
||||||
| undefined;
|
| undefined;
|
||||||
const configured = providerCfg?.models?.find((entry) => {
|
return providerCfg?.models?.find((entry) => {
|
||||||
const id = entry?.id?.trim();
|
const id = entry?.id?.trim();
|
||||||
return Boolean(id) && entry?.input?.includes("image");
|
return Boolean(id) && entry?.input?.includes("image");
|
||||||
});
|
});
|
||||||
const id = configured?.id?.trim();
|
|
||||||
return id || undefined;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function resolveCatalogImageModelId(params: {
|
function resolveCatalogImageModelId(params: {
|
||||||
@@ -119,6 +126,23 @@ function resolveCatalogImageModelId(params: {
|
|||||||
return normalizeOptionalString((autoEntry ?? matches[0])?.id);
|
return normalizeOptionalString((autoEntry ?? matches[0])?.id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function explicitImageModelVisionStatus(params: {
|
||||||
|
cfg: OpenClawConfig;
|
||||||
|
providerId: string;
|
||||||
|
model: string;
|
||||||
|
}): Promise<"supported" | "unsupported" | "unknown"> {
|
||||||
|
const configured = resolveConfiguredImageModel(params);
|
||||||
|
if (configured?.id?.trim() === params.model && configured.input?.includes("image")) {
|
||||||
|
return "supported";
|
||||||
|
}
|
||||||
|
const catalog = await loadModelCatalog({ config: params.cfg });
|
||||||
|
const entry = findModelInCatalog(catalog, params.providerId, params.model);
|
||||||
|
if (!entry) {
|
||||||
|
return "unknown";
|
||||||
|
}
|
||||||
|
return modelSupportsVision(entry) ? "supported" : "unsupported";
|
||||||
|
}
|
||||||
|
|
||||||
async function resolveAutoImageModelId(params: {
|
async function resolveAutoImageModelId(params: {
|
||||||
cfg: OpenClawConfig;
|
cfg: OpenClawConfig;
|
||||||
providerId: string;
|
providerId: string;
|
||||||
@@ -126,7 +150,14 @@ async function resolveAutoImageModelId(params: {
|
|||||||
}): Promise<string | undefined> {
|
}): Promise<string | undefined> {
|
||||||
const explicit = normalizeOptionalString(params.explicitModel);
|
const explicit = normalizeOptionalString(params.explicitModel);
|
||||||
if (explicit) {
|
if (explicit) {
|
||||||
return explicit;
|
const explicitStatus = await explicitImageModelVisionStatus({
|
||||||
|
cfg: params.cfg,
|
||||||
|
providerId: params.providerId,
|
||||||
|
model: explicit,
|
||||||
|
});
|
||||||
|
if (explicitStatus !== "unsupported") {
|
||||||
|
return explicit;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
const configuredModel = resolveConfiguredImageModelId(params);
|
const configuredModel = resolveConfiguredImageModelId(params);
|
||||||
if (configuredModel) {
|
if (configuredModel) {
|
||||||
@@ -498,6 +529,16 @@ function resolveImageModelFromAgentDefaults(cfg: OpenClawConfig): MediaUnderstan
|
|||||||
return entries;
|
return entries;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function hasExplicitImageUnderstandingConfig(params: {
|
||||||
|
cfg: OpenClawConfig;
|
||||||
|
config?: MediaUnderstandingConfig;
|
||||||
|
}): boolean {
|
||||||
|
return (
|
||||||
|
(params.config?.models?.length ?? 0) > 0 ||
|
||||||
|
resolveImageModelFromAgentDefaults(params.cfg).length > 0
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
async function resolveAutoEntries(params: {
|
async function resolveAutoEntries(params: {
|
||||||
cfg: OpenClawConfig;
|
cfg: OpenClawConfig;
|
||||||
agentDir?: string;
|
agentDir?: string;
|
||||||
@@ -505,6 +546,12 @@ async function resolveAutoEntries(params: {
|
|||||||
capability: MediaUnderstandingCapability;
|
capability: MediaUnderstandingCapability;
|
||||||
activeModel?: ActiveMediaModel;
|
activeModel?: ActiveMediaModel;
|
||||||
}): Promise<MediaUnderstandingModelConfig[]> {
|
}): Promise<MediaUnderstandingModelConfig[]> {
|
||||||
|
if (params.capability === "image") {
|
||||||
|
const imageModelEntries = resolveImageModelFromAgentDefaults(params.cfg);
|
||||||
|
if (imageModelEntries.length > 0) {
|
||||||
|
return imageModelEntries;
|
||||||
|
}
|
||||||
|
}
|
||||||
const activeEntry = await resolveActiveModelEntry(params);
|
const activeEntry = await resolveActiveModelEntry(params);
|
||||||
if (activeEntry) {
|
if (activeEntry) {
|
||||||
return [activeEntry];
|
return [activeEntry];
|
||||||
@@ -519,12 +566,6 @@ async function resolveAutoEntries(params: {
|
|||||||
return [localAudio];
|
return [localAudio];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (params.capability === "image") {
|
|
||||||
const imageModelEntries = resolveImageModelFromAgentDefaults(params.cfg);
|
|
||||||
if (imageModelEntries.length > 0) {
|
|
||||||
return imageModelEntries;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
const gemini = await resolveGeminiCliEntry(params.capability);
|
const gemini = await resolveGeminiCliEntry(params.capability);
|
||||||
if (gemini) {
|
if (gemini) {
|
||||||
return [gemini];
|
return [gemini];
|
||||||
@@ -553,6 +594,12 @@ export async function resolveAutoImageModel(params: {
|
|||||||
}
|
}
|
||||||
return { provider, model };
|
return { provider, model };
|
||||||
};
|
};
|
||||||
|
const configuredImageModel = resolveImageModelFromAgentDefaults(params.cfg)
|
||||||
|
.map((entry) => toActive(entry))
|
||||||
|
.find((entry): entry is ActiveMediaModel => entry !== null);
|
||||||
|
if (configuredImageModel) {
|
||||||
|
return configuredImageModel;
|
||||||
|
}
|
||||||
const activeEntry = await resolveActiveModelEntry({
|
const activeEntry = await resolveActiveModelEntry({
|
||||||
cfg: params.cfg,
|
cfg: params.cfg,
|
||||||
agentDir: params.agentDir,
|
agentDir: params.agentDir,
|
||||||
@@ -772,7 +819,11 @@ export async function runCapability(params: {
|
|||||||
// Skip image understanding when the primary model supports vision natively.
|
// Skip image understanding when the primary model supports vision natively.
|
||||||
// The image will be injected directly into the model context instead.
|
// The image will be injected directly into the model context instead.
|
||||||
const activeProvider = params.activeModel?.provider?.trim();
|
const activeProvider = params.activeModel?.provider?.trim();
|
||||||
if (capability === "image" && activeProvider) {
|
if (
|
||||||
|
capability === "image" &&
|
||||||
|
activeProvider &&
|
||||||
|
!hasExplicitImageUnderstandingConfig({ cfg, config })
|
||||||
|
) {
|
||||||
const catalog = await loadModelCatalog({ config: cfg });
|
const catalog = await loadModelCatalog({ config: cfg });
|
||||||
const entry = findModelInCatalog(catalog, activeProvider, params.activeModel?.model ?? "");
|
const entry = findModelInCatalog(catalog, activeProvider, params.activeModel?.model ?? "");
|
||||||
if (modelSupportsVision(entry)) {
|
if (modelSupportsVision(entry)) {
|
||||||
|
|||||||
@@ -13,7 +13,14 @@ import { setActivePluginRegistry } from "../plugins/runtime.js";
|
|||||||
import { createMediaAttachmentCache, normalizeMediaAttachments } from "./runner.attachments.js";
|
import { createMediaAttachmentCache, normalizeMediaAttachments } from "./runner.attachments.js";
|
||||||
import { withMediaFixture } from "./runner.test-utils.js";
|
import { withMediaFixture } from "./runner.test-utils.js";
|
||||||
|
|
||||||
const baseCatalog = [
|
type TestCatalogEntry = {
|
||||||
|
id: string;
|
||||||
|
name: string;
|
||||||
|
provider: string;
|
||||||
|
input: readonly string[];
|
||||||
|
};
|
||||||
|
|
||||||
|
const baseCatalog: TestCatalogEntry[] = [
|
||||||
{
|
{
|
||||||
id: "gpt-4.1",
|
id: "gpt-4.1",
|
||||||
name: "GPT-4.1",
|
name: "GPT-4.1",
|
||||||
@@ -21,7 +28,7 @@ const baseCatalog = [
|
|||||||
input: ["text", "image"] as const,
|
input: ["text", "image"] as const,
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
let catalog = [...baseCatalog];
|
let catalog: TestCatalogEntry[] = [...baseCatalog];
|
||||||
|
|
||||||
const loadModelCatalog = vi.hoisted(() => vi.fn(async () => catalog));
|
const loadModelCatalog = vi.hoisted(() => vi.fn(async () => catalog));
|
||||||
|
|
||||||
@@ -141,6 +148,117 @@ describe("runCapability image skip", () => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("uses explicit media image models instead of native vision skip", async () => {
|
||||||
|
await withMediaFixture(
|
||||||
|
{
|
||||||
|
filePrefix: "openclaw-image-explicit-vision",
|
||||||
|
extension: "png",
|
||||||
|
mediaType: "image/png",
|
||||||
|
fileContents: Buffer.from("image"),
|
||||||
|
},
|
||||||
|
async ({ ctx, media, cache }) => {
|
||||||
|
const cfg = {} as OpenClawConfig;
|
||||||
|
|
||||||
|
const result = await runCapability({
|
||||||
|
capability: "image",
|
||||||
|
cfg,
|
||||||
|
ctx,
|
||||||
|
attachments: cache,
|
||||||
|
media,
|
||||||
|
agentDir: "/tmp",
|
||||||
|
providerRegistry: new Map([
|
||||||
|
[
|
||||||
|
"openrouter",
|
||||||
|
{
|
||||||
|
id: "openrouter",
|
||||||
|
capabilities: ["image"],
|
||||||
|
describeImage: async (req) => ({ text: "explicit ok", model: req.model }),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
]),
|
||||||
|
config: {
|
||||||
|
models: [{ provider: "openrouter", model: "google/gemini-2.5-flash" }],
|
||||||
|
},
|
||||||
|
activeModel: { provider: "openai", model: "gpt-4.1" },
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.decision.outcome).toBe("success");
|
||||||
|
expect(result.outputs[0]).toMatchObject({
|
||||||
|
provider: "openrouter",
|
||||||
|
model: "google/gemini-2.5-flash",
|
||||||
|
text: "explicit ok",
|
||||||
|
});
|
||||||
|
},
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("prefers agents.defaults.imageModel over the active model for auto image resolution", async () => {
|
||||||
|
const cfg = {
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
imageModel: { primary: "openrouter/google/gemini-2.5-flash" },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
} as OpenClawConfig;
|
||||||
|
|
||||||
|
await expect(
|
||||||
|
resolveAutoImageModel({
|
||||||
|
cfg,
|
||||||
|
activeModel: { provider: "openai", model: "gpt-4.1" },
|
||||||
|
}),
|
||||||
|
).resolves.toEqual({
|
||||||
|
provider: "openrouter",
|
||||||
|
model: "google/gemini-2.5-flash",
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it("falls back from an active text model to the provider image default", async () => {
|
||||||
|
catalog = [
|
||||||
|
{
|
||||||
|
id: "MiniMax-M2.7",
|
||||||
|
name: "MiniMax M2.7",
|
||||||
|
provider: "minimax-portal",
|
||||||
|
input: ["text"] as const,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: "MiniMax-VL-01",
|
||||||
|
name: "MiniMax VL 01",
|
||||||
|
provider: "minimax-portal",
|
||||||
|
input: ["text", "image"] as const,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
vi.stubEnv("MINIMAX_API_KEY", "test-minimax-key");
|
||||||
|
const cfg = {} as OpenClawConfig;
|
||||||
|
const pluginRegistry = createEmptyPluginRegistry();
|
||||||
|
pluginRegistry.mediaUnderstandingProviders.push({
|
||||||
|
pluginId: "minimax",
|
||||||
|
pluginName: "MiniMax Provider",
|
||||||
|
source: "test",
|
||||||
|
provider: {
|
||||||
|
id: "minimax-portal",
|
||||||
|
capabilities: ["image"],
|
||||||
|
defaultModels: { image: "MiniMax-VL-01" },
|
||||||
|
describeImage: async () => ({ text: "ok" }),
|
||||||
|
},
|
||||||
|
});
|
||||||
|
setCompatibleActiveMediaUnderstandingRegistry(pluginRegistry, cfg);
|
||||||
|
|
||||||
|
try {
|
||||||
|
await expect(
|
||||||
|
resolveAutoImageModel({
|
||||||
|
cfg,
|
||||||
|
activeModel: { provider: "minimax-portal", model: "MiniMax-M2.7" },
|
||||||
|
}),
|
||||||
|
).resolves.toEqual({
|
||||||
|
provider: "minimax-portal",
|
||||||
|
model: "MiniMax-VL-01",
|
||||||
|
});
|
||||||
|
} finally {
|
||||||
|
setActivePluginRegistry(createEmptyPluginRegistry());
|
||||||
|
vi.unstubAllEnvs();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
it("uses active OpenRouter image models for auto image resolution", async () => {
|
it("uses active OpenRouter image models for auto image resolution", async () => {
|
||||||
vi.stubEnv("OPENROUTER_API_KEY", "test-openrouter-key");
|
vi.stubEnv("OPENROUTER_API_KEY", "test-openrouter-key");
|
||||||
const cfg = {} as OpenClawConfig;
|
const cfg = {} as OpenClawConfig;
|
||||||
|
|||||||
Reference in New Issue
Block a user