fix(media-understanding): auto-register image capability for config providers with image input (#51392)

This commit is contained in:
xydt-610
2026-03-21 12:30:05 +08:00
committed by Peter Steinberger
parent 3da187156f
commit 1d8bba7e39
2 changed files with 51 additions and 0 deletions

View File

@@ -58,4 +58,28 @@ describe("media-understanding provider registry", () => {
expect(provider?.id).toBe("google");
});
// Regression test for #51392: a provider declared only in config should get an
// image media-understanding entry when one of its models lists "image" as an input,
// while a provider whose models accept only text should stay unregistered.
it("auto-registers media-understanding for config providers with image-capable models (#51392)", () => {
  const imageCapableModel = { id: "glm-4.6v", input: ["text", "image"] };
  const textOnlyModel = { id: "text-model", input: ["text"] };
  // `as never` sidesteps the full config type; only `models.providers` matters here.
  const cfg = {
    models: {
      providers: {
        glm: { models: [imageCapableModel] },
        textOnly: { models: [textOnlyModel] },
      },
    },
  } as never;

  const registry = buildMediaUnderstandingRegistry(undefined, cfg);
  const registeredGlm = getMediaUnderstandingProvider("glm", registry);
  const registeredTextOnly = getMediaUnderstandingProvider("textOnly", registry);

  // The image-capable provider is present, image-only, and has both describe hooks.
  expect(registeredGlm?.id).toBe("glm");
  expect(registeredGlm?.capabilities).toEqual(["image"]);
  expect(registeredGlm?.describeImage).toBeDefined();
  expect(registeredGlm?.describeImages).toBeDefined();

  // The text-only provider must not be auto-registered.
  expect(registeredTextOnly).toBeUndefined();
});
});

View File

@@ -1,5 +1,6 @@
import type { OpenClawConfig } from "../config/config.js";
import { resolvePluginCapabilityProviders } from "../plugins/capability-provider-runtime.js";
import { describeImageWithModel, describeImagesWithModel } from "./image-runtime.js";
import { normalizeMediaProviderId } from "./provider-id.js";
import type { MediaUnderstandingProvider } from "./types.js";
@@ -35,6 +36,32 @@ export function buildMediaUnderstandingRegistry(
})) {
mergeProviderIntoRegistry(registry, provider);
}
// Auto-register media-understanding for config providers with image-capable models (#51392).
// This runs after the preceding merge loop and before `overrides` are applied, so a
// provider id already present in the registry is left untouched here and explicit
// overrides are still processed afterwards.
const configProviders = cfg?.models?.providers;
if (configProviders && typeof configProviders === "object") {
  for (const [providerKey, providerCfg] of Object.entries(configProviders)) {
    // Skip blank provider keys.
    if (!providerKey?.trim()) {
      continue;
    }
    const normalizedKey = normalizeMediaProviderId(providerKey);
    if (registry.has(normalizedKey)) {
      continue;
    }
    // Config is read through a cast, so validate the shape at runtime: a non-array
    // `models` value is treated as "no models" instead of throwing on `.some`.
    const rawModels = (providerCfg as { models?: unknown })?.models;
    const models: Array<{ input?: string[] }> = Array.isArray(rawModels) ? rawModels : [];
    const hasImageModel = models.some(
      (m) => Array.isArray(m?.input) && m.input.includes("image"),
    );
    if (hasImageModel) {
      // Image-only provider backed by the shared model-based describe helpers.
      const autoProvider: MediaUnderstandingProvider = {
        id: normalizedKey,
        capabilities: ["image"],
        describeImage: describeImageWithModel,
        describeImages: describeImagesWithModel,
      };
      mergeProviderIntoRegistry(registry, autoProvider);
    }
  }
}
if (overrides) {
for (const [key, provider] of Object.entries(overrides)) {
const normalizedKey = normalizeMediaProviderId(key);