mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 10:40:43 +00:00
fix: route explicit image describe models
This commit is contained in:
@@ -57,6 +57,10 @@ const mocks = vi.hoisted(() => ({
|
||||
provider: "openai",
|
||||
model: "gpt-4.1-mini",
|
||||
})),
|
||||
describeImageFileWithModel: vi.fn(async () => ({
|
||||
text: "friendly lobster",
|
||||
model: "gpt-4.1-mini",
|
||||
})),
|
||||
generateImage: vi.fn(),
|
||||
generateVideo: vi.fn(),
|
||||
transcribeAudioFile: vi.fn(async () => ({ text: "meeting notes" })),
|
||||
@@ -179,6 +183,8 @@ vi.mock("../gateway/connection-details.js", () => ({
|
||||
vi.mock("../media-understanding/runtime.js", () => ({
|
||||
describeImageFile:
|
||||
mocks.describeImageFile as typeof import("../media-understanding/runtime.js").describeImageFile,
|
||||
describeImageFileWithModel:
|
||||
mocks.describeImageFileWithModel as typeof import("../media-understanding/runtime.js").describeImageFileWithModel,
|
||||
describeVideoFile: vi.fn(),
|
||||
transcribeAudioFile:
|
||||
mocks.transcribeAudioFile as typeof import("../media-understanding/runtime.js").transcribeAudioFile,
|
||||
@@ -289,6 +295,7 @@ describe("capability cli", () => {
|
||||
return {};
|
||||
}) as never);
|
||||
mocks.describeImageFile.mockClear();
|
||||
mocks.describeImageFileWithModel.mockClear();
|
||||
mocks.generateImage.mockReset();
|
||||
mocks.generateVideo.mockReset();
|
||||
mocks.transcribeAudioFile.mockClear();
|
||||
@@ -384,6 +391,37 @@ describe("capability cli", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("uses the explicit media-understanding provider for image describe model overrides", async () => {
|
||||
await runRegisteredCli({
|
||||
register: registerCapabilityCli as (program: Command) => void,
|
||||
argv: [
|
||||
"capability",
|
||||
"image",
|
||||
"describe",
|
||||
"--file",
|
||||
"photo.jpg",
|
||||
"--model",
|
||||
"ollama/qwen2.5vl:7b",
|
||||
"--json",
|
||||
],
|
||||
});
|
||||
|
||||
expect(mocks.describeImageFileWithModel).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
filePath: expect.stringMatching(/photo\.jpg$/),
|
||||
provider: "ollama",
|
||||
model: "qwen2.5vl:7b",
|
||||
}),
|
||||
);
|
||||
expect(mocks.describeImageFile).not.toHaveBeenCalled();
|
||||
expect(mocks.runtime.writeJson).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
provider: "ollama",
|
||||
model: "gpt-4.1-mini",
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("fails image describe when no description text is returned", async () => {
|
||||
mocks.describeImageFile.mockResolvedValueOnce({
|
||||
text: undefined,
|
||||
|
||||
@@ -25,6 +25,7 @@ import { generateImage, listRuntimeImageGenerationProviders } from "../image-gen
|
||||
import { buildMediaUnderstandingRegistry } from "../media-understanding/provider-registry.js";
|
||||
import {
|
||||
describeImageFile,
|
||||
describeImageFileWithModel,
|
||||
describeVideoFile,
|
||||
transcribeAudioFile,
|
||||
} from "../media-understanding/runtime.js";
|
||||
@@ -749,21 +750,32 @@ async function runImageDescribe(params: {
|
||||
model?: string;
|
||||
}) {
|
||||
const cfg = loadConfig();
|
||||
const agentDir = resolveAgentDir(cfg, resolveDefaultAgentId(cfg));
|
||||
const activeModel = requireProviderModelOverride(params.model);
|
||||
const outputs = await Promise.all(
|
||||
params.files.map(async (filePath) => {
|
||||
const result = await describeImageFile({
|
||||
filePath: path.resolve(filePath),
|
||||
cfg,
|
||||
activeModel,
|
||||
});
|
||||
const resolvedPath = path.resolve(filePath);
|
||||
const result = activeModel
|
||||
? await describeImageFileWithModel({
|
||||
filePath: resolvedPath,
|
||||
cfg,
|
||||
agentDir,
|
||||
provider: activeModel.provider,
|
||||
model: activeModel.model,
|
||||
prompt: "Describe the image.",
|
||||
})
|
||||
: await describeImageFile({
|
||||
filePath: resolvedPath,
|
||||
cfg,
|
||||
agentDir,
|
||||
});
|
||||
if (!result.text) {
|
||||
throw new Error(`No description returned for image: ${path.resolve(filePath)}`);
|
||||
throw new Error(`No description returned for image: ${resolvedPath}`);
|
||||
}
|
||||
return {
|
||||
path: path.resolve(filePath),
|
||||
path: resolvedPath,
|
||||
text: result.text,
|
||||
provider: result.provider,
|
||||
provider: activeModel?.provider ?? ("provider" in result ? result.provider : undefined),
|
||||
model: result.model,
|
||||
kind: "image.description",
|
||||
};
|
||||
|
||||
@@ -17,6 +17,7 @@ const hoisted = vi.hoisted(() => ({
|
||||
setRuntimeApiKeyMock: vi.fn(),
|
||||
discoverModelsMock: vi.fn(),
|
||||
fetchMock: vi.fn(),
|
||||
registerProviderStreamForModelMock: vi.fn(),
|
||||
}));
|
||||
const {
|
||||
completeMock,
|
||||
@@ -27,6 +28,7 @@ const {
|
||||
setRuntimeApiKeyMock,
|
||||
discoverModelsMock,
|
||||
fetchMock,
|
||||
registerProviderStreamForModelMock,
|
||||
} = hoisted;
|
||||
|
||||
vi.mock("@mariozechner/pi-ai", async () => {
|
||||
@@ -50,6 +52,10 @@ vi.mock("../agents/model-auth.js", () => ({
|
||||
requireApiKey: requireApiKeyMock,
|
||||
}));
|
||||
|
||||
vi.mock("../agents/provider-stream.js", () => ({
|
||||
registerProviderStreamForModel: registerProviderStreamForModelMock,
|
||||
}));
|
||||
|
||||
vi.mock("../agents/pi-model-discovery-runtime.js", () => ({
|
||||
discoverAuthStorage: () => ({
|
||||
setRuntimeApiKey: setRuntimeApiKeyMock,
|
||||
@@ -168,6 +174,16 @@ describe("describeImageWithModel", () => {
|
||||
text: "generic ok",
|
||||
model: "custom-vision",
|
||||
});
|
||||
expect(registerProviderStreamForModelMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
model: expect.objectContaining({
|
||||
provider: "minimax-portal",
|
||||
id: "custom-vision",
|
||||
}),
|
||||
cfg: {},
|
||||
agentDir: "/tmp/openclaw-agent",
|
||||
}),
|
||||
);
|
||||
expect(completeMock).toHaveBeenCalledOnce();
|
||||
expect(fetchMock).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
@@ -9,6 +9,7 @@ import {
|
||||
import { normalizeModelRef } from "../agents/model-selection.js";
|
||||
import { ensureOpenClawModelsJson } from "../agents/models-config.js";
|
||||
import { resolveProviderRequestCapabilities } from "../agents/provider-attribution.js";
|
||||
import { registerProviderStreamForModel } from "../agents/provider-stream.js";
|
||||
import {
|
||||
coerceImageAssistantText,
|
||||
hasImageReasoningOnlyResponse,
|
||||
@@ -245,6 +246,12 @@ export async function describeImagesWithModel(
|
||||
});
|
||||
}
|
||||
|
||||
registerProviderStreamForModel({
|
||||
model,
|
||||
cfg: params.cfg,
|
||||
agentDir: params.agentDir,
|
||||
});
|
||||
|
||||
const context = buildImageContext(prompt, params.images);
|
||||
const controller = new AbortController();
|
||||
const timeout =
|
||||
|
||||
Reference in New Issue
Block a user