fix(cli): report missing infer media providers

This commit is contained in:
Peter Steinberger
2026-05-02 07:47:15 +01:00
parent 798515809c
commit fa7de46261
6 changed files with 145 additions and 6 deletions

View File

@@ -782,6 +782,51 @@ describe("capability cli", () => {
);
});
it("reports missing image understanding configuration for image describe", async () => {
  // Simulate a run that was skipped without a single provider attempt:
  // no text, and the only attachment has an empty attempts list.
  const skippedWithoutAttempts = {
    text: undefined,
    decision: {
      capability: "image",
      outcome: "skipped",
      attachments: [{ attachmentIndex: 0, attempts: [] }],
    },
  } as never;
  mocks.describeImageFile.mockResolvedValueOnce(skippedWithoutAttempts);

  const pendingRun = runRegisteredCli({
    register: registerCapabilityCli as (program: Command) => void,
    argv: ["capability", "image", "describe", "--file", "photo.jpg", "--json"],
  });
  await expect(pendingRun).rejects.toThrow("exit 1");

  // The reported error must name the problem and point at a config key to set.
  const expectedFragments = [
    "No image understanding provider is configured or ready",
    "agents.defaults.imageModel.primary",
  ];
  for (const fragment of expectedFragments) {
    expect(mocks.runtime.error).toHaveBeenCalledWith(expect.stringContaining(fragment));
  }
});
it("reports missing image understanding configuration for image describe-many", async () => {
  // Same "skipped, nothing attempted" result as the single-file case, fed to describe-many.
  const noAttemptDecision = {
    capability: "image",
    outcome: "skipped",
    attachments: [{ attachmentIndex: 0, attempts: [] }],
  };
  mocks.describeImageFile.mockResolvedValueOnce({
    text: undefined,
    decision: noAttemptDecision,
  } as never);

  const argv = ["capability", "image", "describe-many", "--file", "photo.jpg", "--json"];
  const register = registerCapabilityCli as (program: Command) => void;
  await expect(runRegisteredCli({ register, argv })).rejects.toThrow("exit 1");

  const missingProviderMessage = expect.stringContaining(
    "No image understanding provider is configured or ready",
  );
  expect(mocks.runtime.error).toHaveBeenCalledWith(missingProviderMessage);
});
it("rewrites mismatched explicit image output extensions to the detected file type", async () => {
const jpegBase64 =
"/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAkGBxAQEBUQEBAVFRUVFRUVFRUVFRUVFRUVFRUXFhUVFRUYHSggGBolHRUVITEhJSkrLi4uFx8zODMsNygtLisBCgoKDg0OGhAQGi0fHyUtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLf/AABEIAAEAAQMBIgACEQEDEQH/xAAXAAEBAQEAAAAAAAAAAAAAAAAAAQID/8QAFhEBAQEAAAAAAAAAAAAAAAAAAAER/9oADAMBAAIQAxAAAAH2AP/EABgQAQEAAwAAAAAAAAAAAAAAAAEAEQIS/9oACAEBAAEFAk1o7//EABYRAQEBAAAAAAAAAAAAAAAAAAABEf/aAAgBAwEBPwGn/8QAFhEBAQEAAAAAAAAAAAAAAAAAABEB/9oACAECAQE/AYf/xAAaEAACAgMAAAAAAAAAAAAAAAABEQAhMUFh/9oACAEBAAY/AjK9cY2f/8QAGhABAQACAwAAAAAAAAAAAAAAAAERITFBUf/aAAgBAQABPyGQk7W5jVYkA//Z";
@@ -1278,6 +1323,30 @@ describe("capability cli", () => {
);
});
it("reports missing audio transcription configuration for audio transcribe", async () => {
  // A skipped audio decision whose attachment records zero attempts.
  const skippedTranscription = {
    text: undefined,
    decision: {
      capability: "audio",
      outcome: "skipped",
      attachments: [{ attachmentIndex: 0, attempts: [] }],
    },
  } as never;
  mocks.transcribeAudioFile.mockResolvedValueOnce(skippedTranscription);

  const cliRun = runRegisteredCli({
    register: registerCapabilityCli as (program: Command) => void,
    argv: ["capability", "audio", "transcribe", "--file", "memo.m4a", "--json"],
  });
  await expect(cliRun).rejects.toThrow("exit 1");

  // Both the headline and the config-key hint have to reach the runtime error output.
  const requiredSnippets = [
    "No audio transcription provider is configured or ready",
    "tools.media.audio.models",
  ];
  requiredSnippets.forEach((snippet) => {
    expect(mocks.runtime.error).toHaveBeenCalledWith(expect.stringContaining(snippet));
  });
});
it("surfaces the underlying transcription failure for audio transcribe", async () => {
mocks.transcribeAudioFile.mockRejectedValueOnce(
new Error("Audio transcription response missing text"),

View File

@@ -30,6 +30,7 @@ import type {
ImageGenerationOutputFormat,
} from "../image-generation/types.js";
import { buildMediaUnderstandingRegistry } from "../media-understanding/provider-registry.js";
import type { RunMediaUnderstandingFileResult } from "../media-understanding/runtime-types.js";
import {
describeImageFile,
describeImageFileWithModel,
@@ -964,6 +965,11 @@ async function runImageDescribe(params: {
timeoutMs: params.timeoutMs,
});
if (!result.text) {
if (isMissingMediaUnderstandingProvider(result)) {
throw new Error(
"No image understanding provider is configured or ready. Configure tools.media.image.models or agents.defaults.imageModel.primary, or pass --model <provider/model> after configuring that provider's auth/API key.",
);
}
throw new Error(`No description returned for image: ${resolvedPath}`);
}
return {
@@ -986,6 +992,15 @@ async function runImageDescribe(params: {
} satisfies CapabilityEnvelope;
}
/**
 * Tells whether a media-understanding run was skipped without attempting anything.
 *
 * @param result - Outcome of a media-understanding file run.
 * @returns `true` only when the decision outcome is `"skipped"`, at least one
 *   attachment is listed, and no listed attachment recorded an attempt;
 *   `false` otherwise, including when no decision is present.
 */
function isMissingMediaUnderstandingProvider(result: RunMediaUnderstandingFileResult): boolean {
  const { decision } = result;
  if (decision?.outcome !== "skipped") {
    return false;
  }
  const { attachments } = decision;
  if (attachments.length === 0) {
    // Nothing was evaluated at all, so this is not treated as "nothing was tried".
    return false;
  }
  const anyAttemptMade = attachments.some((entry) => entry.attempts.length > 0);
  return !anyAttemptMade;
}
async function runAudioTranscribe(params: {
file: string;
language?: string;
@@ -1002,6 +1017,11 @@ async function runAudioTranscribe(params: {
prompt: params.prompt,
});
if (!result.text) {
if (isMissingMediaUnderstandingProvider(result)) {
throw new Error(
"No audio transcription provider is configured or ready. Configure tools.media.audio.models, or pass --model <provider/model> after configuring that provider's auth/API key.",
);
}
throw new Error(`No transcript returned for audio: ${path.resolve(params.file)}`);
}
return {