fix(tools): defer media model resolution

This commit is contained in:
Ayaan Zaidi
2026-05-01 07:34:16 +05:30
parent 354084b1b3
commit 3144e7a729
9 changed files with 77 additions and 63 deletions

View File

@@ -230,9 +230,12 @@ describe("createImageGenerateTool", () => {
vi.unstubAllEnvs();
});
it("returns null when no image-generation model can be inferred", () => {
it("defers image-generation model resolution until execution", async () => {
stubImageGenerationProviders();
expect(createImageGenerateTool({ config: {} })).toBeNull();
const tool = requireImageGenerateTool(createImageGenerateTool({ config: {} }));
await expect(tool.execute("tool-call-1", { prompt: "draw a chart" })).rejects.toThrow(
"No image-generation model configured.",
);
});
it("tells agents how to request transparent OpenAI backgrounds", () => {

View File

@@ -565,18 +565,7 @@ export function createImageGenerateTool(options?: {
workspaceDir?: string;
sandbox?: ImageGenerateSandboxConfig;
fsPolicy?: ToolFsPolicy;
}): AnyAgentTool | null {
const cfg = options?.config ?? getRuntimeConfig();
const imageGenerationModelConfig = resolveImageGenerationModelConfigForTool({
cfg,
agentDir: options?.agentDir,
});
if (!imageGenerationModelConfig) {
return null;
}
const effectiveCfg =
applyImageGenerationModelConfigDefaults(cfg, imageGenerationModelConfig) ?? cfg;
const remoteMediaSsrfPolicy = resolveRemoteMediaSsrfPolicy(effectiveCfg);
}): AnyAgentTool {
const sandboxConfig =
options?.sandbox && options.sandbox.root.trim()
? {
@@ -595,8 +584,9 @@ export function createImageGenerateTool(options?: {
execute: async (_toolCallId, args) => {
const params = args as Record<string, unknown>;
const action = resolveAction(params);
const cfg = options?.config ?? getRuntimeConfig();
if (action === "list") {
const runtimeProviders = listRuntimeImageGenerationProviders({ config: effectiveCfg });
const runtimeProviders = listRuntimeImageGenerationProviders({ config: cfg });
const providers = runtimeProviders.map((provider) =>
Object.assign(
{ id: provider.id },
@@ -607,7 +597,7 @@ export function createImageGenerateTool(options?: {
configured: isCapabilityProviderConfigured({
providers: runtimeProviders,
provider,
cfg: effectiveCfg,
cfg,
agentDir: options?.agentDir,
}),
authEnvVars: getImageGenerationProviderAuthEnvVars(provider.id),
@@ -657,6 +647,16 @@ export function createImageGenerateTool(options?: {
};
}
const imageGenerationModelConfig = resolveImageGenerationModelConfigForTool({
cfg,
agentDir: options?.agentDir,
});
if (!imageGenerationModelConfig) {
throw new ToolInputError("No image-generation model configured.");
}
const effectiveCfg =
applyImageGenerationModelConfigDefaults(cfg, imageGenerationModelConfig) ?? cfg;
const remoteMediaSsrfPolicy = resolveRemoteMediaSsrfPolicy(effectiveCfg);
const prompt = readStringParam(params, "prompt", { required: true });
const imageInputs = normalizeReferenceImages(params);
const model = readStringParam(params, "model");

View File

@@ -617,13 +617,20 @@ describe("image tool implicit imageModel config", () => {
__testing.setProviderDepsForTest();
});
it("stays disabled without auth when no pairing is possible", async () => {
it("defers image model pairing until execution", async () => {
await withTempAgentDir(async (agentDir) => {
const cfg: OpenClawConfig = {
agents: { defaults: { model: { primary: "openai/gpt-5.4" } } },
};
expect(resolveImageModelConfigForTool({ cfg, agentDir })).toBeNull();
expect(createImageTool({ config: cfg, agentDir })).toBeNull();
const tool = createImageTool({ config: cfg, agentDir });
expect(tool).not.toBeNull();
await expect(
tool?.execute("tool-call-1", {
image: `data:image/png;base64,${ONE_PIXEL_PNG_B64}`,
prompt: "describe it",
}),
).rejects.toThrow("No image model configured.");
});
});

View File

@@ -380,15 +380,6 @@ export function createImageTool(options?: {
}
return null;
}
const imageModelConfig = resolveImageModelConfigForTool({
cfg: options?.config,
agentDir,
});
if (!imageModelConfig) {
return null;
}
const remoteMediaSsrfPolicy = resolveRemoteMediaSsrfPolicy(options?.config);
// If model has native vision, images in the prompt are auto-injected
// so this tool is only needed when image wasn't provided in the prompt
const description = options?.modelHasVision
@@ -462,6 +453,14 @@ export function createImageTool(options?: {
record,
DEFAULT_PROMPT,
);
const imageModelConfig = resolveImageModelConfigForTool({
cfg: options?.config,
agentDir,
});
if (!imageModelConfig) {
throw new Error("No image model configured.");
}
const remoteMediaSsrfPolicy = resolveRemoteMediaSsrfPolicy(options?.config);
const maxBytesMb = typeof record.maxBytesMb === "number" ? record.maxBytesMb : undefined;
const maxBytes = pickMaxBytes(options?.config, maxBytesMb);

View File

@@ -129,9 +129,12 @@ describe("createMusicGenerateTool", () => {
vi.unstubAllEnvs();
});
it("returns null when no music-generation config or auth-backed provider is available", () => {
it("defers music-generation model resolution until execution", async () => {
vi.spyOn(musicGenerationRuntime, "listRuntimeMusicGenerationProviders").mockReturnValue([]);
expect(createMusicGenerateTool({ config: asConfig({}) })).toBeNull();
const tool = createMusicGenerateTool({ config: asConfig({}) });
await expect(tool.execute("tool-call-1", { prompt: "make a loop" })).rejects.toThrow(
"No music-generation model configured.",
);
});
it("registers when music-generation config is present", () => {

View File

@@ -493,16 +493,7 @@ export function createMusicGenerateTool(options?: {
sandbox?: MusicGenerateSandboxConfig;
fsPolicy?: ToolFsPolicy;
scheduleBackgroundWork?: MusicGenerateBackgroundScheduler;
}): AnyAgentTool | null {
const cfg: OpenClawConfig = options?.config ?? getRuntimeConfig();
const musicGenerationModelConfig = resolveMusicGenerationModelConfigForTool({
cfg,
agentDir: options?.agentDir,
});
if (!musicGenerationModelConfig) {
return null;
}
}): AnyAgentTool {
const sandboxConfig = options?.sandbox
? {
root: options.sandbox.root,
@@ -523,17 +514,26 @@ export function createMusicGenerateTool(options?: {
execute: async (_toolCallId, rawArgs) => {
const args = rawArgs as Record<string, unknown>;
const action = resolveAction(args);
const effectiveCfg =
applyMusicGenerationModelConfigDefaults(cfg, musicGenerationModelConfig) ?? cfg;
const cfg: OpenClawConfig = options?.config ?? getRuntimeConfig();
if (action === "list") {
return createMusicGenerateListActionResult(effectiveCfg);
return createMusicGenerateListActionResult(cfg);
}
if (action === "status") {
return createMusicGenerateStatusActionResult(options?.agentSessionKey);
}
const musicGenerationModelConfig = resolveMusicGenerationModelConfigForTool({
cfg,
agentDir: options?.agentDir,
});
if (!musicGenerationModelConfig) {
throw new ToolInputError("No music-generation model configured.");
}
const effectiveCfg =
applyMusicGenerationModelConfigDefaults(cfg, musicGenerationModelConfig) ?? cfg;
const duplicateGuardResult = createMusicGenerateDuplicateGuardResult(
options?.agentSessionKey,
);

View File

@@ -257,11 +257,6 @@ export function createPdfTool(options?: {
return null;
}
const pdfModelConfig = resolvePdfModelConfigForTool({ cfg: options?.config, agentDir });
if (!pdfModelConfig) {
return null;
}
const maxBytesMbDefault = (
options?.config?.agents?.defaults as Record<string, unknown> | undefined
)?.pdfMaxBytesMb;
@@ -308,6 +303,10 @@ export function createPdfTool(options?: {
record,
DEFAULT_PROMPT,
);
const pdfModelConfig = resolvePdfModelConfigForTool({ cfg: options?.config, agentDir });
if (!pdfModelConfig) {
throw new Error("No PDF model configured.");
}
const maxBytesMbRaw = typeof record.maxBytesMb === "number" ? record.maxBytesMb : undefined;
const maxBytesMb =
typeof maxBytesMbRaw === "number" && Number.isFinite(maxBytesMbRaw) && maxBytesMbRaw > 0

View File

@@ -93,10 +93,13 @@ describe("createVideoGenerateTool", () => {
vi.unstubAllEnvs();
});
it("returns null when no video-generation config or auth-backed provider is available", () => {
it("defers video-generation model resolution until execution", async () => {
vi.spyOn(videoGenerationRuntime, "listRuntimeVideoGenerationProviders").mockReturnValue([]);
expect(createVideoGenerateTool({ config: asConfig({}) })).toBeNull();
const tool = createVideoGenerateTool({ config: asConfig({}) });
await expect(tool.execute("tool-call-1", { prompt: "make a clip" })).rejects.toThrow(
"No video-generation model configured.",
);
});
it("registers when video-generation config is present", () => {

View File

@@ -800,16 +800,7 @@ export function createVideoGenerateTool(options?: {
sandbox?: VideoGenerateSandboxConfig;
fsPolicy?: ToolFsPolicy;
scheduleBackgroundWork?: VideoGenerateBackgroundScheduler;
}): AnyAgentTool | null {
const cfg: OpenClawConfig = options?.config ?? getRuntimeConfig();
const videoGenerationModelConfig = resolveVideoGenerationModelConfigForTool({
cfg,
agentDir: options?.agentDir,
});
if (!videoGenerationModelConfig) {
return null;
}
}): AnyAgentTool {
const sandboxConfig = options?.sandbox
? {
root: options.sandbox.root,
@@ -830,18 +821,27 @@ export function createVideoGenerateTool(options?: {
execute: async (_toolCallId, rawArgs) => {
const args = rawArgs as Record<string, unknown>;
const action = resolveAction(args);
const effectiveCfg =
applyVideoGenerationModelConfigDefaults(cfg, videoGenerationModelConfig) ?? cfg;
const remoteMediaSsrfPolicy = resolveRemoteMediaSsrfPolicy(effectiveCfg);
const cfg: OpenClawConfig = options?.config ?? getRuntimeConfig();
if (action === "list") {
return createVideoGenerateListActionResult(effectiveCfg);
return createVideoGenerateListActionResult(cfg);
}
if (action === "status") {
return createVideoGenerateStatusActionResult(options?.agentSessionKey);
}
const videoGenerationModelConfig = resolveVideoGenerationModelConfigForTool({
cfg,
agentDir: options?.agentDir,
});
if (!videoGenerationModelConfig) {
throw new ToolInputError("No video-generation model configured.");
}
const effectiveCfg =
applyVideoGenerationModelConfigDefaults(cfg, videoGenerationModelConfig) ?? cfg;
const remoteMediaSsrfPolicy = resolveRemoteMediaSsrfPolicy(effectiveCfg);
const duplicateGuardResult = createVideoGenerateDuplicateGuardResult(
options?.agentSessionKey,
);