mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:40:44 +00:00
fix: defer image tool auto discovery
This commit is contained in:
@@ -325,6 +325,7 @@ export function createOpenClawTools(
|
||||
sandbox,
|
||||
fsPolicy: options?.fsPolicy,
|
||||
modelHasVision: options?.modelHasVision,
|
||||
deferAutoModelResolution: true,
|
||||
})
|
||||
: null;
|
||||
options?.recordToolPrepStage?.("openclaw-tools:image-tool");
|
||||
|
||||
@@ -628,6 +628,38 @@ describe("image tool implicit imageModel config", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// Regression test: creating the image tool with `deferAutoModelResolution: true`
// must still return a tool, and must not invoke either of the spied resolver
// dependencies while the tool is being created.
it("defers implicit image model discovery during hot-path tool registration", async () => {
|
||||
await withTempAgentDir(async (agentDir) => {
|
||||
// Spies stand in for the two resolver dependencies so any call made during
// tool creation is observable; each returns a fixed value if it is called.
const resolveDefaultMediaModelSpy = vi.fn(() => "gpt-5.4-mini");
|
||||
const resolveAutoMediaKeyProvidersSpy = vi.fn(() => ["openai"]);
|
||||
// Pass the harness-backed provider functions plus the two spies through the
// test-only provider-deps hook.
__testing.setProviderDepsForTest({
|
||||
buildProviderRegistry: (overrides?: Record<string, MediaUnderstandingProvider>) =>
|
||||
imageProviderHarness.buildProviderRegistry(overrides),
|
||||
getMediaUnderstandingProvider: (
|
||||
id: string,
|
||||
registry: Map<string, MediaUnderstandingProvider>,
|
||||
) => imageProviderHarness.getMediaUnderstandingProvider(id, registry),
|
||||
describeImageWithModel: describeGenericImageWithModel,
|
||||
describeImagesWithModel: describeGenericImagesWithModel,
|
||||
resolveDefaultMediaModel: resolveDefaultMediaModelSpy,
|
||||
resolveAutoMediaKeyProviders: resolveAutoMediaKeyProvidersSpy,
|
||||
});
|
||||
// The config sets only a primary agent model; no explicit image model entry
// appears in this literal.
const cfg: OpenClawConfig = {
|
||||
agents: { defaults: { model: { primary: "openai/gpt-5.4" } } },
|
||||
};
|
||||
|
||||
// Create the tool with deferral enabled.
const tool = createImageTool({
|
||||
config: cfg,
|
||||
agentDir,
|
||||
deferAutoModelResolution: true,
|
||||
});
|
||||
|
||||
// The tool is still returned, and neither spied resolver was called while
// creating it.
expect(tool).not.toBeNull();
|
||||
expect(resolveDefaultMediaModelSpy).not.toHaveBeenCalled();
|
||||
expect(resolveAutoMediaKeyProvidersSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
it("pairs minimax primary with MiniMax-VL-01 (and fallbacks) when auth exists", async () => {
|
||||
await withTempAgentDir(async (agentDir) => {
|
||||
vi.stubEnv("MINIMAX_API_KEY", "minimax-test");
|
||||
|
||||
@@ -379,22 +379,37 @@ export function createImageTool(options?: {
|
||||
fsPolicy?: ToolFsPolicy;
|
||||
/** If true, the model has native vision capability and images in the prompt are auto-injected */
|
||||
modelHasVision?: boolean;
|
||||
/**
|
||||
* Avoid resolving auto image-provider/model candidates while registering the
|
||||
* tool. The concrete image model is still resolved before execution.
|
||||
*/
|
||||
deferAutoModelResolution?: boolean;
|
||||
}): AnyAgentTool | null {
|
||||
const agentDir = options?.agentDir?.trim();
|
||||
const explicit = coerceImageModelConfig(options?.config);
|
||||
if (!agentDir) {
|
||||
const explicit = coerceImageModelConfig(options?.config);
|
||||
if (hasToolModelConfig(explicit)) {
|
||||
throw new Error("createImageTool requires agentDir when enabled");
|
||||
}
|
||||
return null;
|
||||
}
|
||||
const imageModelConfig = resolveImageModelConfigForTool({
|
||||
cfg: options?.config,
|
||||
agentDir,
|
||||
workspaceDir: options?.workspaceDir,
|
||||
authStore: options?.authProfileStore,
|
||||
});
|
||||
if (!imageModelConfig) {
|
||||
const explicitImageModelConfig = hasToolModelConfig(explicit)
|
||||
? resolveConfiguredImageModelRefs({
|
||||
cfg: options?.config,
|
||||
imageModelConfig: explicit,
|
||||
})
|
||||
: null;
|
||||
const shouldResolveAutoImageModel =
|
||||
!explicitImageModelConfig && !options?.deferAutoModelResolution;
|
||||
const resolvedImageModelConfig = shouldResolveAutoImageModel
|
||||
? resolveImageModelConfigForTool({
|
||||
cfg: options?.config,
|
||||
agentDir,
|
||||
workspaceDir: options?.workspaceDir,
|
||||
authStore: options?.authProfileStore,
|
||||
})
|
||||
: explicitImageModelConfig;
|
||||
if (!resolvedImageModelConfig && !options?.deferAutoModelResolution) {
|
||||
return null;
|
||||
}
|
||||
const remoteMediaSsrfPolicy = resolveRemoteMediaSsrfPolicy(options?.config);
|
||||
@@ -403,7 +418,9 @@ export function createImageTool(options?: {
|
||||
// so this tool is only needed when image wasn't provided in the prompt
|
||||
const description = options?.modelHasVision
|
||||
? "Analyze one or more images with a vision model. Use image for a single path/URL, or images for multiple (up to 20). Only use this tool when images were NOT already provided in the user's message. Images mentioned in the prompt are automatically visible to you."
|
||||
: "Analyze one or more images with the configured image model (agents.defaults.imageModel). Use image for a single path/URL, or images for multiple (up to 20). Provide a prompt describing what to analyze.";
|
||||
: explicitImageModelConfig
|
||||
? "Analyze one or more images with the configured image model (agents.defaults.imageModel). Use image for a single path/URL, or images for multiple (up to 20). Provide a prompt describing what to analyze."
|
||||
: "Analyze one or more images with an available vision model. Use image for a single path/URL, or images for multiple (up to 20). Provide a prompt describing what to analyze.";
|
||||
|
||||
return {
|
||||
label: "Image",
|
||||
@@ -603,6 +620,19 @@ export function createImageTool(options?: {
|
||||
}
|
||||
|
||||
// MARK: - Run image prompt with all loaded images
|
||||
const imageModelConfig =
|
||||
resolvedImageModelConfig ??
|
||||
resolveImageModelConfigForTool({
|
||||
cfg: options?.config,
|
||||
agentDir,
|
||||
workspaceDir: options?.workspaceDir,
|
||||
authStore: options?.authProfileStore,
|
||||
});
|
||||
if (!imageModelConfig) {
|
||||
throw new Error(
|
||||
"No image model is configured. Set agents.defaults.imageModel or configure an image-capable provider.",
|
||||
);
|
||||
}
|
||||
const result = await runImagePrompt({
|
||||
cfg: options?.config,
|
||||
agentDir,
|
||||
|
||||
Reference in New Issue
Block a user