diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c4c0185e1d..71a79f0ebf5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -65,6 +65,10 @@ Docs: https://docs.openclaw.ai - Agents/subagents: keep queued subagent announces session-only when the requester has no external channel target, avoiding ambiguous multi-channel delivery failures. Fixes #59201. Thanks @larrylhollan. +- Image understanding: preserve configured provider-prefixed vision model + metadata when callers request the model without the provider prefix, so custom + image models keep their `input: ["text", "image"]` capability. Fixes #33185. + Thanks @Kobe9312 and @vincentkoc. - Gateway/subagents: keep direct-loopback backend RPCs authenticated with the shared gateway token/password off stale CLI paired-device scope baselines, so internal calls no longer hit `scope-upgrade` pairing prompts while remote, diff --git a/src/agents/pi-embedded-runner/model.test.ts b/src/agents/pi-embedded-runner/model.test.ts index f4377edd46e..b8fef65e0ff 100644 --- a/src/agents/pi-embedded-runner/model.test.ts +++ b/src/agents/pi-embedded-runner/model.test.ts @@ -421,6 +421,122 @@ describe("resolveModel", () => { expect(result.model?.input).toEqual(["text", "image"]); }); + it("propagates image input when configured model ids include the provider prefix", () => { + const cfg = { + models: { + providers: { + custom: { + baseUrl: "http://localhost:9000", + api: "openai-completions", + models: [ + { + ...makeModel("custom/vision-model"), + input: ["text", "image"], + }, + ], + }, + }, + }, + } as unknown as OpenClawConfig; + + const result = resolveModelForTest("custom", "vision-model", "/tmp/agent", cfg); + + expect(result.error).toBeUndefined(); + expect(result.model).toMatchObject({ + provider: "custom", + id: "custom/vision-model", + input: ["text", "image"], + }); + }); + + it("matches provider-prefixed configured model ids through provider aliases", () => { + const cfg = { + models: { + providers: { + volcengine: { + baseUrl: "http://localhost:9000", + api: "openai-completions", + models: [ + { + ...makeModel("volcengine/vision-model"), + input: ["text", "image"], + }, + ], + }, + }, + }, + } as unknown as OpenClawConfig; + + const result = resolveModelForTest("bytedance", "vision-model", "/tmp/agent", cfg); + + expect(result.error).toBeUndefined(); + expect(result.model).toMatchObject({ + id: "volcengine/vision-model", + input: ["text", "image"], + }); + }); + + it("does not treat arbitrary namespaced model ids as provider prefixes", () => { + const cfg = { + models: { + providers: { + custom: { + baseUrl: "http://localhost:9000", + api: "openai-completions", + models: [ + { + ...makeModel("meta/vision-model"), + input: ["text", "image"], + }, + ], + }, + }, + }, + } as unknown as OpenClawConfig; + + const result = resolveModelForTest("custom", "vision-model", "/tmp/agent", cfg); + + expect(result.model?.id).toBe("vision-model"); + expect(result.model?.input).toEqual(["text"]); + }); + + it("prefers provider-prefixed configured metadata over discovered text-only models", () => { + mockDiscoveredModel(discoverModels, { + provider: "custom", + modelId: "vision-model", + templateModel: { + ...makeModel("vision-model"), + provider: "custom", + input: ["text"], + }, + }); + const cfg = { + models: { + providers: { + custom: { + baseUrl: "http://localhost:9000", + api: "openai-completions", + models: [ + { + ...makeModel("custom/vision-model"), + input: ["text", "image"], + }, + ], + }, + }, + }, + } as unknown as OpenClawConfig; + + const result = resolveModelForTest("custom", "vision-model", "/tmp/agent", cfg); + + expect(result.error).toBeUndefined(); + expect(result.model).toMatchObject({ + provider: "custom", + id: "custom/vision-model", + input: ["text", "image"], + }); + }); + it("keeps unknown fallback models text-only instead of borrowing image input from another configured model", () => { const cfg = { models: { diff --git a/src/agents/pi-embedded-runner/model.ts b/src/agents/pi-embedded-runner/model.ts index 6521430fb1d..c8791fd9b91 100644 --- a/src/agents/pi-embedded-runner/model.ts +++ b/src/agents/pi-embedded-runner/model.ts @@ -260,22 +260,48 @@ function resolveProviderTransport(params: { }; } +function matchesProviderScopedModelId(params: { + candidateId?: string; + provider: string; + modelId: string; +}): boolean { + const { candidateId, provider, modelId } = params; + if (candidateId === modelId) { + return true; + } + const slashIndex = candidateId?.indexOf("/") ?? -1; + if (!candidateId || slashIndex <= 0) { + return false; + } + const candidateProvider = candidateId.slice(0, slashIndex); + const candidateModelId = candidateId.slice(slashIndex + 1); + return ( + candidateModelId === modelId && + normalizeProviderId(candidateProvider) === normalizeProviderId(provider) + ); +} + function findInlineModelMatch(params: { providers: Record; provider: string; modelId: string; }) { + const matchesModelId = (entry: { provider: string; id?: string }) => + matchesProviderScopedModelId({ + candidateId: entry.id, + provider: entry.provider, + modelId: params.modelId, + }); const inlineModels = buildInlineProviderModels(params.providers); const exact = inlineModels.find( - (entry) => entry.provider === params.provider && entry.id === params.modelId, + (entry) => entry.provider === params.provider && matchesModelId(entry), ); if (exact) { return exact; } const normalizedProvider = normalizeProviderId(params.provider); return inlineModels.find( - (entry) => - normalizeProviderId(entry.provider) === normalizedProvider && entry.id === params.modelId, + (entry) => normalizeProviderId(entry.provider) === normalizedProvider && matchesModelId(entry), ); } @@ -306,6 +332,20 @@ function isModelsAddMetadataModel(params: { ); } +function findConfiguredProviderModel( + providerConfig: InlineProviderConfig | undefined, + provider: string, + modelId: string, +) { + return providerConfig?.models?.find((candidate) => + matchesProviderScopedModelId({ + candidateId: candidate.id, + provider, + modelId, + }), + ); +} + function applyConfiguredProviderOverrides(params: { provider: string; discoveredModel: ProviderRuntimeModel; @@ -324,9 +364,9 @@ function applyConfiguredProviderOverrides(params: { }; } const configuredModel = - providerConfig.models?.find((candidate) => candidate.id === modelId) ?? + findConfiguredProviderModel(providerConfig, params.provider, modelId) ?? (discoveredModel.id !== modelId - ? providerConfig.models?.find((candidate) => candidate.id === discoveredModel.id) + ? findConfiguredProviderModel(providerConfig, params.provider, discoveredModel.id) : undefined); const metadataOverrideModel = params.preferDiscoveredModelMetadata && @@ -546,7 +586,7 @@ function resolveConfiguredFallbackModel(params: { }): Model | undefined { const { provider, modelId, cfg, agentDir, runtimeHooks } = params; const providerConfig = resolveConfiguredProviderConfig(cfg, provider); - const configuredModel = providerConfig?.models?.find((candidate) => candidate.id === modelId); + const configuredModel = findConfiguredProviderModel(providerConfig, provider, modelId); const providerHeaders = sanitizeModelHeaders(providerConfig?.headers, { stripSecretRefMarkers: true, });