// Patch overview: introduces src/extension-host/media-runtime-backends.ts as the shared home for
// the built-in media-understanding provider list, media provider-id normalization, selector-key
// construction, auto-seed ordering and default-model lookup. media-runtime-registry.ts and
// runtime-backend-catalog.ts are changed to import those pieces instead of defining their own.
//
// File 1 (new test file): checks that the provider list contains "openai" and "deepgram", that
// "gemini" normalizes to "google" with selector keys ["google", "gemini"], the expected leading
// order of seeded/listed backend ids, and default-model lookup for image vs. video on "openai".
diff --git a/src/extension-host/media-runtime-backends.test.ts b/src/extension-host/media-runtime-backends.test.ts
new file mode 100644
index 00000000000..d5acac9661b
--- /dev/null
+++ b/src/extension-host/media-runtime-backends.test.ts
@@ -0,0 +1,60 @@
+import { describe, expect, it } from "vitest";
+import {
+  buildExtensionHostMediaRuntimeSelectorKeys,
+  listExtensionHostMediaAutoRuntimeBackendSeedIds,
+  listExtensionHostMediaRuntimeBackendIds,
+  listExtensionHostMediaUnderstandingProviders,
+  normalizeExtensionHostMediaProviderId,
+  resolveExtensionHostMediaRuntimeDefaultModelMetadata,
+} from "./media-runtime-backends.js";
+
+describe("extension host media runtime backends", () => {
+  it("publishes the built-in media providers once", () => {
+    const providers = listExtensionHostMediaUnderstandingProviders();
+
+    expect(providers.some((provider) => provider.id === "openai")).toBe(true);
+    expect(providers.some((provider) => provider.id === "deepgram")).toBe(true);
+  });
+
+  it("keeps media-specific provider normalization and selector aliases", () => {
+    expect(normalizeExtensionHostMediaProviderId("gemini")).toBe("google");
+    expect(buildExtensionHostMediaRuntimeSelectorKeys("google")).toEqual(["google", "gemini"]);
+  });
+
+  it("keeps auto-seeded runtime backends ordered ahead of the rest", () => {
+    expect(listExtensionHostMediaAutoRuntimeBackendSeedIds("image")).toEqual([
+      "openai",
+      "anthropic",
+      "google",
+      "minimax",
+      "minimax-portal",
+      "zai",
+    ]);
+    expect(listExtensionHostMediaRuntimeBackendIds("audio").slice(0, 3)).toEqual([
+      "openai",
+      "groq",
+      "deepgram",
+    ]);
+    expect(listExtensionHostMediaRuntimeBackendIds("image").slice(0, 4)).toEqual([
+      "openai",
+      "anthropic",
+      "google",
+      "minimax",
+    ]);
+  });
+
+  it("keeps default-model metadata with the shared backend definitions", () => {
+    expect(
+      resolveExtensionHostMediaRuntimeDefaultModelMetadata({
+        capability: "image",
+        backendId: "openai",
+      }),
+    ).toBe("gpt-5-mini");
+    expect(
+      resolveExtensionHostMediaRuntimeDefaultModelMetadata({
+        capability: "video",
+        backendId: "openai",
+      }),
+    ).toBeUndefined();
+  });
+});
// File 2 (new module): the shared backend definitions.
//  - listExtensionHostMediaUnderstandingProviders returns the module-level provider array itself.
//  - normalizeExtensionHostMediaProviderId applies normalizeProviderId, then maps "gemini" to "google".
//  - buildExtensionHostMediaRuntimeSelectorKeys returns [providerId, "gemini"] when the normalized
//    id is "google"; otherwise the id alone, or the id plus its normalized form when they differ.
//  - listExtensionHostMediaRuntimeBackendIds emits normalized ids: the auto-seed ids for the
//    capability first, then the remaining providers in map order, skipping providers that lack
//    the capability and ids already emitted.
//  - resolveExtensionHostMediaRuntimeDefaultModelMetadata reads DEFAULT_AUDIO_MODELS or
//    DEFAULT_IMAGE_MODELS by backendId and returns undefined for any other capability.
// NOTE(review): passing "gemini" itself to buildExtensionHostMediaRuntimeSelectorKeys would yield
// ["gemini", "gemini"] as written — confirm callers only pass already-normalized ids.
// NOTE(review): `new Set()` and `new Map(` appear without type arguments in this copy of the
// patch; they may have lost `<...>` spans in extraction — confirm against the repository.
diff --git a/src/extension-host/media-runtime-backends.ts b/src/extension-host/media-runtime-backends.ts
new file mode 100644
index 00000000000..22eccc8bb04
--- /dev/null
+++ b/src/extension-host/media-runtime-backends.ts
@@ -0,0 +1,118 @@
+import { normalizeProviderId } from "../agents/provider-id.js";
+import {
+  AUTO_AUDIO_KEY_PROVIDERS,
+  AUTO_IMAGE_KEY_PROVIDERS,
+  AUTO_VIDEO_KEY_PROVIDERS,
+  DEFAULT_AUDIO_MODELS,
+  DEFAULT_IMAGE_MODELS,
+} from "../media-understanding/defaults.js";
+import { anthropicProvider } from "../media-understanding/providers/anthropic/index.js";
+import { deepgramProvider } from "../media-understanding/providers/deepgram/index.js";
+import { googleProvider } from "../media-understanding/providers/google/index.js";
+import { groqProvider } from "../media-understanding/providers/groq/index.js";
+import {
+  minimaxPortalProvider,
+  minimaxProvider,
+} from "../media-understanding/providers/minimax/index.js";
+import { mistralProvider } from "../media-understanding/providers/mistral/index.js";
+import { moonshotProvider } from "../media-understanding/providers/moonshot/index.js";
+import { openaiProvider } from "../media-understanding/providers/openai/index.js";
+import { zaiProvider } from "../media-understanding/providers/zai/index.js";
+import type {
+  MediaUnderstandingCapability,
+  MediaUnderstandingProvider,
+} from "../media-understanding/types.js";
+
+const EXTENSION_HOST_MEDIA_UNDERSTANDING_PROVIDERS: readonly MediaUnderstandingProvider[] = [
+  groqProvider,
+  openaiProvider,
+  googleProvider,
+  anthropicProvider,
+  minimaxProvider,
+  minimaxPortalProvider,
+  moonshotProvider,
+  mistralProvider,
+  zaiProvider,
+  deepgramProvider,
+];
+
+const EXTENSION_HOST_MEDIA_AUTO_RUNTIME_BACKEND_IDS: Record<
+  MediaUnderstandingCapability,
+  readonly string[]
+> = {
+  audio: AUTO_AUDIO_KEY_PROVIDERS,
+  image: AUTO_IMAGE_KEY_PROVIDERS,
+  video: AUTO_VIDEO_KEY_PROVIDERS,
+};
+
+export function listExtensionHostMediaUnderstandingProviders(): readonly MediaUnderstandingProvider[] {
+  return EXTENSION_HOST_MEDIA_UNDERSTANDING_PROVIDERS;
+}
+
+export function normalizeExtensionHostMediaProviderId(id: string): string {
+  const normalized = normalizeProviderId(id);
+  if (normalized === "gemini") {
+    return "google";
+  }
+  return normalized;
+}
+
+export function buildExtensionHostMediaRuntimeSelectorKeys(providerId: string): readonly string[] {
+  const normalized = normalizeExtensionHostMediaProviderId(providerId);
+  if (normalized === "google") {
+    return [providerId, "gemini"];
+  }
+  return normalized === providerId ? [providerId] : [providerId, normalized];
+}
+
+export function listExtensionHostMediaAutoRuntimeBackendSeedIds(
+  capability: MediaUnderstandingCapability,
+): readonly string[] {
+  return EXTENSION_HOST_MEDIA_AUTO_RUNTIME_BACKEND_IDS[capability];
+}
+
+export function listExtensionHostMediaRuntimeBackendIds(
+  capability: MediaUnderstandingCapability,
+): readonly string[] {
+  const ordered: string[] = [];
+  const seen = new Set();
+  const pushProvider = (provider: MediaUnderstandingProvider | undefined) => {
+    if (!provider || !(provider.capabilities ?? []).includes(capability)) {
+      return;
+    }
+    const normalized = normalizeExtensionHostMediaProviderId(provider.id);
+    if (seen.has(normalized)) {
+      return;
+    }
+    seen.add(normalized);
+    ordered.push(normalized);
+  };
+
+  const providersById = new Map(
+    listExtensionHostMediaUnderstandingProviders().map((provider) => [
+      normalizeExtensionHostMediaProviderId(provider.id),
+      provider,
+    ]),
+  );
+
+  for (const providerId of listExtensionHostMediaAutoRuntimeBackendSeedIds(capability)) {
+    pushProvider(providersById.get(normalizeExtensionHostMediaProviderId(providerId)));
+  }
+  for (const provider of providersById.values()) {
+    pushProvider(provider);
+  }
+  return ordered;
+}
+
+export function resolveExtensionHostMediaRuntimeDefaultModelMetadata(params: {
+  capability: MediaUnderstandingCapability;
+  backendId: string;
+}): string | undefined {
+  if (params.capability === "audio") {
+    return DEFAULT_AUDIO_MODELS[params.backendId];
+  }
+  if (params.capability === "image") {
+    return DEFAULT_IMAGE_MODELS[params.backendId];
+  }
+  return undefined;
+}
// File 3 (registry): drops the local provider array and the local normalize function. The
// registry is now built by iterating listExtensionHostMediaUnderstandingProviders(), and
// normalizeExtensionHostMediaProviderId is re-exported from ./media-runtime-backends.js so the
// name stays importable from this module.
// NOTE(review): the context line `overrides?: Record,` is not valid TypeScript as shown; its type
// arguments were presumably lost in extraction — confirm against the file before applying.
diff --git a/src/extension-host/media-runtime-registry.ts b/src/extension-host/media-runtime-registry.ts
index 7b6479dab61..00a15ed4a1a 100644
--- a/src/extension-host/media-runtime-registry.ts
+++ b/src/extension-host/media-runtime-registry.ts
@@ -1,49 +1,21 @@
-import { normalizeProviderId } from "../agents/provider-id.js";
-import { anthropicProvider } from "../media-understanding/providers/anthropic/index.js";
-import { deepgramProvider } from "../media-understanding/providers/deepgram/index.js";
-import { googleProvider } from "../media-understanding/providers/google/index.js";
-import { groqProvider } from "../media-understanding/providers/groq/index.js";
-import {
-  minimaxPortalProvider,
-  minimaxProvider,
-} from "../media-understanding/providers/minimax/index.js";
-import { mistralProvider } from "../media-understanding/providers/mistral/index.js";
-import { moonshotProvider } from "../media-understanding/providers/moonshot/index.js";
-import { openaiProvider } from "../media-understanding/providers/openai/index.js";
-import { zaiProvider } from "../media-understanding/providers/zai/index.js";
 import type { MediaUnderstandingProvider } from "../media-understanding/types.js";
-
-const EXTENSION_HOST_MEDIA_PROVIDERS: readonly MediaUnderstandingProvider[] = [
-  groqProvider,
-  openaiProvider,
-  googleProvider,
-  anthropicProvider,
-  minimaxProvider,
-  minimaxPortalProvider,
-  moonshotProvider,
-  mistralProvider,
-  zaiProvider,
-  deepgramProvider,
-];
+import {
+  listExtensionHostMediaUnderstandingProviders,
+  normalizeExtensionHostMediaProviderId,
+} from "./media-runtime-backends.js";
 
 export type ExtensionHostMediaUnderstandingProviderRegistry = Map<
   string,
   MediaUnderstandingProvider
 >;
 
-export function normalizeExtensionHostMediaProviderId(id: string): string {
-  const normalized = normalizeProviderId(id);
-  if (normalized === "gemini") {
-    return "google";
-  }
-  return normalized;
-}
+export { normalizeExtensionHostMediaProviderId } from "./media-runtime-backends.js";
 
 export function buildExtensionHostMediaUnderstandingRegistry(
   overrides?: Record,
 ): ExtensionHostMediaUnderstandingProviderRegistry {
   const registry: ExtensionHostMediaUnderstandingProviderRegistry = new Map();
-  for (const provider of EXTENSION_HOST_MEDIA_PROVIDERS) {
+  for (const provider of listExtensionHostMediaUnderstandingProviders()) {
     registry.set(normalizeExtensionHostMediaProviderId(provider.id), provider);
   }
   if (!overrides) {
// File 4 (catalog test): the vi.mock target moves from ./media-runtime-registry.js to
// ./media-runtime-backends.js. The mock supplies fixed per-capability id lists, the
// "google" -> ["google", "gemini"] selector keys, and "gpt-5-mini" as the default model for
// image/openai only.
diff --git a/src/extension-host/runtime-backend-catalog.test.ts b/src/extension-host/runtime-backend-catalog.test.ts
index eddaf5633ac..ce45d62291a 100644
--- a/src/extension-host/runtime-backend-catalog.test.ts
+++ b/src/extension-host/runtime-backend-catalog.test.ts
@@ -15,36 +15,33 @@ vi.mock("./embedding-runtime-backends.js", () => ({
   ),
 }));
 
-vi.mock("./media-runtime-registry.js", () => ({
-  buildExtensionHostMediaUnderstandingRegistry: vi.fn(
-    () =>
-      new Map([
-        [
-          "openai",
-          {
-            id: "openai",
-            capabilities: ["image", "video"],
-          },
-        ],
-        [
-          "google",
-          {
-            id: "google",
-            capabilities: ["image"],
-          },
-        ],
-        [
-          "deepgram",
-          {
-            id: "deepgram",
-            capabilities: ["audio"],
-          },
-        ],
-      ]),
+vi.mock("./media-runtime-backends.js", () => ({
+  buildExtensionHostMediaRuntimeSelectorKeys: vi.fn((id: string) =>
+    id === "google" ? ["google", "gemini"] : [id],
+  ),
+  listExtensionHostMediaAutoRuntimeBackendSeedIds: vi.fn(
+    (capability: "audio" | "image" | "video") =>
+      ({
+        audio: ["deepgram"],
+        image: ["openai", "google"],
+        video: ["openai"],
+      })[capability],
+  ),
+  listExtensionHostMediaRuntimeBackendIds: vi.fn(
+    (capability: "audio" | "image" | "video") =>
+      ({
+        audio: ["deepgram"],
+        image: ["openai", "google"],
+        video: ["openai"],
+      })[capability],
   ),
   normalizeExtensionHostMediaProviderId: vi.fn((id: string) =>
     id.trim().toLowerCase() === "gemini" ? "google" : id.trim().toLowerCase(),
   ),
+  resolveExtensionHostMediaRuntimeDefaultModelMetadata: vi.fn(
+    (params: { capability: "audio" | "image" | "video"; backendId: string }) =>
+      params.capability === "image" && params.backendId === "openai" ? "gpt-5-mini" : undefined,
+  ),
 }));
 
 vi.mock("./tts-runtime-backends.js", () => ({
// File 5 (catalog): removes the local auto-provider table and the three local helpers
// (selector keys, provider-id ordering, default-model lookup) in favour of the imports from
// ./media-runtime-backends.js. The media catalog loop no longer builds a registry or skips
// entries via a registry lookup.
// NOTE(review): entry ids, backendId and selectorKeys are now derived from the normalized id
// returned by the backend-id list rather than from provider.id — confirm no built-in provider id
// differs from its normalized form, otherwise emitted catalog ids change.
diff --git a/src/extension-host/runtime-backend-catalog.ts b/src/extension-host/runtime-backend-catalog.ts
index 3ccebd9fcb7..821ebdc4348 100644
--- a/src/extension-host/runtime-backend-catalog.ts
+++ b/src/extension-host/runtime-backend-catalog.ts
@@ -1,11 +1,4 @@
 import type { TtsProvider } from "../config/types.tts.js";
-import {
-  AUTO_AUDIO_KEY_PROVIDERS,
-  AUTO_IMAGE_KEY_PROVIDERS,
-  AUTO_VIDEO_KEY_PROVIDERS,
-  DEFAULT_AUDIO_MODELS,
-  DEFAULT_IMAGE_MODELS,
-} from "../media-understanding/defaults.js";
 import type { MediaUnderstandingCapability } from "../media-understanding/types.js";
 import {
   EXTENSION_HOST_EMBEDDING_RUNTIME_BACKEND_IDS,
@@ -13,9 +6,12 @@ import {
 } from "./embedding-runtime-backends.js";
 import type { EmbeddingProviderId } from "./embedding-runtime-types.js";
 import {
-  buildExtensionHostMediaUnderstandingRegistry,
+  buildExtensionHostMediaRuntimeSelectorKeys,
+  listExtensionHostMediaAutoRuntimeBackendSeedIds,
+  listExtensionHostMediaRuntimeBackendIds as listExtensionHostMediaRuntimeBackendIdsFromDefinitions,
   normalizeExtensionHostMediaProviderId,
-} from "./media-runtime-registry.js";
+  resolveExtensionHostMediaRuntimeDefaultModelMetadata,
+} from "./media-runtime-backends.js";
 import { listExtensionHostTtsRuntimeBackends } from "./tts-runtime-backends.js";
 
 export const EXTENSION_HOST_RUNTIME_BACKEND_FAMILY = "capability.runtime-backend";
@@ -46,15 +42,6 @@ type ExtensionHostMediaRuntimeSubsystemId = Extract<
   "media.audio" | "media.image" | "media.video"
 >;
 
-const EXTENSION_HOST_MEDIA_AUTO_PROVIDER_IDS: Record<
-  MediaUnderstandingCapability,
-  readonly string[]
-> = {
-  audio: AUTO_AUDIO_KEY_PROVIDERS,
-  image: AUTO_IMAGE_KEY_PROVIDERS,
-  video: AUTO_VIDEO_KEY_PROVIDERS,
-};
-
 function buildRuntimeBackendCatalogId(
   subsystemId: ExtensionHostRuntimeBackendSubsystemId,
   backendId: string,
@@ -74,52 +61,6 @@ function mapMediaCapabilityToSubsystem(
   return "media.image";
 }
 
-function buildMediaSelectorKeys(providerId: string): readonly string[] {
-  const normalized = normalizeExtensionHostMediaProviderId(providerId);
-  if (normalized === "google") {
-    return [providerId, "gemini"];
-  }
-  return normalized === providerId ? [providerId] : [providerId, normalized];
-}
-
-function buildExtensionHostMediaRuntimeProviderIds(
-  capability: MediaUnderstandingCapability,
-): readonly string[] {
-  const registry = buildExtensionHostMediaUnderstandingRegistry();
-  const ordered: string[] = [];
-  const seen = new Set();
-  const pushProvider = (providerId: string) => {
-    const normalized = normalizeExtensionHostMediaProviderId(providerId);
-    const provider = registry.get(normalized);
-    if (!provider || seen.has(normalized) || !(provider.capabilities ?? []).includes(capability)) {
-      return;
-    }
-    seen.add(normalized);
-    ordered.push(normalized);
-  };
-
-  for (const providerId of EXTENSION_HOST_MEDIA_AUTO_PROVIDER_IDS[capability]) {
-    pushProvider(providerId);
-  }
-  for (const provider of registry.values()) {
-    pushProvider(provider.id);
-  }
-  return ordered;
-}
-
-function resolveExtensionHostMediaRuntimeDefaultModelFromDefaults(params: {
-  capability: MediaUnderstandingCapability;
-  backendId: string;
-}): string | undefined {
-  if (params.capability === "audio") {
-    return DEFAULT_AUDIO_MODELS[params.backendId];
-  }
-  if (params.capability === "image") {
-    return DEFAULT_IMAGE_MODELS[params.backendId];
-  }
-  return undefined;
-}
-
 export function listExtensionHostEmbeddingRuntimeBackendCatalogEntries(): readonly ExtensionHostRuntimeBackendCatalogEntry[] {
   return EXTENSION_HOST_EMBEDDING_RUNTIME_BACKEND_IDS.map((backendId, defaultRank) => ({
     id: buildRuntimeBackendCatalogId("embedding", backendId),
@@ -144,29 +85,26 @@ export function listExtensionHostEmbeddingRemoteRuntimeBackendIds(): readonly Em
 
 export function listExtensionHostMediaRuntimeBackendCatalogEntries(): readonly ExtensionHostRuntimeBackendCatalogEntry[] {
   const entries: ExtensionHostRuntimeBackendCatalogEntry[] = [];
-  const registry = buildExtensionHostMediaUnderstandingRegistry();
   for (const capability of ["audio", "image", "video"] as const) {
-    const providerIds = buildExtensionHostMediaRuntimeProviderIds(capability);
+    const providerIds = listExtensionHostMediaRuntimeBackendIdsFromDefinitions(capability);
     for (const [defaultRank, providerId] of providerIds.entries()) {
-      const provider = registry.get(providerId);
-      if (!provider) {
-        continue;
-      }
-      const defaultModel = resolveExtensionHostMediaRuntimeDefaultModelFromDefaults({
+      const defaultModel = resolveExtensionHostMediaRuntimeDefaultModelMetadata({
         capability,
         backendId: providerId,
       });
       entries.push({
-        id: buildRuntimeBackendCatalogId(mapMediaCapabilityToSubsystem(capability), provider.id),
+        id: buildRuntimeBackendCatalogId(mapMediaCapabilityToSubsystem(capability), providerId),
         family: EXTENSION_HOST_RUNTIME_BACKEND_FAMILY,
         subsystemId: mapMediaCapabilityToSubsystem(capability),
-        backendId: provider.id,
+        backendId: providerId,
         source: "builtin",
         defaultRank,
-        selectorKeys: buildMediaSelectorKeys(provider.id),
+        selectorKeys: buildExtensionHostMediaRuntimeSelectorKeys(providerId),
         capabilities: [capability],
         metadata: {
-          autoSelectable: EXTENSION_HOST_MEDIA_AUTO_PROVIDER_IDS[capability].includes(provider.id),
+          autoSelectable: listExtensionHostMediaAutoRuntimeBackendSeedIds(capability).includes(
+            normalizeExtensionHostMediaProviderId(providerId),
+          ),
           ...(defaultModel ? { defaultModel } : {}),
         },
       });