Media: share runtime backend definitions

This commit is contained in:
Gustavo Madeira Santana
2026-03-15 21:23:26 +00:00
parent 286f5ac99e
commit 61d8a3b71a
5 changed files with 220 additions and 135 deletions

View File

@@ -0,0 +1,60 @@
import { describe, expect, it } from "vitest";
import {
buildExtensionHostMediaRuntimeSelectorKeys,
listExtensionHostMediaAutoRuntimeBackendSeedIds,
listExtensionHostMediaRuntimeBackendIds,
listExtensionHostMediaUnderstandingProviders,
normalizeExtensionHostMediaProviderId,
resolveExtensionHostMediaRuntimeDefaultModelMetadata,
} from "./media-runtime-backends.js";
// Contract tests for the shared media runtime backend definitions.
describe("extension host media runtime backends", () => {
  it("publishes the built-in media providers once", () => {
    const published = listExtensionHostMediaUnderstandingProviders();
    const isPublished = (expectedId: string): boolean =>
      published.some((provider) => provider.id === expectedId);

    expect(isPublished("openai")).toBe(true);
    expect(isPublished("deepgram")).toBe(true);
  });

  it("keeps media-specific provider normalization and selector aliases", () => {
    // "gemini" is the media-specific alias of the canonical "google" id.
    const canonicalId = normalizeExtensionHostMediaProviderId("gemini");
    expect(canonicalId).toBe("google");

    const selectorKeys = buildExtensionHostMediaRuntimeSelectorKeys("google");
    expect(selectorKeys).toEqual(["google", "gemini"]);
  });

  it("keeps auto-seeded runtime backends ordered ahead of the rest", () => {
    const expectedImageSeedIds = [
      "openai",
      "anthropic",
      "google",
      "minimax",
      "minimax-portal",
      "zai",
    ];
    const expectedLeadingAudioIds = ["openai", "groq", "deepgram"];
    const expectedLeadingImageIds = ["openai", "anthropic", "google", "minimax"];

    expect(listExtensionHostMediaAutoRuntimeBackendSeedIds("image")).toEqual(expectedImageSeedIds);

    // Only the head of each ordered list is pinned; the tail is left unconstrained.
    const leadingAudioIds = listExtensionHostMediaRuntimeBackendIds("audio").slice(0, 3);
    expect(leadingAudioIds).toEqual(expectedLeadingAudioIds);

    const leadingImageIds = listExtensionHostMediaRuntimeBackendIds("image").slice(0, 4);
    expect(leadingImageIds).toEqual(expectedLeadingImageIds);
  });

  it("keeps default-model metadata with the shared backend definitions", () => {
    const imageDefault = resolveExtensionHostMediaRuntimeDefaultModelMetadata({
      capability: "image",
      backendId: "openai",
    });
    expect(imageDefault).toBe("gpt-5-mini");

    // Video has no default-model metadata, so the lookup yields undefined.
    const videoDefault = resolveExtensionHostMediaRuntimeDefaultModelMetadata({
      capability: "video",
      backendId: "openai",
    });
    expect(videoDefault).toBeUndefined();
  });
});

View File

@@ -0,0 +1,118 @@
import { normalizeProviderId } from "../agents/provider-id.js";
import {
AUTO_AUDIO_KEY_PROVIDERS,
AUTO_IMAGE_KEY_PROVIDERS,
AUTO_VIDEO_KEY_PROVIDERS,
DEFAULT_AUDIO_MODELS,
DEFAULT_IMAGE_MODELS,
} from "../media-understanding/defaults.js";
import { anthropicProvider } from "../media-understanding/providers/anthropic/index.js";
import { deepgramProvider } from "../media-understanding/providers/deepgram/index.js";
import { googleProvider } from "../media-understanding/providers/google/index.js";
import { groqProvider } from "../media-understanding/providers/groq/index.js";
import {
minimaxPortalProvider,
minimaxProvider,
} from "../media-understanding/providers/minimax/index.js";
import { mistralProvider } from "../media-understanding/providers/mistral/index.js";
import { moonshotProvider } from "../media-understanding/providers/moonshot/index.js";
import { openaiProvider } from "../media-understanding/providers/openai/index.js";
import { zaiProvider } from "../media-understanding/providers/zai/index.js";
import type {
MediaUnderstandingCapability,
MediaUnderstandingProvider,
} from "../media-understanding/types.js";
/**
 * Built-in media-understanding providers published by
 * `listExtensionHostMediaUnderstandingProviders`.
 *
 * Array order is significant: `listExtensionHostMediaRuntimeBackendIds` appends
 * providers in this order after the auto-seeded ids for a capability.
 */
const EXTENSION_HOST_MEDIA_UNDERSTANDING_PROVIDERS: readonly MediaUnderstandingProvider[] = [
groqProvider,
openaiProvider,
googleProvider,
anthropicProvider,
minimaxProvider,
minimaxPortalProvider,
moonshotProvider,
mistralProvider,
zaiProvider,
deepgramProvider,
];
/**
 * Per-capability seed ids for runtime backend ordering, taken directly from the
 * media-understanding defaults. Exposed through
 * `listExtensionHostMediaAutoRuntimeBackendSeedIds`.
 */
const EXTENSION_HOST_MEDIA_AUTO_RUNTIME_BACKEND_IDS: Record<
MediaUnderstandingCapability,
readonly string[]
> = {
audio: AUTO_AUDIO_KEY_PROVIDERS,
image: AUTO_IMAGE_KEY_PROVIDERS,
video: AUTO_VIDEO_KEY_PROVIDERS,
};
/**
 * Lists the built-in media-understanding providers.
 *
 * @returns The shared module-level provider list (the same array instance on
 *   every call), in publication order.
 */
export function listExtensionHostMediaUnderstandingProviders(): readonly MediaUnderstandingProvider[] {
  return EXTENSION_HOST_MEDIA_UNDERSTANDING_PROVIDERS;
}
/**
 * Normalizes a provider id for media runtime lookups.
 *
 * Applies the generic `normalizeProviderId` first, then folds the
 * media-specific alias "gemini" into "google".
 *
 * @param id - Provider id as supplied by the caller.
 * @returns The normalized id, with "gemini" rewritten to "google".
 */
export function normalizeExtensionHostMediaProviderId(id: string): string {
  const canonical = normalizeProviderId(id);
  return canonical === "gemini" ? "google" : canonical;
}
/**
 * Builds the selector keys under which a media runtime backend can be addressed.
 *
 * The result always starts with the id exactly as given, followed by its
 * normalized form when that differs. Backends that normalize to "google"
 * additionally get the "gemini" alias. Keys are de-duplicated while keeping
 * first-seen order, so passing the alias itself ("gemini") yields
 * `["gemini", "google"]` rather than a repeated "gemini" with the canonical id
 * missing.
 *
 * @param providerId - Backend/provider id, normalized or not.
 * @returns Ordered, duplicate-free selector keys; `["google", "gemini"]` for "google".
 */
export function buildExtensionHostMediaRuntimeSelectorKeys(providerId: string): readonly string[] {
  const normalized = normalizeExtensionHostMediaProviderId(providerId);
  const keys = [providerId, normalized];
  if (normalized === "google") {
    keys.push("gemini");
  }
  // A Set iterates in insertion order, so this drops repeats without reordering.
  return Array.from(new Set(keys));
}
/**
 * Returns the seed ids configured for a media capability.
 *
 * @param capability - Media capability ("audio", "image" or "video") to look up.
 * @returns The seed id list for that capability, exactly as stored in the
 *   module-level table (no copy, no normalization).
 */
export function listExtensionHostMediaAutoRuntimeBackendSeedIds(
  capability: MediaUnderstandingCapability,
): readonly string[] {
  const seedIds = EXTENSION_HOST_MEDIA_AUTO_RUNTIME_BACKEND_IDS[capability];
  return seedIds;
}
/**
 * Lists the normalized ids of built-in providers that support a capability.
 *
 * Ordering: providers named by the capability's seed list come first (in seed
 * order), followed by the remaining providers in publication order. Seed ids
 * without a matching built-in provider, and providers that do not declare the
 * capability, are skipped. Each id appears at most once.
 *
 * @param capability - Media capability the backends must declare.
 * @returns A freshly built array of normalized backend ids.
 */
export function listExtensionHostMediaRuntimeBackendIds(
  capability: MediaUnderstandingCapability,
): readonly string[] {
  // Index providers by normalized id; a Map keeps first-insertion order for iteration.
  const byNormalizedId = new Map<string, MediaUnderstandingProvider>();
  for (const provider of listExtensionHostMediaUnderstandingProviders()) {
    byNormalizedId.set(normalizeExtensionHostMediaProviderId(provider.id), provider);
  }

  // Seeded providers are visited first; unknown seed ids resolve to undefined.
  const seeded = listExtensionHostMediaAutoRuntimeBackendSeedIds(capability).map((seedId) =>
    byNormalizedId.get(normalizeExtensionHostMediaProviderId(seedId)),
  );
  const candidates = [...seeded, ...byNormalizedId.values()];

  // A Set ignores repeated adds and iterates in insertion order, which gives
  // "first occurrence wins" de-duplication.
  const backendIds = new Set<string>();
  for (const candidate of candidates) {
    if (candidate?.capabilities?.includes(capability)) {
      backendIds.add(normalizeExtensionHostMediaProviderId(candidate.id));
    }
  }
  return [...backendIds];
}
/**
 * Looks up the default model recorded for a backend and capability.
 *
 * Audio and image each consult their own defaults table, keyed by
 * `backendId` exactly as given. Any other capability (i.e. video) has no table
 * and always yields `undefined`.
 *
 * @param params.capability - Media capability being resolved.
 * @param params.backendId - Backend id used as the lookup key.
 * @returns The default model name, or `undefined` when none is recorded.
 */
export function resolveExtensionHostMediaRuntimeDefaultModelMetadata(params: {
  capability: MediaUnderstandingCapability;
  backendId: string;
}): string | undefined {
  const { capability, backendId } = params;
  switch (capability) {
    case "audio":
      return DEFAULT_AUDIO_MODELS[backendId];
    case "image":
      return DEFAULT_IMAGE_MODELS[backendId];
    default:
      return undefined;
  }
}

View File

@@ -1,49 +1,21 @@
import { normalizeProviderId } from "../agents/provider-id.js";
import { anthropicProvider } from "../media-understanding/providers/anthropic/index.js";
import { deepgramProvider } from "../media-understanding/providers/deepgram/index.js";
import { googleProvider } from "../media-understanding/providers/google/index.js";
import { groqProvider } from "../media-understanding/providers/groq/index.js";
import {
minimaxPortalProvider,
minimaxProvider,
} from "../media-understanding/providers/minimax/index.js";
import { mistralProvider } from "../media-understanding/providers/mistral/index.js";
import { moonshotProvider } from "../media-understanding/providers/moonshot/index.js";
import { openaiProvider } from "../media-understanding/providers/openai/index.js";
import { zaiProvider } from "../media-understanding/providers/zai/index.js";
import type { MediaUnderstandingProvider } from "../media-understanding/types.js";
const EXTENSION_HOST_MEDIA_PROVIDERS: readonly MediaUnderstandingProvider[] = [
groqProvider,
openaiProvider,
googleProvider,
anthropicProvider,
minimaxProvider,
minimaxPortalProvider,
moonshotProvider,
mistralProvider,
zaiProvider,
deepgramProvider,
];
import {
listExtensionHostMediaUnderstandingProviders,
normalizeExtensionHostMediaProviderId,
} from "./media-runtime-backends.js";
export type ExtensionHostMediaUnderstandingProviderRegistry = Map<
string,
MediaUnderstandingProvider
>;
export function normalizeExtensionHostMediaProviderId(id: string): string {
const normalized = normalizeProviderId(id);
if (normalized === "gemini") {
return "google";
}
return normalized;
}
export { normalizeExtensionHostMediaProviderId } from "./media-runtime-backends.js";
export function buildExtensionHostMediaUnderstandingRegistry(
overrides?: Record<string, MediaUnderstandingProvider>,
): ExtensionHostMediaUnderstandingProviderRegistry {
const registry: ExtensionHostMediaUnderstandingProviderRegistry = new Map();
for (const provider of EXTENSION_HOST_MEDIA_PROVIDERS) {
for (const provider of listExtensionHostMediaUnderstandingProviders()) {
registry.set(normalizeExtensionHostMediaProviderId(provider.id), provider);
}
if (!overrides) {

View File

@@ -15,36 +15,33 @@ vi.mock("./embedding-runtime-backends.js", () => ({
),
}));
vi.mock("./media-runtime-registry.js", () => ({
buildExtensionHostMediaUnderstandingRegistry: vi.fn(
() =>
new Map([
[
"openai",
{
id: "openai",
capabilities: ["image", "video"],
},
],
[
"google",
{
id: "google",
capabilities: ["image"],
},
],
[
"deepgram",
{
id: "deepgram",
capabilities: ["audio"],
},
],
]),
vi.mock("./media-runtime-backends.js", () => ({
buildExtensionHostMediaRuntimeSelectorKeys: vi.fn((id: string) =>
id === "google" ? ["google", "gemini"] : [id],
),
listExtensionHostMediaAutoRuntimeBackendSeedIds: vi.fn(
(capability: "audio" | "image" | "video") =>
({
audio: ["deepgram"],
image: ["openai", "google"],
video: ["openai"],
})[capability],
),
listExtensionHostMediaRuntimeBackendIds: vi.fn(
(capability: "audio" | "image" | "video") =>
({
audio: ["deepgram"],
image: ["openai", "google"],
video: ["openai"],
})[capability],
),
normalizeExtensionHostMediaProviderId: vi.fn((id: string) =>
id.trim().toLowerCase() === "gemini" ? "google" : id.trim().toLowerCase(),
),
resolveExtensionHostMediaRuntimeDefaultModelMetadata: vi.fn(
(params: { capability: "audio" | "image" | "video"; backendId: string }) =>
params.capability === "image" && params.backendId === "openai" ? "gpt-5-mini" : undefined,
),
}));
vi.mock("./tts-runtime-backends.js", () => ({

View File

@@ -1,11 +1,4 @@
import type { TtsProvider } from "../config/types.tts.js";
import {
AUTO_AUDIO_KEY_PROVIDERS,
AUTO_IMAGE_KEY_PROVIDERS,
AUTO_VIDEO_KEY_PROVIDERS,
DEFAULT_AUDIO_MODELS,
DEFAULT_IMAGE_MODELS,
} from "../media-understanding/defaults.js";
import type { MediaUnderstandingCapability } from "../media-understanding/types.js";
import {
EXTENSION_HOST_EMBEDDING_RUNTIME_BACKEND_IDS,
@@ -13,9 +6,12 @@ import {
} from "./embedding-runtime-backends.js";
import type { EmbeddingProviderId } from "./embedding-runtime-types.js";
import {
buildExtensionHostMediaUnderstandingRegistry,
buildExtensionHostMediaRuntimeSelectorKeys,
listExtensionHostMediaAutoRuntimeBackendSeedIds,
listExtensionHostMediaRuntimeBackendIds as listExtensionHostMediaRuntimeBackendIdsFromDefinitions,
normalizeExtensionHostMediaProviderId,
} from "./media-runtime-registry.js";
resolveExtensionHostMediaRuntimeDefaultModelMetadata,
} from "./media-runtime-backends.js";
import { listExtensionHostTtsRuntimeBackends } from "./tts-runtime-backends.js";
export const EXTENSION_HOST_RUNTIME_BACKEND_FAMILY = "capability.runtime-backend";
@@ -46,15 +42,6 @@ type ExtensionHostMediaRuntimeSubsystemId = Extract<
"media.audio" | "media.image" | "media.video"
>;
const EXTENSION_HOST_MEDIA_AUTO_PROVIDER_IDS: Record<
MediaUnderstandingCapability,
readonly string[]
> = {
audio: AUTO_AUDIO_KEY_PROVIDERS,
image: AUTO_IMAGE_KEY_PROVIDERS,
video: AUTO_VIDEO_KEY_PROVIDERS,
};
function buildRuntimeBackendCatalogId(
subsystemId: ExtensionHostRuntimeBackendSubsystemId,
backendId: string,
@@ -74,52 +61,6 @@ function mapMediaCapabilityToSubsystem(
return "media.image";
}
function buildMediaSelectorKeys(providerId: string): readonly string[] {
const normalized = normalizeExtensionHostMediaProviderId(providerId);
if (normalized === "google") {
return [providerId, "gemini"];
}
return normalized === providerId ? [providerId] : [providerId, normalized];
}
function buildExtensionHostMediaRuntimeProviderIds(
capability: MediaUnderstandingCapability,
): readonly string[] {
const registry = buildExtensionHostMediaUnderstandingRegistry();
const ordered: string[] = [];
const seen = new Set<string>();
const pushProvider = (providerId: string) => {
const normalized = normalizeExtensionHostMediaProviderId(providerId);
const provider = registry.get(normalized);
if (!provider || seen.has(normalized) || !(provider.capabilities ?? []).includes(capability)) {
return;
}
seen.add(normalized);
ordered.push(normalized);
};
for (const providerId of EXTENSION_HOST_MEDIA_AUTO_PROVIDER_IDS[capability]) {
pushProvider(providerId);
}
for (const provider of registry.values()) {
pushProvider(provider.id);
}
return ordered;
}
function resolveExtensionHostMediaRuntimeDefaultModelFromDefaults(params: {
capability: MediaUnderstandingCapability;
backendId: string;
}): string | undefined {
if (params.capability === "audio") {
return DEFAULT_AUDIO_MODELS[params.backendId];
}
if (params.capability === "image") {
return DEFAULT_IMAGE_MODELS[params.backendId];
}
return undefined;
}
export function listExtensionHostEmbeddingRuntimeBackendCatalogEntries(): readonly ExtensionHostRuntimeBackendCatalogEntry[] {
return EXTENSION_HOST_EMBEDDING_RUNTIME_BACKEND_IDS.map((backendId, defaultRank) => ({
id: buildRuntimeBackendCatalogId("embedding", backendId),
@@ -144,29 +85,26 @@ export function listExtensionHostEmbeddingRemoteRuntimeBackendIds(): readonly Em
export function listExtensionHostMediaRuntimeBackendCatalogEntries(): readonly ExtensionHostRuntimeBackendCatalogEntry[] {
const entries: ExtensionHostRuntimeBackendCatalogEntry[] = [];
const registry = buildExtensionHostMediaUnderstandingRegistry();
for (const capability of ["audio", "image", "video"] as const) {
const providerIds = buildExtensionHostMediaRuntimeProviderIds(capability);
const providerIds = listExtensionHostMediaRuntimeBackendIdsFromDefinitions(capability);
for (const [defaultRank, providerId] of providerIds.entries()) {
const provider = registry.get(providerId);
if (!provider) {
continue;
}
const defaultModel = resolveExtensionHostMediaRuntimeDefaultModelFromDefaults({
const defaultModel = resolveExtensionHostMediaRuntimeDefaultModelMetadata({
capability,
backendId: providerId,
});
entries.push({
id: buildRuntimeBackendCatalogId(mapMediaCapabilityToSubsystem(capability), provider.id),
id: buildRuntimeBackendCatalogId(mapMediaCapabilityToSubsystem(capability), providerId),
family: EXTENSION_HOST_RUNTIME_BACKEND_FAMILY,
subsystemId: mapMediaCapabilityToSubsystem(capability),
backendId: provider.id,
backendId: providerId,
source: "builtin",
defaultRank,
selectorKeys: buildMediaSelectorKeys(provider.id),
selectorKeys: buildExtensionHostMediaRuntimeSelectorKeys(providerId),
capabilities: [capability],
metadata: {
autoSelectable: EXTENSION_HOST_MEDIA_AUTO_PROVIDER_IDS[capability].includes(provider.id),
autoSelectable: listExtensionHostMediaAutoRuntimeBackendSeedIds(capability).includes(
normalizeExtensionHostMediaProviderId(providerId),
),
...(defaultModel ? { defaultModel } : {}),
},
});