mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-25 00:42:24 +00:00
Media: share runtime backend definitions
This commit is contained in:
60
src/extension-host/media-runtime-backends.test.ts
Normal file
60
src/extension-host/media-runtime-backends.test.ts
Normal file
@@ -0,0 +1,60 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
buildExtensionHostMediaRuntimeSelectorKeys,
|
||||
listExtensionHostMediaAutoRuntimeBackendSeedIds,
|
||||
listExtensionHostMediaRuntimeBackendIds,
|
||||
listExtensionHostMediaUnderstandingProviders,
|
||||
normalizeExtensionHostMediaProviderId,
|
||||
resolveExtensionHostMediaRuntimeDefaultModelMetadata,
|
||||
} from "./media-runtime-backends.js";
|
||||
|
||||
// Behavioural checks for the shared media runtime backend definitions:
// provider publication, id normalization / selector aliases, backend ordering,
// and default-model metadata lookup.
describe("extension host media runtime backends", () => {
  it("publishes the built-in media providers once", () => {
    const providerIds = listExtensionHostMediaUnderstandingProviders().map(({ id }) => id);

    expect(providerIds).toContain("openai");
    expect(providerIds).toContain("deepgram");
  });

  it("keeps media-specific provider normalization and selector aliases", () => {
    const canonicalId = normalizeExtensionHostMediaProviderId("gemini");
    const googleSelectorKeys = buildExtensionHostMediaRuntimeSelectorKeys("google");

    expect(canonicalId).toBe("google");
    expect(googleSelectorKeys).toEqual(["google", "gemini"]);
  });

  it("keeps auto-seeded runtime backends ordered ahead of the rest", () => {
    // The image seed list is asserted in full; the backend lists are only
    // checked for their leading entries.
    const imageSeedIds = listExtensionHostMediaAutoRuntimeBackendSeedIds("image");
    const leadingAudioBackends = listExtensionHostMediaRuntimeBackendIds("audio").slice(0, 3);
    const leadingImageBackends = listExtensionHostMediaRuntimeBackendIds("image").slice(0, 4);

    expect(imageSeedIds).toEqual([
      "openai",
      "anthropic",
      "google",
      "minimax",
      "minimax-portal",
      "zai",
    ]);
    expect(leadingAudioBackends).toEqual(["openai", "groq", "deepgram"]);
    expect(leadingImageBackends).toEqual(["openai", "anthropic", "google", "minimax"]);
  });

  it("keeps default-model metadata with the shared backend definitions", () => {
    const openaiImageModel = resolveExtensionHostMediaRuntimeDefaultModelMetadata({
      capability: "image",
      backendId: "openai",
    });
    const openaiVideoModel = resolveExtensionHostMediaRuntimeDefaultModelMetadata({
      capability: "video",
      backendId: "openai",
    });

    expect(openaiImageModel).toBe("gpt-5-mini");
    expect(openaiVideoModel).toBeUndefined();
  });
});
|
||||
118
src/extension-host/media-runtime-backends.ts
Normal file
118
src/extension-host/media-runtime-backends.ts
Normal file
@@ -0,0 +1,118 @@
|
||||
import { normalizeProviderId } from "../agents/provider-id.js";
|
||||
import {
|
||||
AUTO_AUDIO_KEY_PROVIDERS,
|
||||
AUTO_IMAGE_KEY_PROVIDERS,
|
||||
AUTO_VIDEO_KEY_PROVIDERS,
|
||||
DEFAULT_AUDIO_MODELS,
|
||||
DEFAULT_IMAGE_MODELS,
|
||||
} from "../media-understanding/defaults.js";
|
||||
import { anthropicProvider } from "../media-understanding/providers/anthropic/index.js";
|
||||
import { deepgramProvider } from "../media-understanding/providers/deepgram/index.js";
|
||||
import { googleProvider } from "../media-understanding/providers/google/index.js";
|
||||
import { groqProvider } from "../media-understanding/providers/groq/index.js";
|
||||
import {
|
||||
minimaxPortalProvider,
|
||||
minimaxProvider,
|
||||
} from "../media-understanding/providers/minimax/index.js";
|
||||
import { mistralProvider } from "../media-understanding/providers/mistral/index.js";
|
||||
import { moonshotProvider } from "../media-understanding/providers/moonshot/index.js";
|
||||
import { openaiProvider } from "../media-understanding/providers/openai/index.js";
|
||||
import { zaiProvider } from "../media-understanding/providers/zai/index.js";
|
||||
import type {
|
||||
MediaUnderstandingCapability,
|
||||
MediaUnderstandingProvider,
|
||||
} from "../media-understanding/types.js";
|
||||
|
||||
/**
 * Built-in media-understanding providers published by the extension host.
 *
 * Element order is significant: providers that are not part of a capability's
 * auto-seed list are emitted by `listExtensionHostMediaRuntimeBackendIds` in
 * the order they appear here.
 */
const EXTENSION_HOST_MEDIA_UNDERSTANDING_PROVIDERS: readonly MediaUnderstandingProvider[] = [
  groqProvider,
  openaiProvider,
  googleProvider,
  anthropicProvider,
  minimaxProvider,
  minimaxPortalProvider,
  moonshotProvider,
  mistralProvider,
  zaiProvider,
  deepgramProvider,
];
|
||||
|
||||
/**
 * Auto-selection seed ids per media capability, sourced from the
 * `AUTO_*_KEY_PROVIDERS` lists in the media-understanding defaults.
 */
const EXTENSION_HOST_MEDIA_AUTO_RUNTIME_BACKEND_IDS: Record<
  MediaUnderstandingCapability,
  readonly string[]
> = {
  audio: AUTO_AUDIO_KEY_PROVIDERS,
  image: AUTO_IMAGE_KEY_PROVIDERS,
  video: AUTO_VIDEO_KEY_PROVIDERS,
};
|
||||
|
||||
/**
 * Lists the built-in media-understanding providers.
 *
 * @returns The shared module-level provider array itself (no copy is made);
 *   it is exposed through a `readonly` type.
 */
export function listExtensionHostMediaUnderstandingProviders(): readonly MediaUnderstandingProvider[] {
  const builtinProviders = EXTENSION_HOST_MEDIA_UNDERSTANDING_PROVIDERS;
  return builtinProviders;
}
|
||||
|
||||
/**
 * Normalizes a provider id for media runtime lookups.
 *
 * Applies the shared `normalizeProviderId` first, then folds the media-specific
 * alias `"gemini"` into the canonical id `"google"`.
 *
 * @param id - Provider id as supplied by the caller.
 * @returns The canonical media provider id.
 */
export function normalizeExtensionHostMediaProviderId(id: string): string {
  const baseId = normalizeProviderId(id);
  return baseId === "gemini" ? "google" : baseId;
}
|
||||
|
||||
/**
 * Builds the selector keys under which a media runtime backend can be addressed.
 *
 * The result always starts with the id exactly as given, followed by its
 * canonical (normalized) form when that differs. Backends that normalize to
 * `"google"` additionally expose the `"gemini"` alias.
 *
 * Keys are de-duplicated in insertion order, so passing the alias itself
 * (`"gemini"`) yields `["gemini", "google"]` rather than a repeated alias with
 * the canonical id missing.
 *
 * @param providerId - Provider id, canonical or not.
 * @returns Unique selector keys, caller-supplied id first.
 */
export function buildExtensionHostMediaRuntimeSelectorKeys(providerId: string): readonly string[] {
  const normalized = normalizeExtensionHostMediaProviderId(providerId);
  const candidates =
    normalized === "google" ? [providerId, normalized, "gemini"] : [providerId, normalized];
  // A Set iterates in insertion order, which keeps the caller's id first while
  // dropping repeats such as providerId === normalized.
  return [...new Set(candidates)];
}
|
||||
|
||||
/**
 * Returns the auto-selection seed ids configured for a media capability.
 *
 * @param capability - Media capability to look up.
 * @returns The seed id list stored for that capability (not copied).
 */
export function listExtensionHostMediaAutoRuntimeBackendSeedIds(
  capability: MediaUnderstandingCapability,
): readonly string[] {
  const { [capability]: seedIds } = EXTENSION_HOST_MEDIA_AUTO_RUNTIME_BACKEND_IDS;
  return seedIds;
}
|
||||
|
||||
/**
 * Lists the normalized ids of built-in providers that support `capability`.
 *
 * Ordering: providers named in the capability's auto-seed list come first, in
 * seed order; every remaining provider follows in built-in list order. Each id
 * appears at most once, and providers lacking the capability are left out.
 *
 * @param capability - Media capability the backends must support.
 * @returns A freshly built array of normalized backend ids.
 */
export function listExtensionHostMediaRuntimeBackendIds(
  capability: MediaUnderstandingCapability,
): readonly string[] {
  // Index the built-in providers by normalized id.
  const byNormalizedId = new Map<string, MediaUnderstandingProvider>();
  for (const builtin of listExtensionHostMediaUnderstandingProviders()) {
    byNormalizedId.set(normalizeExtensionHostMediaProviderId(builtin.id), builtin);
  }

  // Seeded providers are considered first; seed ids with no matching built-in
  // provider resolve to undefined and are skipped below.
  const seeded = listExtensionHostMediaAutoRuntimeBackendSeedIds(capability).map((seedId) =>
    byNormalizedId.get(normalizeExtensionHostMediaProviderId(seedId)),
  );
  const candidates = [...seeded, ...byNormalizedId.values()];

  // A Set keeps first-insertion order, so re-adding an already seeded id is a no-op.
  const backendIds = new Set<string>();
  for (const candidate of candidates) {
    if (!candidate) {
      continue;
    }
    const supported = candidate.capabilities ?? [];
    if (supported.includes(capability)) {
      backendIds.add(normalizeExtensionHostMediaProviderId(candidate.id));
    }
  }
  return [...backendIds];
}
|
||||
|
||||
/**
 * Looks up the default model recorded for a backend and media capability.
 *
 * Audio and image capabilities read from `DEFAULT_AUDIO_MODELS` and
 * `DEFAULT_IMAGE_MODELS` respectively, keyed by `backendId` exactly as given.
 * Any other capability (currently video) has no default table here.
 *
 * @param params.capability - Media capability being resolved.
 * @param params.backendId - Backend id used as the lookup key.
 * @returns The default model id, or `undefined` when none is recorded.
 */
export function resolveExtensionHostMediaRuntimeDefaultModelMetadata(params: {
  capability: MediaUnderstandingCapability;
  backendId: string;
}): string | undefined {
  const { capability, backendId } = params;
  switch (capability) {
    case "audio":
      return DEFAULT_AUDIO_MODELS[backendId];
    case "image":
      return DEFAULT_IMAGE_MODELS[backendId];
    default:
      return undefined;
  }
}
|
||||
@@ -1,49 +1,21 @@
|
||||
import { normalizeProviderId } from "../agents/provider-id.js";
|
||||
import { anthropicProvider } from "../media-understanding/providers/anthropic/index.js";
|
||||
import { deepgramProvider } from "../media-understanding/providers/deepgram/index.js";
|
||||
import { googleProvider } from "../media-understanding/providers/google/index.js";
|
||||
import { groqProvider } from "../media-understanding/providers/groq/index.js";
|
||||
import {
|
||||
minimaxPortalProvider,
|
||||
minimaxProvider,
|
||||
} from "../media-understanding/providers/minimax/index.js";
|
||||
import { mistralProvider } from "../media-understanding/providers/mistral/index.js";
|
||||
import { moonshotProvider } from "../media-understanding/providers/moonshot/index.js";
|
||||
import { openaiProvider } from "../media-understanding/providers/openai/index.js";
|
||||
import { zaiProvider } from "../media-understanding/providers/zai/index.js";
|
||||
import type { MediaUnderstandingProvider } from "../media-understanding/types.js";
|
||||
|
||||
const EXTENSION_HOST_MEDIA_PROVIDERS: readonly MediaUnderstandingProvider[] = [
|
||||
groqProvider,
|
||||
openaiProvider,
|
||||
googleProvider,
|
||||
anthropicProvider,
|
||||
minimaxProvider,
|
||||
minimaxPortalProvider,
|
||||
moonshotProvider,
|
||||
mistralProvider,
|
||||
zaiProvider,
|
||||
deepgramProvider,
|
||||
];
|
||||
import {
|
||||
listExtensionHostMediaUnderstandingProviders,
|
||||
normalizeExtensionHostMediaProviderId,
|
||||
} from "./media-runtime-backends.js";
|
||||
|
||||
export type ExtensionHostMediaUnderstandingProviderRegistry = Map<
|
||||
string,
|
||||
MediaUnderstandingProvider
|
||||
>;
|
||||
|
||||
export function normalizeExtensionHostMediaProviderId(id: string): string {
|
||||
const normalized = normalizeProviderId(id);
|
||||
if (normalized === "gemini") {
|
||||
return "google";
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
export { normalizeExtensionHostMediaProviderId } from "./media-runtime-backends.js";
|
||||
|
||||
export function buildExtensionHostMediaUnderstandingRegistry(
|
||||
overrides?: Record<string, MediaUnderstandingProvider>,
|
||||
): ExtensionHostMediaUnderstandingProviderRegistry {
|
||||
const registry: ExtensionHostMediaUnderstandingProviderRegistry = new Map();
|
||||
for (const provider of EXTENSION_HOST_MEDIA_PROVIDERS) {
|
||||
for (const provider of listExtensionHostMediaUnderstandingProviders()) {
|
||||
registry.set(normalizeExtensionHostMediaProviderId(provider.id), provider);
|
||||
}
|
||||
if (!overrides) {
|
||||
|
||||
@@ -15,36 +15,33 @@ vi.mock("./embedding-runtime-backends.js", () => ({
|
||||
),
|
||||
}));
|
||||
|
||||
vi.mock("./media-runtime-registry.js", () => ({
|
||||
buildExtensionHostMediaUnderstandingRegistry: vi.fn(
|
||||
() =>
|
||||
new Map([
|
||||
[
|
||||
"openai",
|
||||
{
|
||||
id: "openai",
|
||||
capabilities: ["image", "video"],
|
||||
},
|
||||
],
|
||||
[
|
||||
"google",
|
||||
{
|
||||
id: "google",
|
||||
capabilities: ["image"],
|
||||
},
|
||||
],
|
||||
[
|
||||
"deepgram",
|
||||
{
|
||||
id: "deepgram",
|
||||
capabilities: ["audio"],
|
||||
},
|
||||
],
|
||||
]),
|
||||
vi.mock("./media-runtime-backends.js", () => ({
|
||||
buildExtensionHostMediaRuntimeSelectorKeys: vi.fn((id: string) =>
|
||||
id === "google" ? ["google", "gemini"] : [id],
|
||||
),
|
||||
listExtensionHostMediaAutoRuntimeBackendSeedIds: vi.fn(
|
||||
(capability: "audio" | "image" | "video") =>
|
||||
({
|
||||
audio: ["deepgram"],
|
||||
image: ["openai", "google"],
|
||||
video: ["openai"],
|
||||
})[capability],
|
||||
),
|
||||
listExtensionHostMediaRuntimeBackendIds: vi.fn(
|
||||
(capability: "audio" | "image" | "video") =>
|
||||
({
|
||||
audio: ["deepgram"],
|
||||
image: ["openai", "google"],
|
||||
video: ["openai"],
|
||||
})[capability],
|
||||
),
|
||||
normalizeExtensionHostMediaProviderId: vi.fn((id: string) =>
|
||||
id.trim().toLowerCase() === "gemini" ? "google" : id.trim().toLowerCase(),
|
||||
),
|
||||
resolveExtensionHostMediaRuntimeDefaultModelMetadata: vi.fn(
|
||||
(params: { capability: "audio" | "image" | "video"; backendId: string }) =>
|
||||
params.capability === "image" && params.backendId === "openai" ? "gpt-5-mini" : undefined,
|
||||
),
|
||||
}));
|
||||
|
||||
vi.mock("./tts-runtime-backends.js", () => ({
|
||||
|
||||
@@ -1,11 +1,4 @@
|
||||
import type { TtsProvider } from "../config/types.tts.js";
|
||||
import {
|
||||
AUTO_AUDIO_KEY_PROVIDERS,
|
||||
AUTO_IMAGE_KEY_PROVIDERS,
|
||||
AUTO_VIDEO_KEY_PROVIDERS,
|
||||
DEFAULT_AUDIO_MODELS,
|
||||
DEFAULT_IMAGE_MODELS,
|
||||
} from "../media-understanding/defaults.js";
|
||||
import type { MediaUnderstandingCapability } from "../media-understanding/types.js";
|
||||
import {
|
||||
EXTENSION_HOST_EMBEDDING_RUNTIME_BACKEND_IDS,
|
||||
@@ -13,9 +6,12 @@ import {
|
||||
} from "./embedding-runtime-backends.js";
|
||||
import type { EmbeddingProviderId } from "./embedding-runtime-types.js";
|
||||
import {
|
||||
buildExtensionHostMediaUnderstandingRegistry,
|
||||
buildExtensionHostMediaRuntimeSelectorKeys,
|
||||
listExtensionHostMediaAutoRuntimeBackendSeedIds,
|
||||
listExtensionHostMediaRuntimeBackendIds as listExtensionHostMediaRuntimeBackendIdsFromDefinitions,
|
||||
normalizeExtensionHostMediaProviderId,
|
||||
} from "./media-runtime-registry.js";
|
||||
resolveExtensionHostMediaRuntimeDefaultModelMetadata,
|
||||
} from "./media-runtime-backends.js";
|
||||
import { listExtensionHostTtsRuntimeBackends } from "./tts-runtime-backends.js";
|
||||
|
||||
export const EXTENSION_HOST_RUNTIME_BACKEND_FAMILY = "capability.runtime-backend";
|
||||
@@ -46,15 +42,6 @@ type ExtensionHostMediaRuntimeSubsystemId = Extract<
|
||||
"media.audio" | "media.image" | "media.video"
|
||||
>;
|
||||
|
||||
const EXTENSION_HOST_MEDIA_AUTO_PROVIDER_IDS: Record<
|
||||
MediaUnderstandingCapability,
|
||||
readonly string[]
|
||||
> = {
|
||||
audio: AUTO_AUDIO_KEY_PROVIDERS,
|
||||
image: AUTO_IMAGE_KEY_PROVIDERS,
|
||||
video: AUTO_VIDEO_KEY_PROVIDERS,
|
||||
};
|
||||
|
||||
function buildRuntimeBackendCatalogId(
|
||||
subsystemId: ExtensionHostRuntimeBackendSubsystemId,
|
||||
backendId: string,
|
||||
@@ -74,52 +61,6 @@ function mapMediaCapabilityToSubsystem(
|
||||
return "media.image";
|
||||
}
|
||||
|
||||
function buildMediaSelectorKeys(providerId: string): readonly string[] {
|
||||
const normalized = normalizeExtensionHostMediaProviderId(providerId);
|
||||
if (normalized === "google") {
|
||||
return [providerId, "gemini"];
|
||||
}
|
||||
return normalized === providerId ? [providerId] : [providerId, normalized];
|
||||
}
|
||||
|
||||
function buildExtensionHostMediaRuntimeProviderIds(
|
||||
capability: MediaUnderstandingCapability,
|
||||
): readonly string[] {
|
||||
const registry = buildExtensionHostMediaUnderstandingRegistry();
|
||||
const ordered: string[] = [];
|
||||
const seen = new Set<string>();
|
||||
const pushProvider = (providerId: string) => {
|
||||
const normalized = normalizeExtensionHostMediaProviderId(providerId);
|
||||
const provider = registry.get(normalized);
|
||||
if (!provider || seen.has(normalized) || !(provider.capabilities ?? []).includes(capability)) {
|
||||
return;
|
||||
}
|
||||
seen.add(normalized);
|
||||
ordered.push(normalized);
|
||||
};
|
||||
|
||||
for (const providerId of EXTENSION_HOST_MEDIA_AUTO_PROVIDER_IDS[capability]) {
|
||||
pushProvider(providerId);
|
||||
}
|
||||
for (const provider of registry.values()) {
|
||||
pushProvider(provider.id);
|
||||
}
|
||||
return ordered;
|
||||
}
|
||||
|
||||
function resolveExtensionHostMediaRuntimeDefaultModelFromDefaults(params: {
|
||||
capability: MediaUnderstandingCapability;
|
||||
backendId: string;
|
||||
}): string | undefined {
|
||||
if (params.capability === "audio") {
|
||||
return DEFAULT_AUDIO_MODELS[params.backendId];
|
||||
}
|
||||
if (params.capability === "image") {
|
||||
return DEFAULT_IMAGE_MODELS[params.backendId];
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
export function listExtensionHostEmbeddingRuntimeBackendCatalogEntries(): readonly ExtensionHostRuntimeBackendCatalogEntry[] {
|
||||
return EXTENSION_HOST_EMBEDDING_RUNTIME_BACKEND_IDS.map((backendId, defaultRank) => ({
|
||||
id: buildRuntimeBackendCatalogId("embedding", backendId),
|
||||
@@ -144,29 +85,26 @@ export function listExtensionHostEmbeddingRemoteRuntimeBackendIds(): readonly Em
|
||||
|
||||
export function listExtensionHostMediaRuntimeBackendCatalogEntries(): readonly ExtensionHostRuntimeBackendCatalogEntry[] {
|
||||
const entries: ExtensionHostRuntimeBackendCatalogEntry[] = [];
|
||||
const registry = buildExtensionHostMediaUnderstandingRegistry();
|
||||
for (const capability of ["audio", "image", "video"] as const) {
|
||||
const providerIds = buildExtensionHostMediaRuntimeProviderIds(capability);
|
||||
const providerIds = listExtensionHostMediaRuntimeBackendIdsFromDefinitions(capability);
|
||||
for (const [defaultRank, providerId] of providerIds.entries()) {
|
||||
const provider = registry.get(providerId);
|
||||
if (!provider) {
|
||||
continue;
|
||||
}
|
||||
const defaultModel = resolveExtensionHostMediaRuntimeDefaultModelFromDefaults({
|
||||
const defaultModel = resolveExtensionHostMediaRuntimeDefaultModelMetadata({
|
||||
capability,
|
||||
backendId: providerId,
|
||||
});
|
||||
entries.push({
|
||||
id: buildRuntimeBackendCatalogId(mapMediaCapabilityToSubsystem(capability), provider.id),
|
||||
id: buildRuntimeBackendCatalogId(mapMediaCapabilityToSubsystem(capability), providerId),
|
||||
family: EXTENSION_HOST_RUNTIME_BACKEND_FAMILY,
|
||||
subsystemId: mapMediaCapabilityToSubsystem(capability),
|
||||
backendId: provider.id,
|
||||
backendId: providerId,
|
||||
source: "builtin",
|
||||
defaultRank,
|
||||
selectorKeys: buildMediaSelectorKeys(provider.id),
|
||||
selectorKeys: buildExtensionHostMediaRuntimeSelectorKeys(providerId),
|
||||
capabilities: [capability],
|
||||
metadata: {
|
||||
autoSelectable: EXTENSION_HOST_MEDIA_AUTO_PROVIDER_IDS[capability].includes(provider.id),
|
||||
autoSelectable: listExtensionHostMediaAutoRuntimeBackendSeedIds(capability).includes(
|
||||
normalizeExtensionHostMediaProviderId(providerId),
|
||||
),
|
||||
...(defaultModel ? { defaultModel } : {}),
|
||||
},
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user