refactor: move voice-call realtime providers into extensions

This commit is contained in:
Peter Steinberger
2026-04-04 12:04:37 +09:00
parent 61f93540b2
commit a23ab9b906
90 changed files with 3134 additions and 792 deletions

View File

@@ -28,6 +28,8 @@ export type BuildPluginApiParams = {
| "registerCliBackend"
| "registerProvider"
| "registerSpeechProvider"
| "registerRealtimeTranscriptionProvider"
| "registerRealtimeVoiceProvider"
| "registerMediaUnderstandingProvider"
| "registerImageGenerationProvider"
| "registerWebFetchProvider"
@@ -55,6 +57,10 @@ const noopRegisterService: OpenClawPluginApi["registerService"] = () => {};
// No-op registration fallbacks. buildPluginApi substitutes these (via `??`) when the
// corresponding handler is not supplied, so the registration call is accepted and ignored.
const noopRegisterCliBackend: OpenClawPluginApi["registerCliBackend"] = () => {};
const noopRegisterProvider: OpenClawPluginApi["registerProvider"] = () => {};
const noopRegisterSpeechProvider: OpenClawPluginApi["registerSpeechProvider"] = () => {};
// Fallback for realtime transcription provider registration.
const noopRegisterRealtimeTranscriptionProvider: OpenClawPluginApi["registerRealtimeTranscriptionProvider"] =
() => {};
// Fallback for realtime voice provider registration.
const noopRegisterRealtimeVoiceProvider: OpenClawPluginApi["registerRealtimeVoiceProvider"] =
() => {};
const noopRegisterMediaUnderstandingProvider: OpenClawPluginApi["registerMediaUnderstandingProvider"] =
() => {};
const noopRegisterImageGenerationProvider: OpenClawPluginApi["registerImageGenerationProvider"] =
@@ -97,6 +103,10 @@ export function buildPluginApi(params: BuildPluginApiParams): OpenClawPluginApi
registerCliBackend: handlers.registerCliBackend ?? noopRegisterCliBackend,
registerProvider: handlers.registerProvider ?? noopRegisterProvider,
registerSpeechProvider: handlers.registerSpeechProvider ?? noopRegisterSpeechProvider,
registerRealtimeTranscriptionProvider:
handlers.registerRealtimeTranscriptionProvider ?? noopRegisterRealtimeTranscriptionProvider,
registerRealtimeVoiceProvider:
handlers.registerRealtimeVoiceProvider ?? noopRegisterRealtimeVoiceProvider,
registerMediaUnderstandingProvider:
handlers.registerMediaUnderstandingProvider ?? noopRegisterMediaUnderstandingProvider,
registerImageGenerationProvider:

View File

@@ -28,6 +28,10 @@ describe("bundled capability metadata", () => {
cliBackendIds: uniqueStrings(manifest.cliBackends),
providerIds: uniqueStrings(manifest.providers),
speechProviderIds: uniqueStrings(manifest.contracts?.speechProviders),
realtimeTranscriptionProviderIds: uniqueStrings(
manifest.contracts?.realtimeTranscriptionProviders,
),
realtimeVoiceProviderIds: uniqueStrings(manifest.contracts?.realtimeVoiceProviders),
mediaUnderstandingProviderIds: uniqueStrings(
manifest.contracts?.mediaUnderstandingProviders,
),
@@ -41,6 +45,8 @@ describe("bundled capability metadata", () => {
entry.cliBackendIds.length > 0 ||
entry.providerIds.length > 0 ||
entry.speechProviderIds.length > 0 ||
entry.realtimeTranscriptionProviderIds.length > 0 ||
entry.realtimeVoiceProviderIds.length > 0 ||
entry.mediaUnderstandingProviderIds.length > 0 ||
entry.imageGenerationProviderIds.length > 0 ||
entry.webFetchProviderIds.length > 0 ||

View File

@@ -5,6 +5,8 @@ export type BundledPluginContractSnapshot = {
cliBackendIds: string[];
providerIds: string[];
speechProviderIds: string[];
realtimeTranscriptionProviderIds: string[];
realtimeVoiceProviderIds: string[];
mediaUnderstandingProviderIds: string[];
imageGenerationProviderIds: string[];
webFetchProviderIds: string[];
@@ -37,6 +39,10 @@ export const BUNDLED_PLUGIN_CONTRACT_SNAPSHOTS: readonly BundledPluginContractSn
cliBackendIds: uniqueStrings(manifest.cliBackends),
providerIds: uniqueStrings(manifest.providers),
speechProviderIds: uniqueStrings(manifest.contracts?.speechProviders),
realtimeTranscriptionProviderIds: uniqueStrings(
manifest.contracts?.realtimeTranscriptionProviders,
),
realtimeVoiceProviderIds: uniqueStrings(manifest.contracts?.realtimeVoiceProviders),
mediaUnderstandingProviderIds: uniqueStrings(manifest.contracts?.mediaUnderstandingProviders),
imageGenerationProviderIds: uniqueStrings(manifest.contracts?.imageGenerationProviders),
webFetchProviderIds: uniqueStrings(manifest.contracts?.webFetchProviders),
@@ -48,6 +54,8 @@ export const BUNDLED_PLUGIN_CONTRACT_SNAPSHOTS: readonly BundledPluginContractSn
entry.cliBackendIds.length > 0 ||
entry.providerIds.length > 0 ||
entry.speechProviderIds.length > 0 ||
entry.realtimeTranscriptionProviderIds.length > 0 ||
entry.realtimeVoiceProviderIds.length > 0 ||
entry.mediaUnderstandingProviderIds.length > 0 ||
entry.imageGenerationProviderIds.length > 0 ||
entry.webFetchProviderIds.length > 0 ||
@@ -68,6 +76,14 @@ export const BUNDLED_PROVIDER_PLUGIN_IDS = collectPluginIds((entry) => entry.pro
// Each constant hands collectPluginIds a selector that returns one capability's
// provider-id list for a snapshot entry.
export const BUNDLED_SPEECH_PLUGIN_IDS = collectPluginIds(
  ({ speechProviderIds }) => speechProviderIds,
);
export const BUNDLED_REALTIME_TRANSCRIPTION_PLUGIN_IDS = collectPluginIds(
  ({ realtimeTranscriptionProviderIds }) => realtimeTranscriptionProviderIds,
);
export const BUNDLED_REALTIME_VOICE_PLUGIN_IDS = collectPluginIds(
  ({ realtimeVoiceProviderIds }) => realtimeVoiceProviderIds,
);
export const BUNDLED_MEDIA_UNDERSTANDING_PLUGIN_IDS = collectPluginIds(
  ({ mediaUnderstandingProviderIds }) => mediaUnderstandingProviderIds,
);
@@ -84,6 +100,8 @@ export const BUNDLED_RUNTIME_CONTRACT_PLUGIN_IDS = [
(entry) =>
entry.providerIds.length > 0 ||
entry.speechProviderIds.length > 0 ||
entry.realtimeTranscriptionProviderIds.length > 0 ||
entry.realtimeVoiceProviderIds.length > 0 ||
entry.mediaUnderstandingProviderIds.length > 0 ||
entry.imageGenerationProviderIds.length > 0 ||
entry.webFetchProviderIds.length > 0 ||

View File

@@ -122,6 +122,8 @@ function createCapabilityPluginRecord(params: {
cliBackendIds: [],
providerIds: [],
speechProviderIds: [],
realtimeTranscriptionProviderIds: [],
realtimeVoiceProviderIds: [],
mediaUnderstandingProviderIds: [],
imageGenerationProviderIds: [],
webFetchProviderIds: [],
@@ -272,6 +274,12 @@ export function loadBundledCapabilityRuntimeRegistry(params: {
record.cliBackendIds.push(...captured.cliBackends.map((entry) => entry.id));
record.providerIds.push(...captured.providers.map((entry) => entry.id));
record.speechProviderIds.push(...captured.speechProviders.map((entry) => entry.id));
record.realtimeTranscriptionProviderIds.push(
...captured.realtimeTranscriptionProviders.map((entry) => entry.id),
);
record.realtimeVoiceProviderIds.push(
...captured.realtimeVoiceProviders.map((entry) => entry.id),
);
record.mediaUnderstandingProviderIds.push(
...captured.mediaUnderstandingProviders.map((entry) => entry.id),
);
@@ -309,6 +317,24 @@ export function loadBundledCapabilityRuntimeRegistry(params: {
rootDir: record.rootDir,
})),
);
registry.realtimeTranscriptionProviders.push(
...captured.realtimeTranscriptionProviders.map((provider) => ({
pluginId: record.id,
pluginName: record.name,
provider,
source: record.source,
rootDir: record.rootDir,
})),
);
registry.realtimeVoiceProviders.push(
...captured.realtimeVoiceProviders.map((provider) => ({
pluginId: record.id,
pluginName: record.name,
provider,
source: record.source,
rootDir: record.rootDir,
})),
);
registry.mediaUnderstandingProviders.push(
...captured.mediaUnderstandingProviders.map((provider) => ({
pluginId: record.id,

View File

@@ -102,7 +102,12 @@ function setBundledCapabilityFixture(contractKey: string) {
}
function expectCompatChainApplied(params: {
key: "speechProviders" | "mediaUnderstandingProviders" | "imageGenerationProviders";
key:
| "speechProviders"
| "realtimeTranscriptionProviders"
| "realtimeVoiceProviders"
| "mediaUnderstandingProviders"
| "imageGenerationProviders";
contractKey: string;
cfg: OpenClawConfig;
enablementCompat: {
@@ -201,6 +206,8 @@ describe("resolvePluginCapabilityProviders", () => {
it.each([
["speechProviders", "speechProviders"],
["realtimeTranscriptionProviders", "realtimeTranscriptionProviders"],
["realtimeVoiceProviders", "realtimeVoiceProviders"],
["mediaUnderstandingProviders", "mediaUnderstandingProviders"],
["imageGenerationProviders", "imageGenerationProviders"],
] as const)("applies bundled compat before fallback loading for %s", (key, contractKey) => {

View File

@@ -9,11 +9,15 @@ import type { PluginRegistry } from "./registry.js";
/**
 * Names of the capability provider lists that can be resolved from a plugin registry.
 * NOTE(review): presumably these must match array-valued keys of `PluginRegistry` — confirm.
 */
type CapabilityProviderRegistryKey =
| "speechProviders"
| "realtimeTranscriptionProviders"
| "realtimeVoiceProviders"
| "mediaUnderstandingProviders"
| "imageGenerationProviders";
/**
 * Names of the manifest contract lists paired with the registry keys above.
 * Currently the same set of literals; kept as a separate type so the two can be mapped
 * explicitly through CAPABILITY_CONTRACT_KEY.
 */
type CapabilityContractKey =
| "speechProviders"
| "realtimeTranscriptionProviders"
| "realtimeVoiceProviders"
| "mediaUnderstandingProviders"
| "imageGenerationProviders";
@@ -22,6 +26,8 @@ type CapabilityProviderForKey<K extends CapabilityProviderRegistryKey> =
/**
 * Maps each capability registry key to its manifest contract key.
 * Every entry is currently an identity mapping; the `Record` annotation makes the compiler
 * reject a registry key that has no mapping.
 */
const CAPABILITY_CONTRACT_KEY: Record<CapabilityProviderRegistryKey, CapabilityContractKey> = {
speechProviders: "speechProviders",
realtimeTranscriptionProviders: "realtimeTranscriptionProviders",
realtimeVoiceProviders: "realtimeVoiceProviders",
mediaUnderstandingProviders: "mediaUnderstandingProviders",
imageGenerationProviders: "imageGenerationProviders",
};

View File

@@ -10,6 +10,8 @@ import type {
OpenClawPluginCliCommandDescriptor,
OpenClawPluginCliRegistrar,
ProviderPlugin,
RealtimeTranscriptionProviderPlugin,
RealtimeVoiceProviderPlugin,
SpeechProviderPlugin,
WebFetchProviderPlugin,
WebSearchProviderPlugin,
@@ -27,6 +29,8 @@ export type CapturedPluginRegistration = {
cliRegistrars: CapturedPluginCliRegistration[];
cliBackends: CliBackendPlugin[];
speechProviders: SpeechProviderPlugin[];
realtimeTranscriptionProviders: RealtimeTranscriptionProviderPlugin[];
realtimeVoiceProviders: RealtimeVoiceProviderPlugin[];
mediaUnderstandingProviders: MediaUnderstandingProviderPlugin[];
imageGenerationProviders: ImageGenerationProviderPlugin[];
webFetchProviders: WebFetchProviderPlugin[];
@@ -42,6 +46,8 @@ export function createCapturedPluginRegistration(params?: {
const cliRegistrars: CapturedPluginCliRegistration[] = [];
const cliBackends: CliBackendPlugin[] = [];
const speechProviders: SpeechProviderPlugin[] = [];
const realtimeTranscriptionProviders: RealtimeTranscriptionProviderPlugin[] = [];
const realtimeVoiceProviders: RealtimeVoiceProviderPlugin[] = [];
const mediaUnderstandingProviders: MediaUnderstandingProviderPlugin[] = [];
const imageGenerationProviders: ImageGenerationProviderPlugin[] = [];
const webFetchProviders: WebFetchProviderPlugin[] = [];
@@ -59,6 +65,8 @@ export function createCapturedPluginRegistration(params?: {
cliRegistrars,
cliBackends,
speechProviders,
realtimeTranscriptionProviders,
realtimeVoiceProviders,
mediaUnderstandingProviders,
imageGenerationProviders,
webFetchProviders,
@@ -106,6 +114,12 @@ export function createCapturedPluginRegistration(params?: {
registerSpeechProvider(provider: SpeechProviderPlugin) {
speechProviders.push(provider);
},
registerRealtimeTranscriptionProvider(provider: RealtimeTranscriptionProviderPlugin) {
realtimeTranscriptionProviders.push(provider);
},
registerRealtimeVoiceProvider(provider: RealtimeVoiceProviderPlugin) {
realtimeVoiceProviders.push(provider);
},
registerMediaUnderstandingProvider(provider: MediaUnderstandingProviderPlugin) {
mediaUnderstandingProviders.push(provider);
},

View File

@@ -155,9 +155,10 @@ async function loadPluginCliCommandRegistry(
export async function getPluginCliCommandDescriptors(
cfg?: OpenClawConfig,
env?: NodeJS.ProcessEnv,
loaderOptions?: Pick<PluginLoadOptions, "pluginSdkResolution">,
): Promise<OpenClawPluginCliCommandDescriptor[]> {
try {
const { registry } = await loadPluginCliMetadataRegistry(cfg, env);
const { registry } = await loadPluginCliMetadataRegistry(cfg, env, loaderOptions);
const seen = new Set<string>();
const descriptors: OpenClawPluginCliCommandDescriptor[] = [];
for (const entry of registry.cliRegistrars) {

View File

@@ -8,6 +8,8 @@ import {
pluginRegistrationContractRegistry,
providerContractLoadError,
providerContractPluginIds,
realtimeTranscriptionProviderContractRegistry,
realtimeVoiceProviderContractRegistry,
resolveWebFetchProviderContractEntriesForPluginId,
resolveWebSearchProviderContractEntriesForPluginId,
speechProviderContractRegistry,
@@ -27,7 +29,11 @@ describe("plugin contract registry", () => {
predicate: (plugin: {
origin: string;
providers: unknown[];
contracts?: { speechProviders?: unknown[] };
contracts?: {
speechProviders?: unknown[];
realtimeTranscriptionProviders?: unknown[];
realtimeVoiceProviders?: unknown[];
};
}) => boolean;
}) {
expect(uniqueSortedStrings(params.actualPluginIds)).toEqual(
@@ -39,7 +45,11 @@ describe("plugin contract registry", () => {
predicate: (plugin: {
origin: string;
providers: unknown[];
contracts?: { speechProviders?: unknown[] };
contracts?: {
speechProviders?: unknown[];
realtimeTranscriptionProviders?: unknown[];
realtimeVoiceProviders?: unknown[];
};
}) => boolean,
) {
return loadPluginManifestRegistry({})
@@ -70,6 +80,14 @@ describe("plugin contract registry", () => {
name: "does not duplicate bundled media provider ids",
ids: () => mediaUnderstandingProviderContractRegistry.map((entry) => entry.provider.id),
},
{
name: "does not duplicate bundled realtime transcription provider ids",
ids: () => realtimeTranscriptionProviderContractRegistry.map((entry) => entry.provider.id),
},
{
name: "does not duplicate bundled realtime voice provider ids",
ids: () => realtimeVoiceProviderContractRegistry.map((entry) => entry.provider.id),
},
{
name: "does not duplicate bundled image-generation provider ids",
ids: () => imageGenerationProviderContractRegistry.map((entry) => entry.provider.id),
@@ -101,6 +119,23 @@ describe("plugin contract registry", () => {
});
});
it("covers every bundled realtime voice plugin discovered from manifests", () => {
  // Plugin ids present in the realtime voice contract registry.
  const actualPluginIds = realtimeVoiceProviderContractRegistry.map(({ pluginId }) => pluginId);
  expectRegistryPluginIds({
    actualPluginIds,
    // Expected set: bundled plugins declaring at least one realtime voice provider contract.
    predicate: ({ origin, contracts }) => {
      if (origin !== "bundled") {
        return false;
      }
      const declaredCount = contracts?.realtimeVoiceProviders?.length ?? 0;
      return declaredCount > 0;
    },
  });
});
it("covers every bundled realtime transcription plugin discovered from manifests", () => {
  // Plugin ids present in the realtime transcription contract registry.
  const actualPluginIds = realtimeTranscriptionProviderContractRegistry.map(
    ({ pluginId }) => pluginId,
  );
  expectRegistryPluginIds({
    actualPluginIds,
    // Expected set: bundled plugins declaring at least one realtime transcription contract.
    predicate: ({ origin, contracts }) => {
      if (origin !== "bundled") {
        return false;
      }
      const declaredCount = contracts?.realtimeTranscriptionProviders?.length ?? 0;
      return declaredCount > 0;
    },
  });
});
it("covers every bundled web fetch plugin from the shared resolver", () => {
const bundledWebFetchPluginIds = resolveBundledWebFetchPluginIds({});

View File

@@ -3,6 +3,8 @@ import {
BUNDLED_MEDIA_UNDERSTANDING_PLUGIN_IDS,
BUNDLED_PLUGIN_CONTRACT_SNAPSHOTS,
BUNDLED_PROVIDER_PLUGIN_IDS,
BUNDLED_REALTIME_TRANSCRIPTION_PLUGIN_IDS,
BUNDLED_REALTIME_VOICE_PLUGIN_IDS,
BUNDLED_SPEECH_PLUGIN_IDS,
BUNDLED_WEB_FETCH_PLUGIN_IDS,
BUNDLED_WEB_SEARCH_PLUGIN_IDS,
@@ -12,6 +14,8 @@ import type {
ImageGenerationProviderPlugin,
MediaUnderstandingProviderPlugin,
ProviderPlugin,
RealtimeTranscriptionProviderPlugin,
RealtimeVoiceProviderPlugin,
SpeechProviderPlugin,
WebFetchProviderPlugin,
WebSearchProviderPlugin,
@@ -19,6 +23,8 @@ import type {
import {
loadVitestImageGenerationProviderContractRegistry,
loadVitestMediaUnderstandingProviderContractRegistry,
loadVitestRealtimeTranscriptionProviderContractRegistry,
loadVitestRealtimeVoiceProviderContractRegistry,
loadVitestSpeechProviderContractRegistry,
} from "./speech-vitest-registry.js";
@@ -38,6 +44,9 @@ type WebFetchProviderContractEntry = CapabilityContractEntry<WebFetchProviderPlu
};
type SpeechProviderContractEntry = CapabilityContractEntry<SpeechProviderPlugin>;
// Contract registry entry for a realtime transcription provider (see CapabilityContractEntry).
type RealtimeTranscriptionProviderContractEntry =
CapabilityContractEntry<RealtimeTranscriptionProviderPlugin>;
// Contract registry entry for a realtime voice provider (see CapabilityContractEntry).
type RealtimeVoiceProviderContractEntry = CapabilityContractEntry<RealtimeVoiceProviderPlugin>;
type MediaUnderstandingProviderContractEntry =
CapabilityContractEntry<MediaUnderstandingProviderPlugin>;
type ImageGenerationProviderContractEntry = CapabilityContractEntry<ImageGenerationProviderPlugin>;
@@ -47,6 +56,8 @@ type PluginRegistrationContractEntry = {
cliBackendIds: string[];
providerIds: string[];
speechProviderIds: string[];
realtimeTranscriptionProviderIds: string[];
realtimeVoiceProviderIds: string[];
mediaUnderstandingProviderIds: string[];
imageGenerationProviderIds: string[];
webFetchProviderIds: string[];
@@ -94,6 +105,10 @@ let webSearchProviderContractRegistryByPluginIdCache: Map<
WebSearchProviderContractEntry[]
> | null = null;
let speechProviderContractRegistryCache: SpeechProviderContractEntry[] | null = null;
// Memoized realtime transcription contract entries; stays null until the loader first runs.
let realtimeTranscriptionProviderContractRegistryCache:
| RealtimeTranscriptionProviderContractEntry[]
| null = null;
// Memoized realtime voice contract entries; stays null until the loader first runs.
let realtimeVoiceProviderContractRegistryCache: RealtimeVoiceProviderContractEntry[] | null = null;
let mediaUnderstandingProviderContractRegistryCache:
| MediaUnderstandingProviderContractEntry[]
| null = null;
@@ -387,6 +402,36 @@ function loadSpeechProviderContractRegistry(): SpeechProviderContractEntry[] {
return speechProviderContractRegistryCache;
}
/**
 * Returns the realtime voice provider contract entries, loading them on first use and
 * reusing the module-level cache on later calls.
 *
 * When `process.env.VITEST` is set the entries come from the Vitest registry loader;
 * otherwise they are read from the bundled capability runtime registry (resolved
 * against "dist") and reduced to `{ pluginId, provider }` pairs.
 */
function loadRealtimeVoiceProviderContractRegistry(): RealtimeVoiceProviderContractEntry[] {
  const cached = realtimeVoiceProviderContractRegistryCache;
  if (cached) {
    return cached;
  }

  let loaded: RealtimeVoiceProviderContractEntry[];
  if (process.env.VITEST) {
    loaded = loadVitestRealtimeVoiceProviderContractRegistry();
  } else {
    const { realtimeVoiceProviders } = loadBundledCapabilityRuntimeRegistry({
      pluginIds: BUNDLED_REALTIME_VOICE_PLUGIN_IDS,
      pluginSdkResolution: "dist",
    });
    loaded = realtimeVoiceProviders.map(({ pluginId, provider }) => ({ pluginId, provider }));
  }

  realtimeVoiceProviderContractRegistryCache = loaded;
  return loaded;
}
/**
 * Returns the realtime transcription provider contract entries, loading them on first use
 * and reusing the module-level cache on later calls.
 *
 * When `process.env.VITEST` is set the entries come from the Vitest registry loader;
 * otherwise they are read from the bundled capability runtime registry (resolved
 * against "dist") and reduced to `{ pluginId, provider }` pairs.
 */
function loadRealtimeTranscriptionProviderContractRegistry(): RealtimeTranscriptionProviderContractEntry[] {
  const cached = realtimeTranscriptionProviderContractRegistryCache;
  if (cached) {
    return cached;
  }

  let loaded: RealtimeTranscriptionProviderContractEntry[];
  if (process.env.VITEST) {
    loaded = loadVitestRealtimeTranscriptionProviderContractRegistry();
  } else {
    const { realtimeTranscriptionProviders } = loadBundledCapabilityRuntimeRegistry({
      pluginIds: BUNDLED_REALTIME_TRANSCRIPTION_PLUGIN_IDS,
      pluginSdkResolution: "dist",
    });
    loaded = realtimeTranscriptionProviders.map(({ pluginId, provider }) => ({
      pluginId,
      provider,
    }));
  }

  realtimeTranscriptionProviderContractRegistryCache = loaded;
  return loaded;
}
function loadMediaUnderstandingProviderContractRegistry(): MediaUnderstandingProviderContractEntry[] {
if (!mediaUnderstandingProviderContractRegistryCache) {
mediaUnderstandingProviderContractRegistryCache = process.env.VITEST
@@ -519,6 +564,12 @@ export const speechProviderContractRegistry: SpeechProviderContractEntry[] = cre
loadSpeechProviderContractRegistry,
);
// Realtime transcription contract registry exposed as an array view over its loader.
// NOTE(review): presumably createLazyArrayView defers the loader until first access — confirm.
export const realtimeTranscriptionProviderContractRegistry: RealtimeTranscriptionProviderContractEntry[] =
createLazyArrayView(loadRealtimeTranscriptionProviderContractRegistry);
// Realtime voice contract registry exposed as an array view over its loader.
export const realtimeVoiceProviderContractRegistry: RealtimeVoiceProviderContractEntry[] =
createLazyArrayView(loadRealtimeVoiceProviderContractRegistry);
export const mediaUnderstandingProviderContractRegistry: MediaUnderstandingProviderContractEntry[] =
createLazyArrayView(loadMediaUnderstandingProviderContractRegistry);
@@ -531,6 +582,8 @@ function loadPluginRegistrationContractRegistry(): PluginRegistrationContractEnt
cliBackendIds: uniqueStrings(entry.cliBackendIds),
providerIds: uniqueStrings(entry.providerIds),
speechProviderIds: uniqueStrings(entry.speechProviderIds),
realtimeTranscriptionProviderIds: uniqueStrings(entry.realtimeTranscriptionProviderIds),
realtimeVoiceProviderIds: uniqueStrings(entry.realtimeVoiceProviderIds),
mediaUnderstandingProviderIds: uniqueStrings(entry.mediaUnderstandingProviderIds),
imageGenerationProviderIds: uniqueStrings(entry.imageGenerationProviderIds),
webFetchProviderIds: uniqueStrings(entry.webFetchProviderIds),

View File

@@ -5,6 +5,8 @@ import { createJiti } from "jiti";
import {
BUNDLED_IMAGE_GENERATION_PLUGIN_IDS,
BUNDLED_MEDIA_UNDERSTANDING_PLUGIN_IDS,
BUNDLED_REALTIME_TRANSCRIPTION_PLUGIN_IDS,
BUNDLED_REALTIME_VOICE_PLUGIN_IDS,
BUNDLED_SPEECH_PLUGIN_IDS,
} from "../bundled-capability-metadata.js";
import { loadBundledCapabilityRuntimeRegistry } from "../bundled-capability-runtime.js";
@@ -13,6 +15,8 @@ import { buildPluginLoaderAliasMap, buildPluginLoaderJitiOptions } from "../sdk-
import type {
ImageGenerationProviderPlugin,
MediaUnderstandingProviderPlugin,
RealtimeTranscriptionProviderPlugin,
RealtimeVoiceProviderPlugin,
SpeechProviderPlugin,
} from "../types.js";
@@ -26,6 +30,16 @@ export type MediaUnderstandingProviderContractEntry = {
provider: MediaUnderstandingProviderPlugin;
};
/** Pairs a plugin id with a realtime voice provider contributed by that plugin. */
export type RealtimeVoiceProviderContractEntry = {
pluginId: string;
provider: RealtimeVoiceProviderPlugin;
};
/** Pairs a plugin id with a realtime transcription provider contributed by that plugin. */
export type RealtimeTranscriptionProviderContractEntry = {
pluginId: string;
provider: RealtimeTranscriptionProviderPlugin;
};
export type ImageGenerationProviderContractEntry = {
pluginId: string;
provider: ImageGenerationProviderPlugin;
@@ -190,6 +204,96 @@ export function loadVitestMediaUnderstandingProviderContractRegistry(): MediaUnd
return registrations;
}
/**
 * Builds the realtime voice provider contract entries for Vitest runs.
 *
 * For each bundled realtime voice plugin manifest that has a root directory and a
 * `test-api.ts` file, the module is loaded and searched for an export named
 * `build…RealtimeVoiceProvider`; when found, its result is recorded and the plugin is
 * marked resolved. Plugins still unresolved afterwards are loaded through the bundled
 * capability runtime registry (resolved against "dist") and appended.
 */
export function loadVitestRealtimeVoiceProviderContractRegistry(): RealtimeVoiceProviderContractEntry[] {
  const { manifests, unresolvedPluginIds } = resolveTestApiModuleRecords(
    BUNDLED_REALTIME_VOICE_PLUGIN_IDS,
  );
  const entries: RealtimeVoiceProviderContractEntry[] = [];

  for (const manifest of manifests) {
    const { rootDir } = manifest;
    if (!rootDir) {
      continue;
    }
    const testApiFile = path.join(rootDir, "test-api.ts");
    if (!fs.existsSync(testApiFile)) {
      continue;
    }
    const loadModule = createVitestCapabilityLoader(testApiFile);
    const buildProvider = resolveNamedBuilder<RealtimeVoiceProviderPlugin>(
      loadModule(testApiFile),
      /^build.+RealtimeVoiceProvider$/u,
    );
    if (!buildProvider) {
      // No matching builder export: leave the plugin unresolved for the runtime fallback.
      continue;
    }
    entries.push({ pluginId: manifest.id, provider: buildProvider() });
    unresolvedPluginIds.delete(manifest.id);
  }

  if (unresolvedPluginIds.size > 0) {
    const { realtimeVoiceProviders } = loadBundledCapabilityRuntimeRegistry({
      pluginIds: [...unresolvedPluginIds],
      pluginSdkResolution: "dist",
    });
    for (const { pluginId, provider } of realtimeVoiceProviders) {
      entries.push({ pluginId, provider });
    }
  }

  return entries;
}
/**
 * Builds the realtime transcription provider contract entries for Vitest runs.
 *
 * For each bundled realtime transcription plugin manifest that has a root directory and a
 * `test-api.ts` file, the module is loaded and searched for an export named
 * `build…RealtimeTranscriptionProvider`; when found, its result is recorded and the plugin
 * is marked resolved. Plugins still unresolved afterwards are loaded through the bundled
 * capability runtime registry (resolved against "dist") and appended.
 */
export function loadVitestRealtimeTranscriptionProviderContractRegistry(): RealtimeTranscriptionProviderContractEntry[] {
  const { manifests, unresolvedPluginIds } = resolveTestApiModuleRecords(
    BUNDLED_REALTIME_TRANSCRIPTION_PLUGIN_IDS,
  );
  const entries: RealtimeTranscriptionProviderContractEntry[] = [];

  for (const manifest of manifests) {
    const { rootDir } = manifest;
    if (!rootDir) {
      continue;
    }
    const testApiFile = path.join(rootDir, "test-api.ts");
    if (!fs.existsSync(testApiFile)) {
      continue;
    }
    const loadModule = createVitestCapabilityLoader(testApiFile);
    const buildProvider = resolveNamedBuilder<RealtimeTranscriptionProviderPlugin>(
      loadModule(testApiFile),
      /^build.+RealtimeTranscriptionProvider$/u,
    );
    if (!buildProvider) {
      // No matching builder export: leave the plugin unresolved for the runtime fallback.
      continue;
    }
    entries.push({ pluginId: manifest.id, provider: buildProvider() });
    unresolvedPluginIds.delete(manifest.id);
  }

  if (unresolvedPluginIds.size > 0) {
    const { realtimeTranscriptionProviders } = loadBundledCapabilityRuntimeRegistry({
      pluginIds: [...unresolvedPluginIds],
      pluginSdkResolution: "dist",
    });
    for (const { pluginId, provider } of realtimeTranscriptionProviders) {
      entries.push({ pluginId, provider });
    }
  }

  return entries;
}
export function loadVitestImageGenerationProviderContractRegistry(): ImageGenerationProviderContractEntry[] {
const registrations: ImageGenerationProviderContractEntry[] = [];
const { manifests, unresolvedPluginIds } = resolveTestApiModuleRecords(

View File

@@ -590,6 +590,8 @@ function createPluginRecord(params: {
cliBackendIds: [],
providerIds: [],
speechProviderIds: [],
realtimeTranscriptionProviderIds: [],
realtimeVoiceProviderIds: [],
mediaUnderstandingProviderIds: [],
imageGenerationProviderIds: [],
webFetchProviderIds: [],

View File

@@ -52,6 +52,8 @@ export type PluginManifest = {
export type PluginManifestContracts = {
speechProviders?: string[];
realtimeTranscriptionProviders?: string[];
realtimeVoiceProviders?: string[];
mediaUnderstandingProviders?: string[];
imageGenerationProviders?: string[];
webFetchProviders?: string[];
@@ -125,6 +127,8 @@ function normalizeManifestContracts(value: unknown): PluginManifestContracts | u
}
const speechProviders = normalizeStringList(value.speechProviders);
const realtimeTranscriptionProviders = normalizeStringList(value.realtimeTranscriptionProviders);
const realtimeVoiceProviders = normalizeStringList(value.realtimeVoiceProviders);
const mediaUnderstandingProviders = normalizeStringList(value.mediaUnderstandingProviders);
const imageGenerationProviders = normalizeStringList(value.imageGenerationProviders);
const webFetchProviders = normalizeStringList(value.webFetchProviders);
@@ -132,6 +136,8 @@ function normalizeManifestContracts(value: unknown): PluginManifestContracts | u
const tools = normalizeStringList(value.tools);
const contracts = {
...(speechProviders.length > 0 ? { speechProviders } : {}),
...(realtimeTranscriptionProviders.length > 0 ? { realtimeTranscriptionProviders } : {}),
...(realtimeVoiceProviders.length > 0 ? { realtimeVoiceProviders } : {}),
...(mediaUnderstandingProviders.length > 0 ? { mediaUnderstandingProviders } : {}),
...(imageGenerationProviders.length > 0 ? { imageGenerationProviders } : {}),
...(webFetchProviders.length > 0 ? { webFetchProviders } : {}),

View File

@@ -11,6 +11,8 @@ export function createEmptyPluginRegistry(): PluginRegistry {
providers: [],
cliBackends: [],
speechProviders: [],
realtimeTranscriptionProviders: [],
realtimeVoiceProviders: [],
mediaUnderstandingProviders: [],
imageGenerationProviders: [],
webFetchProviders: [],

View File

@@ -38,7 +38,7 @@ import {
import type {
CliBackendPlugin,
ImageGenerationProviderPlugin,
WebFetchProviderPlugin,
RealtimeTranscriptionProviderPlugin,
OpenClawPluginApi,
OpenClawPluginChannelRegistration,
OpenClawPluginCliCommandDescriptor,
@@ -52,6 +52,7 @@ import type {
OpenClawPluginHookOptions,
MediaUnderstandingProviderPlugin,
ProviderPlugin,
RealtimeVoiceProviderPlugin,
OpenClawPluginService,
OpenClawPluginToolContext,
OpenClawPluginToolFactory,
@@ -67,6 +68,7 @@ import type {
PluginHookHandlerMap,
PluginHookRegistration as TypedPluginHookRegistration,
SpeechProviderPlugin,
WebFetchProviderPlugin,
WebSearchProviderPlugin,
} from "./types.js";
@@ -142,6 +144,10 @@ type PluginOwnedProviderRegistration<T extends { id: string }> = {
export type PluginSpeechProviderRegistration =
PluginOwnedProviderRegistration<SpeechProviderPlugin>;
/** Registry entry for a realtime transcription provider together with its owning plugin. */
export type PluginRealtimeTranscriptionProviderRegistration =
PluginOwnedProviderRegistration<RealtimeTranscriptionProviderPlugin>;
/** Registry entry for a realtime voice provider together with its owning plugin. */
export type PluginRealtimeVoiceProviderRegistration =
PluginOwnedProviderRegistration<RealtimeVoiceProviderPlugin>;
export type PluginMediaUnderstandingProviderRegistration =
PluginOwnedProviderRegistration<MediaUnderstandingProviderPlugin>;
export type PluginImageGenerationProviderRegistration =
@@ -213,6 +219,8 @@ export type PluginRecord = {
cliBackendIds: string[];
providerIds: string[];
speechProviderIds: string[];
realtimeTranscriptionProviderIds: string[];
realtimeVoiceProviderIds: string[];
mediaUnderstandingProviderIds: string[];
imageGenerationProviderIds: string[];
webFetchProviderIds: string[];
@@ -239,6 +247,8 @@ export type PluginRegistry = {
providers: PluginProviderRegistration[];
cliBackends?: PluginCliBackendRegistration[];
speechProviders: PluginSpeechProviderRegistration[];
realtimeTranscriptionProviders: PluginRealtimeTranscriptionProviderRegistration[];
realtimeVoiceProviders: PluginRealtimeVoiceProviderRegistration[];
mediaUnderstandingProviders: PluginMediaUnderstandingProviderRegistration[];
imageGenerationProviders: PluginImageGenerationProviderRegistration[];
webFetchProviders: PluginWebFetchProviderRegistration[];
@@ -699,6 +709,32 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) {
});
};
// Registers a realtime transcription provider on behalf of `record` by delegating to
// registerUniqueProviderLike with the registry list and the record's owned-id list.
const registerRealtimeTranscriptionProvider = (
  record: PluginRecord,
  provider: RealtimeTranscriptionProviderPlugin,
) => {
  const { realtimeTranscriptionProviderIds: ownedIds } = record;
  const { realtimeTranscriptionProviders: registrations } = registry;
  registerUniqueProviderLike({
    kindLabel: "realtime transcription provider",
    record,
    provider,
    registrations,
    ownedIds,
  });
};
// Registers a realtime voice provider on behalf of `record` by delegating to
// registerUniqueProviderLike with the registry list and the record's owned-id list.
const registerRealtimeVoiceProvider = (
  record: PluginRecord,
  provider: RealtimeVoiceProviderPlugin,
) => {
  const { realtimeVoiceProviderIds: ownedIds } = record;
  const { realtimeVoiceProviders: registrations } = registry;
  registerUniqueProviderLike({
    kindLabel: "realtime voice provider",
    record,
    provider,
    registrations,
    ownedIds,
  });
};
const registerMediaUnderstandingProvider = (
record: PluginRecord,
provider: MediaUnderstandingProviderPlugin,
@@ -1009,6 +1045,10 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) {
registerHttpRoute: (routeParams) => registerHttpRoute(record, routeParams),
registerProvider: (provider) => registerProvider(record, provider),
registerSpeechProvider: (provider) => registerSpeechProvider(record, provider),
registerRealtimeTranscriptionProvider: (provider) =>
registerRealtimeTranscriptionProvider(record, provider),
registerRealtimeVoiceProvider: (provider) =>
registerRealtimeVoiceProvider(record, provider),
registerMediaUnderstandingProvider: (provider) =>
registerMediaUnderstandingProvider(record, provider),
registerImageGenerationProvider: (provider) =>
@@ -1198,6 +1238,8 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) {
registerProvider,
registerCliBackend,
registerSpeechProvider,
registerRealtimeTranscriptionProvider,
registerRealtimeVoiceProvider,
registerMediaUnderstandingProvider,
registerImageGenerationProvider,
registerWebSearchProvider,

View File

@@ -199,6 +199,8 @@ describe("setActivePluginRegistry", () => {
cliBackendIds: [],
providerIds: [],
speechProviderIds: [],
realtimeTranscriptionProviderIds: [],
realtimeVoiceProviderIds: [],
mediaUnderstandingProviderIds: [],
imageGenerationProviderIds: [],
webFetchProviderIds: [],
@@ -225,6 +227,8 @@ describe("setActivePluginRegistry", () => {
cliBackendIds: [],
providerIds: [],
speechProviderIds: [],
realtimeTranscriptionProviderIds: [],
realtimeVoiceProviderIds: [],
mediaUnderstandingProviderIds: [],
imageGenerationProviderIds: [],
webFetchProviderIds: [],

View File

@@ -51,6 +51,8 @@ export function createPluginRecord(
cliBackendIds: [],
providerIds: [],
speechProviderIds: [],
realtimeTranscriptionProviderIds: [],
realtimeVoiceProviderIds: [],
mediaUnderstandingProviderIds: [],
imageGenerationProviderIds: [],
webFetchProviderIds: [],
@@ -107,7 +109,7 @@ export function createCustomHook(params: {
export function createPluginLoadResult(
overrides: Partial<PluginLoadResult> & Pick<PluginLoadResult, "plugins"> = { plugins: [] },
): PluginLoadResult {
const { plugins, ...rest } = overrides;
const { plugins, realtimeTranscriptionProviders, realtimeVoiceProviders, ...rest } = overrides;
return {
plugins,
diagnostics: [],
@@ -129,6 +131,8 @@ export function createPluginLoadResult(
commands: [],
conversationBindingResolvedHandlers: [],
...rest,
realtimeTranscriptionProviders: realtimeTranscriptionProviders ?? [],
realtimeVoiceProviders: realtimeVoiceProviders ?? [],
};
}

View File

@@ -28,6 +28,8 @@ export type PluginCapabilityKind =
| "cli-backend"
| "text-inference"
| "speech"
| "realtime-transcription"
| "realtime-voice"
| "media-understanding"
| "image-generation"
| "web-search"
@@ -233,6 +235,8 @@ function buildCapabilityEntries(plugin: PluginRegistry["plugins"][number]) {
{ kind: "cli-backend" as const, ids: plugin.cliBackendIds ?? [] },
{ kind: "text-inference" as const, ids: plugin.providerIds },
{ kind: "speech" as const, ids: plugin.speechProviderIds },
{ kind: "realtime-transcription" as const, ids: plugin.realtimeTranscriptionProviderIds },
{ kind: "realtime-voice" as const, ids: plugin.realtimeVoiceProviderIds },
{ kind: "media-understanding" as const, ids: plugin.mediaUnderstandingProviderIds },
{ kind: "image-generation" as const, ids: plugin.imageGenerationProviderIds },
{ kind: "web-search" as const, ids: plugin.webSearchProviderIds },

View File

@@ -30,6 +30,22 @@ import type { HookEntry } from "../hooks/types.js";
import type { ImageGenerationProvider } from "../image-generation/types.js";
import type { ProviderUsageSnapshot } from "../infra/provider-usage.types.js";
import type { MediaUnderstandingProvider } from "../media-understanding/types.js";
import type {
RealtimeTranscriptionProviderConfig,
RealtimeTranscriptionProviderConfiguredContext,
RealtimeTranscriptionProviderId,
RealtimeTranscriptionProviderResolveConfigContext,
RealtimeTranscriptionSession,
RealtimeTranscriptionSessionCreateRequest,
} from "../realtime-transcription/provider-types.js";
import type {
RealtimeVoiceBridge,
RealtimeVoiceBridgeCreateRequest,
RealtimeVoiceProviderConfig,
RealtimeVoiceProviderConfiguredContext,
RealtimeVoiceProviderId,
RealtimeVoiceProviderResolveConfigContext,
} from "../realtime-voice/provider-types.js";
import type { RuntimeEnv } from "../runtime.js";
import type {
RuntimeWebFetchMetadata,
@@ -1526,6 +1542,38 @@ export type PluginSpeechProviderEntry = SpeechProviderPlugin & {
pluginId: string;
};
/**
 * Realtime transcription capability registered by a plugin
 * (passed to `registerRealtimeTranscriptionProvider`).
 */
export type RealtimeTranscriptionProviderPlugin = {
/** Provider identifier. */
id: RealtimeTranscriptionProviderId;
/** Label for the provider. NOTE(review): presumably human-readable display text — confirm. */
label: string;
/** Optional alternative names. NOTE(review): lookup semantics are not visible here — confirm. */
aliases?: string[];
/** Optional ordering hint. NOTE(review): presumably used for automatic provider selection; direction unverified. */
autoSelectOrder?: number;
/** Optional hook that derives this provider's config from the given resolve context. */
resolveConfig?: (
ctx: RealtimeTranscriptionProviderResolveConfigContext,
) => RealtimeTranscriptionProviderConfig;
/** Reports whether the provider is configured for the given context. */
isConfigured: (ctx: RealtimeTranscriptionProviderConfiguredContext) => boolean;
/** Creates a transcription session for the request; the signature is synchronous. */
createSession: (req: RealtimeTranscriptionSessionCreateRequest) => RealtimeTranscriptionSession;
};
/** A realtime transcription provider plus the id of the plugin that supplies it. */
export type PluginRealtimeTranscriptionProviderEntry = RealtimeTranscriptionProviderPlugin & {
pluginId: string;
};
/**
 * Realtime voice capability registered by a plugin
 * (passed to `registerRealtimeVoiceProvider`).
 */
export type RealtimeVoiceProviderPlugin = {
/** Provider identifier. */
id: RealtimeVoiceProviderId;
/** Label for the provider. NOTE(review): presumably human-readable display text — confirm. */
label: string;
/** Optional alternative names. NOTE(review): lookup semantics are not visible here — confirm. */
aliases?: string[];
/** Optional ordering hint. NOTE(review): presumably used for automatic provider selection; direction unverified. */
autoSelectOrder?: number;
/** Optional hook that derives this provider's config from the given resolve context. */
resolveConfig?: (ctx: RealtimeVoiceProviderResolveConfigContext) => RealtimeVoiceProviderConfig;
/** Reports whether the provider is configured for the given context. */
isConfigured: (ctx: RealtimeVoiceProviderConfiguredContext) => boolean;
/** Creates a voice bridge for the request; the signature is synchronous. */
createBridge: (req: RealtimeVoiceBridgeCreateRequest) => RealtimeVoiceBridge;
};
/** A realtime voice provider plus the id of the plugin that supplies it. */
export type PluginRealtimeVoiceProviderEntry = RealtimeVoiceProviderPlugin & {
pluginId: string;
};
/** Plugin-facing alias of `MediaUnderstandingProvider`. */
export type MediaUnderstandingProviderPlugin = MediaUnderstandingProvider;
/** Plugin-facing alias of `ImageGenerationProvider`. */
export type ImageGenerationProviderPlugin = ImageGenerationProvider;
@@ -1850,6 +1898,10 @@ export type OpenClawPluginApi = {
registerProvider: (provider: ProviderPlugin) => void;
/** Register a speech synthesis provider (speech capability). */
registerSpeechProvider: (provider: SpeechProviderPlugin) => void;
/** Register a realtime transcription provider (streaming STT capability). */
registerRealtimeTranscriptionProvider: (provider: RealtimeTranscriptionProviderPlugin) => void;
/** Register a realtime voice provider (duplex voice capability). */
registerRealtimeVoiceProvider: (provider: RealtimeVoiceProviderPlugin) => void;
/** Register a media understanding provider (media understanding capability). */
registerMediaUnderstandingProvider: (provider: MediaUnderstandingProviderPlugin) => void;
/** Register an image generation provider (image generation capability). */