refactor: move voice-call realtime providers into extensions

This commit is contained in:
Peter Steinberger
2026-04-04 12:04:37 +09:00
parent 61f93540b2
commit a23ab9b906
90 changed files with 3134 additions and 792 deletions

View File

@@ -1,16 +1,14 @@
import { Command } from "commander";
import { getPluginCliCommandDescriptors } from "../../plugins/cli.js";
import type { OpenClawPluginCliCommandDescriptor } from "../../plugins/types.js";
import type { PluginLoadOptions } from "../../plugins/loader.js";
import { VERSION } from "../../version.js";
import { getCoreCliCommandDescriptors } from "./core-command-descriptors.js";
import { configureProgramHelp } from "./help.js";
import { getSubCliEntries } from "./subcli-descriptors.js";
type RootHelpRenderOptions = {
pluginDescriptors?: OpenClawPluginCliCommandDescriptor[] | null;
};
type RootHelpLoaderOptions = Pick<PluginLoadOptions, "pluginSdkResolution">;
async function buildRootHelpProgram(options?: RootHelpRenderOptions): Promise<Command> {
async function buildRootHelpProgram(loaderOptions?: RootHelpLoaderOptions): Promise<Command> {
const program = new Command();
configureProgramHelp(program, {
programVersion: VERSION,
@@ -31,11 +29,7 @@ async function buildRootHelpProgram(options?: RootHelpRenderOptions): Promise<Co
program.command(command.name).description(command.description);
existingCommands.add(command.name);
}
const pluginDescriptors =
options && "pluginDescriptors" in options
? (options.pluginDescriptors ?? [])
: await getPluginCliCommandDescriptors();
for (const command of pluginDescriptors) {
for (const command of await getPluginCliCommandDescriptors(undefined, undefined, loaderOptions)) {
if (existingCommands.has(command.name)) {
continue;
}
@@ -46,8 +40,8 @@ async function buildRootHelpProgram(options?: RootHelpRenderOptions): Promise<Co
return program;
}
export async function renderRootHelpText(options?: RootHelpRenderOptions): Promise<string> {
const program = await buildRootHelpProgram(options);
export async function renderRootHelpText(loaderOptions?: RootHelpLoaderOptions): Promise<string> {
const program = await buildRootHelpProgram(loaderOptions);
let output = "";
const originalWrite = process.stdout.write.bind(process.stdout);
const captureWrite: typeof process.stdout.write = ((chunk: string | Uint8Array) => {
@@ -63,6 +57,6 @@ export async function renderRootHelpText(options?: RootHelpRenderOptions): Promi
return output;
}
export async function outputRootHelp(options?: RootHelpRenderOptions): Promise<void> {
process.stdout.write(await renderRootHelpText(options));
export async function outputRootHelp(loaderOptions?: RootHelpLoaderOptions): Promise<void> {
process.stdout.write(await renderRootHelpText(loaderOptions));
}

View File

@@ -69,6 +69,8 @@ const createRegistry = (diagnostics: PluginDiagnostic[]): PluginRegistry => ({
commands: [],
providers: [],
speechProviders: [],
realtimeTranscriptionProviders: [],
realtimeVoiceProviders: [],
mediaUnderstandingProviders: [],
imageGenerationProviders: [],
webFetchProviders: [],

View File

@@ -201,6 +201,8 @@ const createStubPluginRegistry = (): PluginRegistry => ({
}),
},
],
realtimeTranscriptionProviders: [],
realtimeVoiceProviders: [],
mediaUnderstandingProviders: [],
imageGenerationProviders: [],
webFetchProviders: [],

View File

@@ -66,6 +66,7 @@ export type {
ProviderReplaySessionState,
ProviderResolveDynamicModelContext,
ProviderResolvedUsageAuth,
RealtimeTranscriptionProviderPlugin,
ProviderSanitizeReplayHistoryContext,
ProviderToolSchemaDiagnostic,
ProviderResolveUsageAuthContext,

View File

@@ -51,6 +51,7 @@ export type {
ProviderAuthContext,
ProviderAuthResult,
ProviderRuntimeModel,
RealtimeTranscriptionProviderPlugin,
SpeechProviderPlugin,
} from "../plugins/types.js";
export type {

View File

@@ -46,6 +46,7 @@ import type {
ProviderReplayPolicyContext,
ProviderReplaySessionEntry,
ProviderReplaySessionState,
RealtimeTranscriptionProviderPlugin,
ProviderResolvedUsageAuth,
ProviderResolveDynamicModelContext,
ProviderSanitizeReplayHistoryContext,
@@ -102,6 +103,7 @@ export type {
ProviderResolveDynamicModelContext,
ProviderNormalizeResolvedModelContext,
ProviderRuntimeModel,
RealtimeTranscriptionProviderPlugin,
SpeechProviderPlugin,
ProviderThinkingPolicyContext,
ProviderValidateReplayTurnsContext,

View File

@@ -0,0 +1,16 @@
// Barrel module for realtime transcription providers.
//
// Re-exports (type-only) the provider plugin type and the provider/session
// contracts, plus the runtime lookup helpers from the provider registry, so
// consumers can import everything from this single module.
export type { RealtimeTranscriptionProviderPlugin } from "../plugins/types.js";
export type {
RealtimeTranscriptionProviderConfig,
RealtimeTranscriptionProviderConfiguredContext,
RealtimeTranscriptionProviderId,
RealtimeTranscriptionProviderResolveConfigContext,
RealtimeTranscriptionSession,
RealtimeTranscriptionSessionCallbacks,
RealtimeTranscriptionSessionCreateRequest,
} from "../realtime-transcription/provider-types.js";
// Runtime (non-type) exports: id normalization and registry lookups.
export {
canonicalizeRealtimeTranscriptionProviderId,
getRealtimeTranscriptionProvider,
listRealtimeTranscriptionProviders,
normalizeRealtimeTranscriptionProviderId,
} from "../realtime-transcription/provider-registry.js";

View File

@@ -0,0 +1,20 @@
// Barrel module for realtime voice providers.
//
// Re-exports (type-only) the provider plugin type and the bridge/tool
// contracts, plus the runtime lookup helpers from the provider registry, so
// consumers can import everything from this single module.
export type { RealtimeVoiceProviderPlugin } from "../plugins/types.js";
export type {
RealtimeVoiceBridge,
RealtimeVoiceBridgeCallbacks,
RealtimeVoiceBridgeCreateRequest,
RealtimeVoiceCloseReason,
RealtimeVoiceProviderConfig,
RealtimeVoiceProviderConfiguredContext,
RealtimeVoiceProviderId,
RealtimeVoiceProviderResolveConfigContext,
RealtimeVoiceRole,
RealtimeVoiceTool,
RealtimeVoiceToolCallEvent,
} from "../realtime-voice/provider-types.js";
// Runtime (non-type) exports: id normalization and registry lookups.
export {
canonicalizeRealtimeVoiceProviderId,
getRealtimeVoiceProvider,
listRealtimeVoiceProviders,
normalizeRealtimeVoiceProviderId,
} from "../realtime-voice/provider-registry.js";

View File

@@ -1,7 +1,12 @@
import { rmSync } from "node:fs";
import type { OpenClawConfig } from "../config/config.js";
import type { ResolvedTtsConfig } from "../tts/tts.js";
// Public speech helpers for bundled or third-party plugins.
//
// Keep this surface neutral. Provider plugins should not need to know about the
// bundled `speech-core` plugin id just to consume shared speech types/helpers.
// Keep this surface neutral and import-light. Provider builders commonly import
// this module just to get types and a few validation helpers, so avoid pulling
// in the heavy TTS runtime graph at module load time.
export type { SpeechProviderPlugin } from "../plugins/types.js";
export type {
@@ -22,14 +27,6 @@ export type {
TtsDirectiveParseResult,
} from "../tts/provider-types.js";
export {
scheduleCleanup,
summarizeText,
normalizeApplyTextNormalization,
normalizeLanguageCode,
normalizeSeed,
requireInRange,
} from "../tts/tts-core.js";
export { parseTtsDirectives } from "../tts/directives.js";
export {
canonicalizeSpeechProviderId,
@@ -44,3 +41,71 @@ export {
trimToUndefined,
truncateErrorDetail,
} from "../tts/provider-error-utils.js";
const TEMP_FILE_CLEANUP_DELAY_MS = 5 * 60 * 1000; // 5 minutes
/**
 * Validates that `value` is a finite number inside the inclusive range
 * `[min, max]`.
 *
 * @param value - Number to validate.
 * @param min - Inclusive lower bound.
 * @param max - Inclusive upper bound.
 * @param label - Name of the setting, used as the prefix of the error message.
 * @throws Error when `value` is NaN, infinite, below `min`, or above `max`.
 */
export function requireInRange(value: number, min: number, max: number, label: string): void {
  // Kept as "<" / ">" comparisons (not ">=" / "<=") so a NaN bound is treated
  // as "no bound" rather than as a failure.
  const outOfBounds = value < min || value > max;
  if (Number.isFinite(value) && !outOfBounds) {
    return;
  }
  throw new Error(`${label} must be between ${min} and ${max}`);
}
/**
 * Normalizes an optional language code to its trimmed, lower-case form.
 *
 * @param code - Raw language code; may be missing or blank.
 * @returns The lower-cased two-letter code, or `undefined` when `code` is
 *   missing or contains only whitespace.
 * @throws Error when the trimmed value is not exactly two ASCII letters.
 */
export function normalizeLanguageCode(code?: string): string | undefined {
  const candidate = (code ?? "").trim().toLowerCase();
  if (candidate === "") {
    return undefined;
  }
  if (/^[a-z]{2}$/.test(candidate)) {
    return candidate;
  }
  throw new Error("languageCode must be a 2-letter ISO 639-1 code (e.g. en, de, fr)");
}
/**
 * Normalizes an optional text-normalization mode.
 *
 * @param mode - Raw mode string; matching is case-insensitive and ignores
 *   surrounding whitespace.
 * @returns `"auto"`, `"on"`, or `"off"`, or `undefined` when `mode` is missing
 *   or blank.
 * @throws Error when a non-blank value is not one of the three accepted modes.
 */
export function normalizeApplyTextNormalization(mode?: string): "auto" | "on" | "off" | undefined {
  const requested = mode?.trim().toLowerCase();
  if (!requested) {
    return undefined;
  }
  const acceptedModes = ["auto", "on", "off"] as const;
  const matched = acceptedModes.find((candidate) => candidate === requested);
  if (matched === undefined) {
    throw new Error("applyTextNormalization must be one of: auto, on, off");
  }
  return matched;
}
/**
 * Normalizes an optional seed to an integer.
 *
 * @param seed - Raw seed; fractional values are rounded down.
 * @returns The floored seed, or `undefined` when `seed` is `null`/`undefined`.
 * @throws Error when the floored seed is not finite or falls outside
 *   `0..4294967295`.
 */
export function normalizeSeed(seed?: number): number | undefined {
  if (seed == null) {
    return undefined;
  }
  const maxSeed = 4_294_967_295;
  const floored = Math.floor(seed);
  const isValid = Number.isFinite(floored) && floored >= 0 && floored <= maxSeed;
  if (isValid) {
    return floored;
  }
  throw new Error("seed must be between 0 and 4294967295");
}
/**
 * Schedules a best-effort recursive removal of `tempDir` after a delay.
 *
 * @param tempDir - Directory to remove.
 * @param delayMs - Delay before removal; defaults to
 *   `TEMP_FILE_CLEANUP_DELAY_MS`.
 */
export function scheduleCleanup(
  tempDir: string,
  delayMs: number = TEMP_FILE_CLEANUP_DELAY_MS,
): void {
  const removeTempDir = (): void => {
    try {
      rmSync(tempDir, { recursive: true, force: true });
    } catch {
      // ignore cleanup errors
    }
  };
  // unref() so a pending cleanup never keeps the process alive on its own.
  setTimeout(removeTempDir, delayMs).unref();
}
/**
 * Forwards to `summarizeText` from `../tts/tts-core.js`, loading that module
 * with a dynamic import on each call instead of a static top-level import.
 *
 * @param params - Passed through unchanged to the runtime implementation.
 * @returns Whatever the runtime `summarizeText` resolves to.
 */
export async function summarizeText(params: {
  text: string;
  targetLength: number;
  cfg: OpenClawConfig;
  config: ResolvedTtsConfig;
  timeoutMs: number;
}) {
  const ttsCore = await import("../tts/tts-core.js");
  // Detach the function before calling it, matching a plain (unbound) call.
  const runtimeSummarize = ttsCore.summarizeText;
  return runtimeSummarize(params);
}

View File

@@ -28,6 +28,8 @@ export type BuildPluginApiParams = {
| "registerCliBackend"
| "registerProvider"
| "registerSpeechProvider"
| "registerRealtimeTranscriptionProvider"
| "registerRealtimeVoiceProvider"
| "registerMediaUnderstandingProvider"
| "registerImageGenerationProvider"
| "registerWebFetchProvider"
@@ -55,6 +57,10 @@ const noopRegisterService: OpenClawPluginApi["registerService"] = () => {};
const noopRegisterCliBackend: OpenClawPluginApi["registerCliBackend"] = () => {};
const noopRegisterProvider: OpenClawPluginApi["registerProvider"] = () => {};
const noopRegisterSpeechProvider: OpenClawPluginApi["registerSpeechProvider"] = () => {};
// No-op fallback used by buildPluginApi when the caller supplies no
// registerRealtimeTranscriptionProvider handler.
const noopRegisterRealtimeTranscriptionProvider: OpenClawPluginApi["registerRealtimeTranscriptionProvider"] =
() => {};
// No-op fallback used by buildPluginApi when the caller supplies no
// registerRealtimeVoiceProvider handler.
const noopRegisterRealtimeVoiceProvider: OpenClawPluginApi["registerRealtimeVoiceProvider"] =
() => {};
const noopRegisterMediaUnderstandingProvider: OpenClawPluginApi["registerMediaUnderstandingProvider"] =
() => {};
const noopRegisterImageGenerationProvider: OpenClawPluginApi["registerImageGenerationProvider"] =
@@ -97,6 +103,10 @@ export function buildPluginApi(params: BuildPluginApiParams): OpenClawPluginApi
registerCliBackend: handlers.registerCliBackend ?? noopRegisterCliBackend,
registerProvider: handlers.registerProvider ?? noopRegisterProvider,
registerSpeechProvider: handlers.registerSpeechProvider ?? noopRegisterSpeechProvider,
registerRealtimeTranscriptionProvider:
handlers.registerRealtimeTranscriptionProvider ?? noopRegisterRealtimeTranscriptionProvider,
registerRealtimeVoiceProvider:
handlers.registerRealtimeVoiceProvider ?? noopRegisterRealtimeVoiceProvider,
registerMediaUnderstandingProvider:
handlers.registerMediaUnderstandingProvider ?? noopRegisterMediaUnderstandingProvider,
registerImageGenerationProvider:

View File

@@ -28,6 +28,10 @@ describe("bundled capability metadata", () => {
cliBackendIds: uniqueStrings(manifest.cliBackends),
providerIds: uniqueStrings(manifest.providers),
speechProviderIds: uniqueStrings(manifest.contracts?.speechProviders),
realtimeTranscriptionProviderIds: uniqueStrings(
manifest.contracts?.realtimeTranscriptionProviders,
),
realtimeVoiceProviderIds: uniqueStrings(manifest.contracts?.realtimeVoiceProviders),
mediaUnderstandingProviderIds: uniqueStrings(
manifest.contracts?.mediaUnderstandingProviders,
),
@@ -41,6 +45,8 @@ describe("bundled capability metadata", () => {
entry.cliBackendIds.length > 0 ||
entry.providerIds.length > 0 ||
entry.speechProviderIds.length > 0 ||
entry.realtimeTranscriptionProviderIds.length > 0 ||
entry.realtimeVoiceProviderIds.length > 0 ||
entry.mediaUnderstandingProviderIds.length > 0 ||
entry.imageGenerationProviderIds.length > 0 ||
entry.webFetchProviderIds.length > 0 ||

View File

@@ -5,6 +5,8 @@ export type BundledPluginContractSnapshot = {
cliBackendIds: string[];
providerIds: string[];
speechProviderIds: string[];
realtimeTranscriptionProviderIds: string[];
realtimeVoiceProviderIds: string[];
mediaUnderstandingProviderIds: string[];
imageGenerationProviderIds: string[];
webFetchProviderIds: string[];
@@ -37,6 +39,10 @@ export const BUNDLED_PLUGIN_CONTRACT_SNAPSHOTS: readonly BundledPluginContractSn
cliBackendIds: uniqueStrings(manifest.cliBackends),
providerIds: uniqueStrings(manifest.providers),
speechProviderIds: uniqueStrings(manifest.contracts?.speechProviders),
realtimeTranscriptionProviderIds: uniqueStrings(
manifest.contracts?.realtimeTranscriptionProviders,
),
realtimeVoiceProviderIds: uniqueStrings(manifest.contracts?.realtimeVoiceProviders),
mediaUnderstandingProviderIds: uniqueStrings(manifest.contracts?.mediaUnderstandingProviders),
imageGenerationProviderIds: uniqueStrings(manifest.contracts?.imageGenerationProviders),
webFetchProviderIds: uniqueStrings(manifest.contracts?.webFetchProviders),
@@ -48,6 +54,8 @@ export const BUNDLED_PLUGIN_CONTRACT_SNAPSHOTS: readonly BundledPluginContractSn
entry.cliBackendIds.length > 0 ||
entry.providerIds.length > 0 ||
entry.speechProviderIds.length > 0 ||
entry.realtimeTranscriptionProviderIds.length > 0 ||
entry.realtimeVoiceProviderIds.length > 0 ||
entry.mediaUnderstandingProviderIds.length > 0 ||
entry.imageGenerationProviderIds.length > 0 ||
entry.webFetchProviderIds.length > 0 ||
@@ -68,6 +76,14 @@ export const BUNDLED_PROVIDER_PLUGIN_IDS = collectPluginIds((entry) => entry.pro
export const BUNDLED_SPEECH_PLUGIN_IDS = collectPluginIds((entry) => entry.speechProviderIds);
// Built by collectPluginIds from each snapshot's realtimeTranscriptionProviderIds.
// NOTE(review): presumably the ids of bundled plugins that declare at least one
// realtime transcription provider — confirm against collectPluginIds.
export const BUNDLED_REALTIME_TRANSCRIPTION_PLUGIN_IDS = collectPluginIds(
(entry) => entry.realtimeTranscriptionProviderIds,
);
// Built by collectPluginIds from each snapshot's realtimeVoiceProviderIds.
// NOTE(review): presumably the ids of bundled plugins that declare at least one
// realtime voice provider — confirm against collectPluginIds.
export const BUNDLED_REALTIME_VOICE_PLUGIN_IDS = collectPluginIds(
(entry) => entry.realtimeVoiceProviderIds,
);
export const BUNDLED_MEDIA_UNDERSTANDING_PLUGIN_IDS = collectPluginIds(
(entry) => entry.mediaUnderstandingProviderIds,
);
@@ -84,6 +100,8 @@ export const BUNDLED_RUNTIME_CONTRACT_PLUGIN_IDS = [
(entry) =>
entry.providerIds.length > 0 ||
entry.speechProviderIds.length > 0 ||
entry.realtimeTranscriptionProviderIds.length > 0 ||
entry.realtimeVoiceProviderIds.length > 0 ||
entry.mediaUnderstandingProviderIds.length > 0 ||
entry.imageGenerationProviderIds.length > 0 ||
entry.webFetchProviderIds.length > 0 ||

View File

@@ -122,6 +122,8 @@ function createCapabilityPluginRecord(params: {
cliBackendIds: [],
providerIds: [],
speechProviderIds: [],
realtimeTranscriptionProviderIds: [],
realtimeVoiceProviderIds: [],
mediaUnderstandingProviderIds: [],
imageGenerationProviderIds: [],
webFetchProviderIds: [],
@@ -272,6 +274,12 @@ export function loadBundledCapabilityRuntimeRegistry(params: {
record.cliBackendIds.push(...captured.cliBackends.map((entry) => entry.id));
record.providerIds.push(...captured.providers.map((entry) => entry.id));
record.speechProviderIds.push(...captured.speechProviders.map((entry) => entry.id));
record.realtimeTranscriptionProviderIds.push(
...captured.realtimeTranscriptionProviders.map((entry) => entry.id),
);
record.realtimeVoiceProviderIds.push(
...captured.realtimeVoiceProviders.map((entry) => entry.id),
);
record.mediaUnderstandingProviderIds.push(
...captured.mediaUnderstandingProviders.map((entry) => entry.id),
);
@@ -309,6 +317,24 @@ export function loadBundledCapabilityRuntimeRegistry(params: {
rootDir: record.rootDir,
})),
);
registry.realtimeTranscriptionProviders.push(
...captured.realtimeTranscriptionProviders.map((provider) => ({
pluginId: record.id,
pluginName: record.name,
provider,
source: record.source,
rootDir: record.rootDir,
})),
);
registry.realtimeVoiceProviders.push(
...captured.realtimeVoiceProviders.map((provider) => ({
pluginId: record.id,
pluginName: record.name,
provider,
source: record.source,
rootDir: record.rootDir,
})),
);
registry.mediaUnderstandingProviders.push(
...captured.mediaUnderstandingProviders.map((provider) => ({
pluginId: record.id,

View File

@@ -102,7 +102,12 @@ function setBundledCapabilityFixture(contractKey: string) {
}
function expectCompatChainApplied(params: {
key: "speechProviders" | "mediaUnderstandingProviders" | "imageGenerationProviders";
key:
| "speechProviders"
| "realtimeTranscriptionProviders"
| "realtimeVoiceProviders"
| "mediaUnderstandingProviders"
| "imageGenerationProviders";
contractKey: string;
cfg: OpenClawConfig;
enablementCompat: {
@@ -201,6 +206,8 @@ describe("resolvePluginCapabilityProviders", () => {
it.each([
["speechProviders", "speechProviders"],
["realtimeTranscriptionProviders", "realtimeTranscriptionProviders"],
["realtimeVoiceProviders", "realtimeVoiceProviders"],
["mediaUnderstandingProviders", "mediaUnderstandingProviders"],
["imageGenerationProviders", "imageGenerationProviders"],
] as const)("applies bundled compat before fallback loading for %s", (key, contractKey) => {

View File

@@ -9,11 +9,15 @@ import type { PluginRegistry } from "./registry.js";
type CapabilityProviderRegistryKey =
| "speechProviders"
| "realtimeTranscriptionProviders"
| "realtimeVoiceProviders"
| "mediaUnderstandingProviders"
| "imageGenerationProviders";
type CapabilityContractKey =
| "speechProviders"
| "realtimeTranscriptionProviders"
| "realtimeVoiceProviders"
| "mediaUnderstandingProviders"
| "imageGenerationProviders";
@@ -22,6 +26,8 @@ type CapabilityProviderForKey<K extends CapabilityProviderRegistryKey> =
const CAPABILITY_CONTRACT_KEY: Record<CapabilityProviderRegistryKey, CapabilityContractKey> = {
speechProviders: "speechProviders",
realtimeTranscriptionProviders: "realtimeTranscriptionProviders",
realtimeVoiceProviders: "realtimeVoiceProviders",
mediaUnderstandingProviders: "mediaUnderstandingProviders",
imageGenerationProviders: "imageGenerationProviders",
};

View File

@@ -10,6 +10,8 @@ import type {
OpenClawPluginCliCommandDescriptor,
OpenClawPluginCliRegistrar,
ProviderPlugin,
RealtimeTranscriptionProviderPlugin,
RealtimeVoiceProviderPlugin,
SpeechProviderPlugin,
WebFetchProviderPlugin,
WebSearchProviderPlugin,
@@ -27,6 +29,8 @@ export type CapturedPluginRegistration = {
cliRegistrars: CapturedPluginCliRegistration[];
cliBackends: CliBackendPlugin[];
speechProviders: SpeechProviderPlugin[];
realtimeTranscriptionProviders: RealtimeTranscriptionProviderPlugin[];
realtimeVoiceProviders: RealtimeVoiceProviderPlugin[];
mediaUnderstandingProviders: MediaUnderstandingProviderPlugin[];
imageGenerationProviders: ImageGenerationProviderPlugin[];
webFetchProviders: WebFetchProviderPlugin[];
@@ -42,6 +46,8 @@ export function createCapturedPluginRegistration(params?: {
const cliRegistrars: CapturedPluginCliRegistration[] = [];
const cliBackends: CliBackendPlugin[] = [];
const speechProviders: SpeechProviderPlugin[] = [];
const realtimeTranscriptionProviders: RealtimeTranscriptionProviderPlugin[] = [];
const realtimeVoiceProviders: RealtimeVoiceProviderPlugin[] = [];
const mediaUnderstandingProviders: MediaUnderstandingProviderPlugin[] = [];
const imageGenerationProviders: ImageGenerationProviderPlugin[] = [];
const webFetchProviders: WebFetchProviderPlugin[] = [];
@@ -59,6 +65,8 @@ export function createCapturedPluginRegistration(params?: {
cliRegistrars,
cliBackends,
speechProviders,
realtimeTranscriptionProviders,
realtimeVoiceProviders,
mediaUnderstandingProviders,
imageGenerationProviders,
webFetchProviders,
@@ -106,6 +114,12 @@ export function createCapturedPluginRegistration(params?: {
registerSpeechProvider(provider: SpeechProviderPlugin) {
speechProviders.push(provider);
},
// Records the provider in the captured realtimeTranscriptionProviders list.
registerRealtimeTranscriptionProvider(provider: RealtimeTranscriptionProviderPlugin) {
realtimeTranscriptionProviders.push(provider);
},
// Records the provider in the captured realtimeVoiceProviders list.
registerRealtimeVoiceProvider(provider: RealtimeVoiceProviderPlugin) {
realtimeVoiceProviders.push(provider);
},
registerMediaUnderstandingProvider(provider: MediaUnderstandingProviderPlugin) {
mediaUnderstandingProviders.push(provider);
},

View File

@@ -155,9 +155,10 @@ async function loadPluginCliCommandRegistry(
export async function getPluginCliCommandDescriptors(
cfg?: OpenClawConfig,
env?: NodeJS.ProcessEnv,
loaderOptions?: Pick<PluginLoadOptions, "pluginSdkResolution">,
): Promise<OpenClawPluginCliCommandDescriptor[]> {
try {
const { registry } = await loadPluginCliMetadataRegistry(cfg, env);
const { registry } = await loadPluginCliMetadataRegistry(cfg, env, loaderOptions);
const seen = new Set<string>();
const descriptors: OpenClawPluginCliCommandDescriptor[] = [];
for (const entry of registry.cliRegistrars) {

View File

@@ -8,6 +8,8 @@ import {
pluginRegistrationContractRegistry,
providerContractLoadError,
providerContractPluginIds,
realtimeTranscriptionProviderContractRegistry,
realtimeVoiceProviderContractRegistry,
resolveWebFetchProviderContractEntriesForPluginId,
resolveWebSearchProviderContractEntriesForPluginId,
speechProviderContractRegistry,
@@ -27,7 +29,11 @@ describe("plugin contract registry", () => {
predicate: (plugin: {
origin: string;
providers: unknown[];
contracts?: { speechProviders?: unknown[] };
contracts?: {
speechProviders?: unknown[];
realtimeTranscriptionProviders?: unknown[];
realtimeVoiceProviders?: unknown[];
};
}) => boolean;
}) {
expect(uniqueSortedStrings(params.actualPluginIds)).toEqual(
@@ -39,7 +45,11 @@ describe("plugin contract registry", () => {
predicate: (plugin: {
origin: string;
providers: unknown[];
contracts?: { speechProviders?: unknown[] };
contracts?: {
speechProviders?: unknown[];
realtimeTranscriptionProviders?: unknown[];
realtimeVoiceProviders?: unknown[];
};
}) => boolean,
) {
return loadPluginManifestRegistry({})
@@ -70,6 +80,14 @@ describe("plugin contract registry", () => {
name: "does not duplicate bundled media provider ids",
ids: () => mediaUnderstandingProviderContractRegistry.map((entry) => entry.provider.id),
},
{
name: "does not duplicate bundled realtime transcription provider ids",
ids: () => realtimeTranscriptionProviderContractRegistry.map((entry) => entry.provider.id),
},
{
name: "does not duplicate bundled realtime voice provider ids",
ids: () => realtimeVoiceProviderContractRegistry.map((entry) => entry.provider.id),
},
{
name: "does not duplicate bundled image-generation provider ids",
ids: () => imageGenerationProviderContractRegistry.map((entry) => entry.provider.id),
@@ -101,6 +119,23 @@ describe("plugin contract registry", () => {
});
});
// The plugin ids in the realtime voice contract registry are checked against
// the bundled manifests that list at least one realtimeVoiceProviders entry.
it("covers every bundled realtime voice plugin discovered from manifests", () => {
expectRegistryPluginIds({
actualPluginIds: realtimeVoiceProviderContractRegistry.map((entry) => entry.pluginId),
predicate: (plugin) =>
plugin.origin === "bundled" && (plugin.contracts?.realtimeVoiceProviders?.length ?? 0) > 0,
});
});
// The plugin ids in the realtime transcription contract registry are checked
// against the bundled manifests that list at least one
// realtimeTranscriptionProviders entry.
it("covers every bundled realtime transcription plugin discovered from manifests", () => {
expectRegistryPluginIds({
actualPluginIds: realtimeTranscriptionProviderContractRegistry.map((entry) => entry.pluginId),
predicate: (plugin) =>
plugin.origin === "bundled" &&
(plugin.contracts?.realtimeTranscriptionProviders?.length ?? 0) > 0,
});
});
it("covers every bundled web fetch plugin from the shared resolver", () => {
const bundledWebFetchPluginIds = resolveBundledWebFetchPluginIds({});

View File

@@ -3,6 +3,8 @@ import {
BUNDLED_MEDIA_UNDERSTANDING_PLUGIN_IDS,
BUNDLED_PLUGIN_CONTRACT_SNAPSHOTS,
BUNDLED_PROVIDER_PLUGIN_IDS,
BUNDLED_REALTIME_TRANSCRIPTION_PLUGIN_IDS,
BUNDLED_REALTIME_VOICE_PLUGIN_IDS,
BUNDLED_SPEECH_PLUGIN_IDS,
BUNDLED_WEB_FETCH_PLUGIN_IDS,
BUNDLED_WEB_SEARCH_PLUGIN_IDS,
@@ -12,6 +14,8 @@ import type {
ImageGenerationProviderPlugin,
MediaUnderstandingProviderPlugin,
ProviderPlugin,
RealtimeTranscriptionProviderPlugin,
RealtimeVoiceProviderPlugin,
SpeechProviderPlugin,
WebFetchProviderPlugin,
WebSearchProviderPlugin,
@@ -19,6 +23,8 @@ import type {
import {
loadVitestImageGenerationProviderContractRegistry,
loadVitestMediaUnderstandingProviderContractRegistry,
loadVitestRealtimeTranscriptionProviderContractRegistry,
loadVitestRealtimeVoiceProviderContractRegistry,
loadVitestSpeechProviderContractRegistry,
} from "./speech-vitest-registry.js";
@@ -38,6 +44,9 @@ type WebFetchProviderContractEntry = CapabilityContractEntry<WebFetchProviderPlu
};
type SpeechProviderContractEntry = CapabilityContractEntry<SpeechProviderPlugin>;
type RealtimeTranscriptionProviderContractEntry =
CapabilityContractEntry<RealtimeTranscriptionProviderPlugin>;
type RealtimeVoiceProviderContractEntry = CapabilityContractEntry<RealtimeVoiceProviderPlugin>;
type MediaUnderstandingProviderContractEntry =
CapabilityContractEntry<MediaUnderstandingProviderPlugin>;
type ImageGenerationProviderContractEntry = CapabilityContractEntry<ImageGenerationProviderPlugin>;
@@ -47,6 +56,8 @@ type PluginRegistrationContractEntry = {
cliBackendIds: string[];
providerIds: string[];
speechProviderIds: string[];
realtimeTranscriptionProviderIds: string[];
realtimeVoiceProviderIds: string[];
mediaUnderstandingProviderIds: string[];
imageGenerationProviderIds: string[];
webFetchProviderIds: string[];
@@ -94,6 +105,10 @@ let webSearchProviderContractRegistryByPluginIdCache: Map<
WebSearchProviderContractEntry[]
> | null = null;
let speechProviderContractRegistryCache: SpeechProviderContractEntry[] | null = null;
let realtimeTranscriptionProviderContractRegistryCache:
| RealtimeTranscriptionProviderContractEntry[]
| null = null;
let realtimeVoiceProviderContractRegistryCache: RealtimeVoiceProviderContractEntry[] | null = null;
let mediaUnderstandingProviderContractRegistryCache:
| MediaUnderstandingProviderContractEntry[]
| null = null;
@@ -387,6 +402,36 @@ function loadSpeechProviderContractRegistry(): SpeechProviderContractEntry[] {
return speechProviderContractRegistryCache;
}
/**
 * Returns the realtime voice provider contract entries, building them on the
 * first call and serving the module-level cache afterwards.
 *
 * When `process.env.VITEST` is set the entries come from the Vitest loader;
 * otherwise they are read from the bundled capability runtime registry.
 */
function loadRealtimeVoiceProviderContractRegistry(): RealtimeVoiceProviderContractEntry[] {
  if (realtimeVoiceProviderContractRegistryCache) {
    return realtimeVoiceProviderContractRegistryCache;
  }

  let entries: RealtimeVoiceProviderContractEntry[];
  if (process.env.VITEST) {
    entries = loadVitestRealtimeVoiceProviderContractRegistry();
  } else {
    const runtimeRegistry = loadBundledCapabilityRuntimeRegistry({
      pluginIds: BUNDLED_REALTIME_VOICE_PLUGIN_IDS,
      pluginSdkResolution: "dist",
    });
    // Keep only the two fields the contract entry exposes.
    entries = runtimeRegistry.realtimeVoiceProviders.map(({ pluginId, provider }) => ({
      pluginId,
      provider,
    }));
  }

  realtimeVoiceProviderContractRegistryCache = entries;
  return entries;
}
/**
 * Returns the realtime transcription provider contract entries, building them
 * on the first call and serving the module-level cache afterwards.
 *
 * When `process.env.VITEST` is set the entries come from the Vitest loader;
 * otherwise they are read from the bundled capability runtime registry.
 */
function loadRealtimeTranscriptionProviderContractRegistry(): RealtimeTranscriptionProviderContractEntry[] {
  if (realtimeTranscriptionProviderContractRegistryCache) {
    return realtimeTranscriptionProviderContractRegistryCache;
  }

  let entries: RealtimeTranscriptionProviderContractEntry[];
  if (process.env.VITEST) {
    entries = loadVitestRealtimeTranscriptionProviderContractRegistry();
  } else {
    const runtimeRegistry = loadBundledCapabilityRuntimeRegistry({
      pluginIds: BUNDLED_REALTIME_TRANSCRIPTION_PLUGIN_IDS,
      pluginSdkResolution: "dist",
    });
    // Keep only the two fields the contract entry exposes.
    entries = runtimeRegistry.realtimeTranscriptionProviders.map(({ pluginId, provider }) => ({
      pluginId,
      provider,
    }));
  }

  realtimeTranscriptionProviderContractRegistryCache = entries;
  return entries;
}
function loadMediaUnderstandingProviderContractRegistry(): MediaUnderstandingProviderContractEntry[] {
if (!mediaUnderstandingProviderContractRegistryCache) {
mediaUnderstandingProviderContractRegistryCache = process.env.VITEST
@@ -519,6 +564,12 @@ export const speechProviderContractRegistry: SpeechProviderContractEntry[] = cre
loadSpeechProviderContractRegistry,
);
// Array view over loadRealtimeTranscriptionProviderContractRegistry.
// NOTE(review): presumably createLazyArrayView defers the load until the array
// is first accessed — confirm in createLazyArrayView.
export const realtimeTranscriptionProviderContractRegistry: RealtimeTranscriptionProviderContractEntry[] =
createLazyArrayView(loadRealtimeTranscriptionProviderContractRegistry);
// Array view over loadRealtimeVoiceProviderContractRegistry.
// NOTE(review): presumably createLazyArrayView defers the load until the array
// is first accessed — confirm in createLazyArrayView.
export const realtimeVoiceProviderContractRegistry: RealtimeVoiceProviderContractEntry[] =
createLazyArrayView(loadRealtimeVoiceProviderContractRegistry);
export const mediaUnderstandingProviderContractRegistry: MediaUnderstandingProviderContractEntry[] =
createLazyArrayView(loadMediaUnderstandingProviderContractRegistry);
@@ -531,6 +582,8 @@ function loadPluginRegistrationContractRegistry(): PluginRegistrationContractEnt
cliBackendIds: uniqueStrings(entry.cliBackendIds),
providerIds: uniqueStrings(entry.providerIds),
speechProviderIds: uniqueStrings(entry.speechProviderIds),
realtimeTranscriptionProviderIds: uniqueStrings(entry.realtimeTranscriptionProviderIds),
realtimeVoiceProviderIds: uniqueStrings(entry.realtimeVoiceProviderIds),
mediaUnderstandingProviderIds: uniqueStrings(entry.mediaUnderstandingProviderIds),
imageGenerationProviderIds: uniqueStrings(entry.imageGenerationProviderIds),
webFetchProviderIds: uniqueStrings(entry.webFetchProviderIds),

View File

@@ -5,6 +5,8 @@ import { createJiti } from "jiti";
import {
BUNDLED_IMAGE_GENERATION_PLUGIN_IDS,
BUNDLED_MEDIA_UNDERSTANDING_PLUGIN_IDS,
BUNDLED_REALTIME_TRANSCRIPTION_PLUGIN_IDS,
BUNDLED_REALTIME_VOICE_PLUGIN_IDS,
BUNDLED_SPEECH_PLUGIN_IDS,
} from "../bundled-capability-metadata.js";
import { loadBundledCapabilityRuntimeRegistry } from "../bundled-capability-runtime.js";
@@ -13,6 +15,8 @@ import { buildPluginLoaderAliasMap, buildPluginLoaderJitiOptions } from "../sdk-
import type {
ImageGenerationProviderPlugin,
MediaUnderstandingProviderPlugin,
RealtimeTranscriptionProviderPlugin,
RealtimeVoiceProviderPlugin,
SpeechProviderPlugin,
} from "../types.js";
@@ -26,6 +30,16 @@ export type MediaUnderstandingProviderContractEntry = {
provider: MediaUnderstandingProviderPlugin;
};
/** A realtime voice provider paired with the id of the plugin it was loaded from. */
export type RealtimeVoiceProviderContractEntry = {
pluginId: string;
provider: RealtimeVoiceProviderPlugin;
};
/** A realtime transcription provider paired with the id of the plugin it was loaded from. */
export type RealtimeTranscriptionProviderContractEntry = {
pluginId: string;
provider: RealtimeTranscriptionProviderPlugin;
};
export type ImageGenerationProviderContractEntry = {
pluginId: string;
provider: ImageGenerationProviderPlugin;
@@ -190,6 +204,96 @@ export function loadVitestMediaUnderstandingProviderContractRegistry(): MediaUnd
return registrations;
}
/**
 * Builds the realtime voice provider contract entries for Vitest runs.
 *
 * For each resolved manifest with a `test-api.ts` file in its root directory,
 * the module is loaded and a builder whose export name matches
 * `build…RealtimeVoiceProvider` is invoked to create the provider. Plugin ids
 * that could not be resolved this way are loaded through the bundled
 * capability runtime registry instead.
 *
 * @returns Entries from test-api builders first, followed by any entries from
 *   the runtime-registry fallback.
 */
export function loadVitestRealtimeVoiceProviderContractRegistry(): RealtimeVoiceProviderContractEntry[] {
  const resolved = resolveTestApiModuleRecords(BUNDLED_REALTIME_VOICE_PLUGIN_IDS);
  const pendingPluginIds = resolved.unresolvedPluginIds;
  const entries: RealtimeVoiceProviderContractEntry[] = [];

  for (const manifest of resolved.manifests) {
    const rootDir = manifest.rootDir;
    const testApiPath = rootDir ? path.join(rootDir, "test-api.ts") : undefined;
    if (!testApiPath || !fs.existsSync(testApiPath)) {
      continue;
    }
    const loadModule = createVitestCapabilityLoader(testApiPath);
    const buildProvider = resolveNamedBuilder<RealtimeVoiceProviderPlugin>(
      loadModule(testApiPath),
      /^build.+RealtimeVoiceProvider$/u,
    );
    if (buildProvider) {
      entries.push({ pluginId: manifest.id, provider: buildProvider() });
      // Resolved via test-api, so the runtime fallback can skip this plugin.
      pendingPluginIds.delete(manifest.id);
    }
  }

  if (pendingPluginIds.size > 0) {
    const fallbackRegistry = loadBundledCapabilityRuntimeRegistry({
      pluginIds: [...pendingPluginIds],
      pluginSdkResolution: "dist",
    });
    for (const { pluginId, provider } of fallbackRegistry.realtimeVoiceProviders) {
      entries.push({ pluginId, provider });
    }
  }

  return entries;
}
/**
 * Builds the realtime transcription provider contract entries for Vitest runs.
 *
 * For each resolved manifest with a `test-api.ts` file in its root directory,
 * the module is loaded and a builder whose export name matches
 * `build…RealtimeTranscriptionProvider` is invoked to create the provider.
 * Plugin ids that could not be resolved this way are loaded through the
 * bundled capability runtime registry instead.
 *
 * @returns Entries from test-api builders first, followed by any entries from
 *   the runtime-registry fallback.
 */
export function loadVitestRealtimeTranscriptionProviderContractRegistry(): RealtimeTranscriptionProviderContractEntry[] {
  const resolved = resolveTestApiModuleRecords(BUNDLED_REALTIME_TRANSCRIPTION_PLUGIN_IDS);
  const pendingPluginIds = resolved.unresolvedPluginIds;
  const entries: RealtimeTranscriptionProviderContractEntry[] = [];

  for (const manifest of resolved.manifests) {
    const rootDir = manifest.rootDir;
    const testApiPath = rootDir ? path.join(rootDir, "test-api.ts") : undefined;
    if (!testApiPath || !fs.existsSync(testApiPath)) {
      continue;
    }
    const loadModule = createVitestCapabilityLoader(testApiPath);
    const buildProvider = resolveNamedBuilder<RealtimeTranscriptionProviderPlugin>(
      loadModule(testApiPath),
      /^build.+RealtimeTranscriptionProvider$/u,
    );
    if (buildProvider) {
      entries.push({ pluginId: manifest.id, provider: buildProvider() });
      // Resolved via test-api, so the runtime fallback can skip this plugin.
      pendingPluginIds.delete(manifest.id);
    }
  }

  if (pendingPluginIds.size > 0) {
    const fallbackRegistry = loadBundledCapabilityRuntimeRegistry({
      pluginIds: [...pendingPluginIds],
      pluginSdkResolution: "dist",
    });
    for (const { pluginId, provider } of fallbackRegistry.realtimeTranscriptionProviders) {
      entries.push({ pluginId, provider });
    }
  }

  return entries;
}
export function loadVitestImageGenerationProviderContractRegistry(): ImageGenerationProviderContractEntry[] {
const registrations: ImageGenerationProviderContractEntry[] = [];
const { manifests, unresolvedPluginIds } = resolveTestApiModuleRecords(

View File

@@ -590,6 +590,8 @@ function createPluginRecord(params: {
cliBackendIds: [],
providerIds: [],
speechProviderIds: [],
realtimeTranscriptionProviderIds: [],
realtimeVoiceProviderIds: [],
mediaUnderstandingProviderIds: [],
imageGenerationProviderIds: [],
webFetchProviderIds: [],

View File

@@ -52,6 +52,8 @@ export type PluginManifest = {
export type PluginManifestContracts = {
speechProviders?: string[];
realtimeTranscriptionProviders?: string[];
realtimeVoiceProviders?: string[];
mediaUnderstandingProviders?: string[];
imageGenerationProviders?: string[];
webFetchProviders?: string[];
@@ -125,6 +127,8 @@ function normalizeManifestContracts(value: unknown): PluginManifestContracts | u
}
const speechProviders = normalizeStringList(value.speechProviders);
const realtimeTranscriptionProviders = normalizeStringList(value.realtimeTranscriptionProviders);
const realtimeVoiceProviders = normalizeStringList(value.realtimeVoiceProviders);
const mediaUnderstandingProviders = normalizeStringList(value.mediaUnderstandingProviders);
const imageGenerationProviders = normalizeStringList(value.imageGenerationProviders);
const webFetchProviders = normalizeStringList(value.webFetchProviders);
@@ -132,6 +136,8 @@ function normalizeManifestContracts(value: unknown): PluginManifestContracts | u
const tools = normalizeStringList(value.tools);
const contracts = {
...(speechProviders.length > 0 ? { speechProviders } : {}),
...(realtimeTranscriptionProviders.length > 0 ? { realtimeTranscriptionProviders } : {}),
...(realtimeVoiceProviders.length > 0 ? { realtimeVoiceProviders } : {}),
...(mediaUnderstandingProviders.length > 0 ? { mediaUnderstandingProviders } : {}),
...(imageGenerationProviders.length > 0 ? { imageGenerationProviders } : {}),
...(webFetchProviders.length > 0 ? { webFetchProviders } : {}),

View File

@@ -11,6 +11,8 @@ export function createEmptyPluginRegistry(): PluginRegistry {
providers: [],
cliBackends: [],
speechProviders: [],
realtimeTranscriptionProviders: [],
realtimeVoiceProviders: [],
mediaUnderstandingProviders: [],
imageGenerationProviders: [],
webFetchProviders: [],

View File

@@ -38,7 +38,7 @@ import {
import type {
CliBackendPlugin,
ImageGenerationProviderPlugin,
WebFetchProviderPlugin,
RealtimeTranscriptionProviderPlugin,
OpenClawPluginApi,
OpenClawPluginChannelRegistration,
OpenClawPluginCliCommandDescriptor,
@@ -52,6 +52,7 @@ import type {
OpenClawPluginHookOptions,
MediaUnderstandingProviderPlugin,
ProviderPlugin,
RealtimeVoiceProviderPlugin,
OpenClawPluginService,
OpenClawPluginToolContext,
OpenClawPluginToolFactory,
@@ -67,6 +68,7 @@ import type {
PluginHookHandlerMap,
PluginHookRegistration as TypedPluginHookRegistration,
SpeechProviderPlugin,
WebFetchProviderPlugin,
WebSearchProviderPlugin,
} from "./types.js";
@@ -142,6 +144,10 @@ type PluginOwnedProviderRegistration<T extends { id: string }> = {
/** Registry entry type for a speech provider, expressed via `PluginOwnedProviderRegistration`. */
export type PluginSpeechProviderRegistration =
PluginOwnedProviderRegistration<SpeechProviderPlugin>;
/** Registry entry type for a realtime transcription provider. */
export type PluginRealtimeTranscriptionProviderRegistration =
PluginOwnedProviderRegistration<RealtimeTranscriptionProviderPlugin>;
/** Registry entry type for a realtime voice provider. */
export type PluginRealtimeVoiceProviderRegistration =
PluginOwnedProviderRegistration<RealtimeVoiceProviderPlugin>;
/** Registry entry type for a media understanding provider. */
export type PluginMediaUnderstandingProviderRegistration =
PluginOwnedProviderRegistration<MediaUnderstandingProviderPlugin>;
export type PluginImageGenerationProviderRegistration =
@@ -213,6 +219,8 @@ export type PluginRecord = {
cliBackendIds: string[];
providerIds: string[];
speechProviderIds: string[];
realtimeTranscriptionProviderIds: string[];
realtimeVoiceProviderIds: string[];
mediaUnderstandingProviderIds: string[];
imageGenerationProviderIds: string[];
webFetchProviderIds: string[];
@@ -239,6 +247,8 @@ export type PluginRegistry = {
providers: PluginProviderRegistration[];
cliBackends?: PluginCliBackendRegistration[];
speechProviders: PluginSpeechProviderRegistration[];
realtimeTranscriptionProviders: PluginRealtimeTranscriptionProviderRegistration[];
realtimeVoiceProviders: PluginRealtimeVoiceProviderRegistration[];
mediaUnderstandingProviders: PluginMediaUnderstandingProviderRegistration[];
imageGenerationProviders: PluginImageGenerationProviderRegistration[];
webFetchProviders: PluginWebFetchProviderRegistration[];
@@ -699,6 +709,32 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) {
});
};
/**
 * Registers a realtime transcription provider on behalf of a plugin.
 *
 * Delegates to `registerUniqueProviderLike`, handing it the registry-wide
 * `realtimeTranscriptionProviders` list and the plugin record's own
 * `realtimeTranscriptionProviderIds` list.
 *
 * @param record - Plugin record that owns the provider.
 * @param provider - Realtime transcription provider being registered.
 */
const registerRealtimeTranscriptionProvider = (
  record: PluginRecord,
  provider: RealtimeTranscriptionProviderPlugin,
) => {
  const registrations = registry.realtimeTranscriptionProviders;
  const ownedIds = record.realtimeTranscriptionProviderIds;
  registerUniqueProviderLike({
    kindLabel: "realtime transcription provider",
    record,
    provider,
    registrations,
    ownedIds,
  });
};
/**
 * Registers a realtime voice provider on behalf of a plugin.
 *
 * Delegates to `registerUniqueProviderLike`, handing it the registry-wide
 * `realtimeVoiceProviders` list and the plugin record's own
 * `realtimeVoiceProviderIds` list.
 *
 * @param record - Plugin record that owns the provider.
 * @param provider - Realtime voice provider being registered.
 */
const registerRealtimeVoiceProvider = (
  record: PluginRecord,
  provider: RealtimeVoiceProviderPlugin,
) => {
  const registrations = registry.realtimeVoiceProviders;
  const ownedIds = record.realtimeVoiceProviderIds;
  registerUniqueProviderLike({
    kindLabel: "realtime voice provider",
    record,
    provider,
    registrations,
    ownedIds,
  });
};
const registerMediaUnderstandingProvider = (
record: PluginRecord,
provider: MediaUnderstandingProviderPlugin,
@@ -1009,6 +1045,10 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) {
registerHttpRoute: (routeParams) => registerHttpRoute(record, routeParams),
registerProvider: (provider) => registerProvider(record, provider),
registerSpeechProvider: (provider) => registerSpeechProvider(record, provider),
registerRealtimeTranscriptionProvider: (provider) =>
registerRealtimeTranscriptionProvider(record, provider),
registerRealtimeVoiceProvider: (provider) =>
registerRealtimeVoiceProvider(record, provider),
registerMediaUnderstandingProvider: (provider) =>
registerMediaUnderstandingProvider(record, provider),
registerImageGenerationProvider: (provider) =>
@@ -1198,6 +1238,8 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) {
registerProvider,
registerCliBackend,
registerSpeechProvider,
registerRealtimeTranscriptionProvider,
registerRealtimeVoiceProvider,
registerMediaUnderstandingProvider,
registerImageGenerationProvider,
registerWebSearchProvider,

View File

@@ -199,6 +199,8 @@ describe("setActivePluginRegistry", () => {
cliBackendIds: [],
providerIds: [],
speechProviderIds: [],
realtimeTranscriptionProviderIds: [],
realtimeVoiceProviderIds: [],
mediaUnderstandingProviderIds: [],
imageGenerationProviderIds: [],
webFetchProviderIds: [],
@@ -225,6 +227,8 @@ describe("setActivePluginRegistry", () => {
cliBackendIds: [],
providerIds: [],
speechProviderIds: [],
realtimeTranscriptionProviderIds: [],
realtimeVoiceProviderIds: [],
mediaUnderstandingProviderIds: [],
imageGenerationProviderIds: [],
webFetchProviderIds: [],

View File

@@ -51,6 +51,8 @@ export function createPluginRecord(
cliBackendIds: [],
providerIds: [],
speechProviderIds: [],
realtimeTranscriptionProviderIds: [],
realtimeVoiceProviderIds: [],
mediaUnderstandingProviderIds: [],
imageGenerationProviderIds: [],
webFetchProviderIds: [],
@@ -107,7 +109,7 @@ export function createCustomHook(params: {
export function createPluginLoadResult(
overrides: Partial<PluginLoadResult> & Pick<PluginLoadResult, "plugins"> = { plugins: [] },
): PluginLoadResult {
const { plugins, ...rest } = overrides;
const { plugins, realtimeTranscriptionProviders, realtimeVoiceProviders, ...rest } = overrides;
return {
plugins,
diagnostics: [],
@@ -129,6 +131,8 @@ export function createPluginLoadResult(
commands: [],
conversationBindingResolvedHandlers: [],
...rest,
realtimeTranscriptionProviders: realtimeTranscriptionProviders ?? [],
realtimeVoiceProviders: realtimeVoiceProviders ?? [],
};
}

View File

@@ -28,6 +28,8 @@ export type PluginCapabilityKind =
| "cli-backend"
| "text-inference"
| "speech"
| "realtime-transcription"
| "realtime-voice"
| "media-understanding"
| "image-generation"
| "web-search"
@@ -233,6 +235,8 @@ function buildCapabilityEntries(plugin: PluginRegistry["plugins"][number]) {
{ kind: "cli-backend" as const, ids: plugin.cliBackendIds ?? [] },
{ kind: "text-inference" as const, ids: plugin.providerIds },
{ kind: "speech" as const, ids: plugin.speechProviderIds },
{ kind: "realtime-transcription" as const, ids: plugin.realtimeTranscriptionProviderIds },
{ kind: "realtime-voice" as const, ids: plugin.realtimeVoiceProviderIds },
{ kind: "media-understanding" as const, ids: plugin.mediaUnderstandingProviderIds },
{ kind: "image-generation" as const, ids: plugin.imageGenerationProviderIds },
{ kind: "web-search" as const, ids: plugin.webSearchProviderIds },

View File

@@ -30,6 +30,22 @@ import type { HookEntry } from "../hooks/types.js";
import type { ImageGenerationProvider } from "../image-generation/types.js";
import type { ProviderUsageSnapshot } from "../infra/provider-usage.types.js";
import type { MediaUnderstandingProvider } from "../media-understanding/types.js";
import type {
RealtimeTranscriptionProviderConfig,
RealtimeTranscriptionProviderConfiguredContext,
RealtimeTranscriptionProviderId,
RealtimeTranscriptionProviderResolveConfigContext,
RealtimeTranscriptionSession,
RealtimeTranscriptionSessionCreateRequest,
} from "../realtime-transcription/provider-types.js";
import type {
RealtimeVoiceBridge,
RealtimeVoiceBridgeCreateRequest,
RealtimeVoiceProviderConfig,
RealtimeVoiceProviderConfiguredContext,
RealtimeVoiceProviderId,
RealtimeVoiceProviderResolveConfigContext,
} from "../realtime-voice/provider-types.js";
import type { RuntimeEnv } from "../runtime.js";
import type {
RuntimeWebFetchMetadata,
@@ -1526,6 +1542,38 @@ export type PluginSpeechProviderEntry = SpeechProviderPlugin & {
pluginId: string;
};
/**
 * Realtime transcription capability registered by a plugin.
 *
 * Plugins hand an object of this shape to `registerRealtimeTranscriptionProvider`.
 */
export type RealtimeTranscriptionProviderPlugin = {
/** Provider id; the provider registry trims and lowercases it before using it as a lookup key. */
id: RealtimeTranscriptionProviderId;
/** Label for the provider. NOTE(review): presumably shown to users; consumers are not visible here. */
label: string;
/** Additional ids that the provider registry resolves to this provider. */
aliases?: string[];
/** NOTE(review): presumably ranks providers during automatic selection; the consumer is not visible here. */
autoSelectOrder?: number;
/** Optional hook that turns the raw config (plus the global config) into this provider's config. */
resolveConfig?: (
ctx: RealtimeTranscriptionProviderResolveConfigContext,
) => RealtimeTranscriptionProviderConfig;
/** Returns a boolean for the given provider config; presumably "ready to use" — confirm with callers. */
isConfigured: (ctx: RealtimeTranscriptionProviderConfiguredContext) => boolean;
/** Creates a transcription session from the provider config and callbacks in the request. */
createSession: (req: RealtimeTranscriptionSessionCreateRequest) => RealtimeTranscriptionSession;
};
/** A realtime transcription provider together with the id of the plugin it is associated with. */
export type PluginRealtimeTranscriptionProviderEntry = RealtimeTranscriptionProviderPlugin & {
pluginId: string;
};
/**
 * Realtime voice capability registered by a plugin.
 *
 * Plugins hand an object of this shape to `registerRealtimeVoiceProvider`.
 */
export type RealtimeVoiceProviderPlugin = {
/** Provider id; the provider registry trims and lowercases it before using it as a lookup key. */
id: RealtimeVoiceProviderId;
/** Label for the provider. NOTE(review): presumably shown to users; consumers are not visible here. */
label: string;
/** Additional ids that the provider registry resolves to this provider. */
aliases?: string[];
/** NOTE(review): presumably ranks providers during automatic selection; the consumer is not visible here. */
autoSelectOrder?: number;
/** Optional hook that turns the raw config (plus the global config) into this provider's config. */
resolveConfig?: (ctx: RealtimeVoiceProviderResolveConfigContext) => RealtimeVoiceProviderConfig;
/** Returns a boolean for the given provider config; presumably "ready to use" — confirm with callers. */
isConfigured: (ctx: RealtimeVoiceProviderConfiguredContext) => boolean;
/** Creates a voice bridge from the provider config, callbacks, and optional instructions/tools in the request. */
createBridge: (req: RealtimeVoiceBridgeCreateRequest) => RealtimeVoiceBridge;
};
/** A realtime voice provider together with the id of the plugin it is associated with. */
export type PluginRealtimeVoiceProviderEntry = RealtimeVoiceProviderPlugin & {
pluginId: string;
};
/** Plugin-facing alias of `MediaUnderstandingProvider`. */
export type MediaUnderstandingProviderPlugin = MediaUnderstandingProvider;
/** Plugin-facing alias of `ImageGenerationProvider`. */
export type ImageGenerationProviderPlugin = ImageGenerationProvider;
@@ -1850,6 +1898,10 @@ export type OpenClawPluginApi = {
registerProvider: (provider: ProviderPlugin) => void;
/** Register a speech synthesis provider (speech capability). */
registerSpeechProvider: (provider: SpeechProviderPlugin) => void;
/** Register a realtime transcription provider (streaming STT capability). */
registerRealtimeTranscriptionProvider: (provider: RealtimeTranscriptionProviderPlugin) => void;
/** Register a realtime voice provider (duplex voice capability). */
registerRealtimeVoiceProvider: (provider: RealtimeVoiceProviderPlugin) => void;
/** Register a media understanding provider (media understanding capability). */
registerMediaUnderstandingProvider: (provider: MediaUnderstandingProviderPlugin) => void;
/** Register an image generation provider (image generation capability). */

View File

@@ -0,0 +1,80 @@
import type { OpenClawConfig } from "../config/config.js";
import { resolvePluginCapabilityProviders } from "../plugins/capability-provider-runtime.js";
import type { RealtimeTranscriptionProviderPlugin } from "../plugins/types.js";
import type { RealtimeTranscriptionProviderId } from "./provider-types.js";
/**
 * Trims surrounding whitespace and lowercases the value.
 *
 * @param value - Raw string, possibly missing.
 * @returns The trimmed, lowercased string, or `undefined` when the input is
 *   missing or nothing remains after trimming.
 */
function trimToUndefined(value: string | undefined): string | undefined {
  if (value == null) {
    return undefined;
  }
  const cleaned = value.trim().toLowerCase();
  if (cleaned) {
    return cleaned;
  }
  return undefined;
}
/**
 * Normalizes a realtime transcription provider id for comparison and lookup.
 *
 * @param providerId - Raw provider id, possibly missing.
 * @returns The trimmed, lowercased id, or `undefined` when the input is
 *   missing or blank.
 */
export function normalizeRealtimeTranscriptionProviderId(
  providerId: string | undefined,
): RealtimeTranscriptionProviderId | undefined {
  const normalizedId = trimToUndefined(providerId);
  return normalizedId;
}
/**
 * Fetches the realtime transcription providers via `resolvePluginCapabilityProviders`
 * using the `"realtimeTranscriptionProviders"` capability key.
 *
 * @param cfg - Optional config forwarded to the resolver.
 * @returns The providers returned by the resolver.
 */
function resolveRealtimeTranscriptionProviderEntries(
  cfg?: OpenClawConfig,
): RealtimeTranscriptionProviderPlugin[] {
  const providers = resolvePluginCapabilityProviders({
    cfg,
    key: "realtimeTranscriptionProviders",
  });
  return providers;
}
/**
 * Builds the lookup tables for realtime transcription providers.
 *
 * Canonical ids always take precedence over aliases: an alias that collides
 * with any provider's canonical id is ignored. Previously the outcome of such
 * a collision depended on registration order, so a lookup by canonical id
 * could return a different provider that merely aliased that id.
 *
 * @param cfg - Optional config forwarded to the provider resolver.
 * @returns `canonical`, keyed by normalized provider id only, and `aliases`,
 *   keyed by normalized provider id plus every normalized alias. Providers
 *   whose id normalizes to `undefined` are left out of both maps.
 */
function buildProviderMaps(cfg?: OpenClawConfig): {
  canonical: Map<string, RealtimeTranscriptionProviderPlugin>;
  aliases: Map<string, RealtimeTranscriptionProviderPlugin>;
} {
  const providers = resolveRealtimeTranscriptionProviderEntries(cfg);

  // Pass 1: canonical ids. A later provider with the same id replaces an earlier one.
  const canonical = new Map<string, RealtimeTranscriptionProviderPlugin>();
  for (const provider of providers) {
    const id = normalizeRealtimeTranscriptionProviderId(provider.id);
    if (id) {
      canonical.set(id, provider);
    }
  }

  // Pass 2: aliases. Seeded with the canonical entries so ids resolve through the same map.
  const aliases = new Map(canonical);
  for (const provider of providers) {
    // Providers without a usable id contribute no aliases either.
    if (!normalizeRealtimeTranscriptionProviderId(provider.id)) {
      continue;
    }
    for (const alias of provider.aliases ?? []) {
      const normalizedAlias = normalizeRealtimeTranscriptionProviderId(alias);
      // Never let an alias shadow a canonical provider id.
      if (normalizedAlias && !canonical.has(normalizedAlias)) {
        aliases.set(normalizedAlias, provider);
      }
    }
  }
  return { canonical, aliases };
}
/**
 * Lists the realtime transcription providers, one per normalized canonical id.
 *
 * @param cfg - Optional config forwarded to the provider resolver.
 * @returns The values of the canonical provider map, in map iteration order.
 */
export function listRealtimeTranscriptionProviders(
  cfg?: OpenClawConfig,
): RealtimeTranscriptionProviderPlugin[] {
  const { canonical } = buildProviderMaps(cfg);
  return Array.from(canonical.values());
}
/**
 * Looks up a realtime transcription provider by id or alias.
 *
 * @param providerId - Provider id or alias; normalized before the lookup.
 * @param cfg - Optional config forwarded to the provider resolver.
 * @returns The matching provider, or `undefined` when the id is missing,
 *   blank, or unknown.
 */
export function getRealtimeTranscriptionProvider(
  providerId: string | undefined,
  cfg?: OpenClawConfig,
): RealtimeTranscriptionProviderPlugin | undefined {
  const lookupKey = normalizeRealtimeTranscriptionProviderId(providerId);
  return lookupKey ? buildProviderMaps(cfg).aliases.get(lookupKey) : undefined;
}
/**
 * Maps a provider id or alias to the id declared by the matching provider.
 *
 * @param providerId - Provider id or alias, possibly missing.
 * @param cfg - Optional config forwarded to the provider resolver.
 * @returns The matching provider's `id`; the normalized input when no provider
 *   matches; `undefined` when the input is missing or blank.
 */
export function canonicalizeRealtimeTranscriptionProviderId(
  providerId: string | undefined,
  cfg?: OpenClawConfig,
): RealtimeTranscriptionProviderId | undefined {
  const requestedId = normalizeRealtimeTranscriptionProviderId(providerId);
  if (requestedId) {
    const registered = getRealtimeTranscriptionProvider(requestedId, cfg);
    return registered?.id ?? requestedId;
  }
  return undefined;
}

View File

@@ -0,0 +1,33 @@
import type { OpenClawConfig } from "../config/config.js";
/** Identifier of a realtime transcription provider (plain string alias). */
export type RealtimeTranscriptionProviderId = string;
/** Provider-specific configuration bag; values are left as `unknown` at this layer. */
export type RealtimeTranscriptionProviderConfig = Record<string, unknown>;
/** Context for resolving a provider's config: the global config (required) plus the raw provider config. */
export type RealtimeTranscriptionProviderResolveConfigContext = {
cfg: OpenClawConfig;
rawConfig: RealtimeTranscriptionProviderConfig;
};
/** Context for a configured-check: the provider config plus an optional global config. */
export type RealtimeTranscriptionProviderConfiguredContext = {
cfg?: OpenClawConfig;
providerConfig: RealtimeTranscriptionProviderConfig;
};
/**
 * Optional callbacks a transcription session may invoke.
 *
 * NOTE(review): exact firing conditions are provider-defined and not visible here;
 * the per-callback notes below are inferred from the names — confirm against a provider.
 */
export type RealtimeTranscriptionSessionCallbacks = {
// Presumably interim (non-final) transcript text.
onPartial?: (partial: string) => void;
// Presumably finalized transcript text.
onTranscript?: (transcript: string) => void;
// Presumably signals that speech was detected in the incoming audio.
onSpeechStart?: () => void;
// Receives an Error reported by the session.
onError?: (error: Error) => void;
};
/** Request for creating a transcription session: the session callbacks plus the provider config. */
export type RealtimeTranscriptionSessionCreateRequest = RealtimeTranscriptionSessionCallbacks & {
providerConfig: RealtimeTranscriptionProviderConfig;
};
/**
 * Handle for a realtime transcription session.
 *
 * NOTE(review): connection lifecycle and audio encoding are provider-defined and not
 * specified by this type — confirm against the implementing provider.
 */
export type RealtimeTranscriptionSession = {
// Asynchronous; resolves with no value.
connect(): Promise<void>;
// Accepts an audio chunk as a Buffer; returns nothing.
sendAudio(audio: Buffer): void;
close(): void;
// Synchronous connection-state query.
isConnected(): boolean;
};

View File

@@ -0,0 +1,76 @@
import type { OpenClawConfig } from "../config/config.js";
import { resolvePluginCapabilityProviders } from "../plugins/capability-provider-runtime.js";
import type { RealtimeVoiceProviderPlugin } from "../plugins/types.js";
import type { RealtimeVoiceProviderId } from "./provider-types.js";
/**
 * Trims surrounding whitespace and lowercases the value.
 *
 * @param value - Raw string, possibly missing.
 * @returns The trimmed, lowercased string, or `undefined` when the input is
 *   missing or nothing remains after trimming.
 */
function trimToUndefined(value: string | undefined): string | undefined {
  if (value == null) {
    return undefined;
  }
  const cleaned = value.trim().toLowerCase();
  if (cleaned) {
    return cleaned;
  }
  return undefined;
}
/**
 * Normalizes a realtime voice provider id for comparison and lookup.
 *
 * @param providerId - Raw provider id, possibly missing.
 * @returns The trimmed, lowercased id, or `undefined` when the input is
 *   missing or blank.
 */
export function normalizeRealtimeVoiceProviderId(
  providerId: string | undefined,
): RealtimeVoiceProviderId | undefined {
  const normalizedId = trimToUndefined(providerId);
  return normalizedId;
}
/**
 * Fetches the realtime voice providers via `resolvePluginCapabilityProviders`
 * using the `"realtimeVoiceProviders"` capability key.
 *
 * @param cfg - Optional config forwarded to the resolver.
 * @returns The providers returned by the resolver.
 */
function resolveRealtimeVoiceProviderEntries(cfg?: OpenClawConfig): RealtimeVoiceProviderPlugin[] {
  const providers = resolvePluginCapabilityProviders({
    cfg,
    key: "realtimeVoiceProviders",
  });
  return providers;
}
/**
 * Builds the lookup tables for realtime voice providers.
 *
 * Canonical ids always take precedence over aliases: an alias that collides
 * with any provider's canonical id is ignored. Previously the outcome of such
 * a collision depended on registration order, so a lookup by canonical id
 * could return a different provider that merely aliased that id.
 *
 * @param cfg - Optional config forwarded to the provider resolver.
 * @returns `canonical`, keyed by normalized provider id only, and `aliases`,
 *   keyed by normalized provider id plus every normalized alias. Providers
 *   whose id normalizes to `undefined` are left out of both maps.
 */
function buildProviderMaps(cfg?: OpenClawConfig): {
  canonical: Map<string, RealtimeVoiceProviderPlugin>;
  aliases: Map<string, RealtimeVoiceProviderPlugin>;
} {
  const providers = resolveRealtimeVoiceProviderEntries(cfg);

  // Pass 1: canonical ids. A later provider with the same id replaces an earlier one.
  const canonical = new Map<string, RealtimeVoiceProviderPlugin>();
  for (const provider of providers) {
    const id = normalizeRealtimeVoiceProviderId(provider.id);
    if (id) {
      canonical.set(id, provider);
    }
  }

  // Pass 2: aliases. Seeded with the canonical entries so ids resolve through the same map.
  const aliases = new Map(canonical);
  for (const provider of providers) {
    // Providers without a usable id contribute no aliases either.
    if (!normalizeRealtimeVoiceProviderId(provider.id)) {
      continue;
    }
    for (const alias of provider.aliases ?? []) {
      const normalizedAlias = normalizeRealtimeVoiceProviderId(alias);
      // Never let an alias shadow a canonical provider id.
      if (normalizedAlias && !canonical.has(normalizedAlias)) {
        aliases.set(normalizedAlias, provider);
      }
    }
  }
  return { canonical, aliases };
}
/**
 * Lists the realtime voice providers, one per normalized canonical id.
 *
 * @param cfg - Optional config forwarded to the provider resolver.
 * @returns The values of the canonical provider map, in map iteration order.
 */
export function listRealtimeVoiceProviders(cfg?: OpenClawConfig): RealtimeVoiceProviderPlugin[] {
  const { canonical } = buildProviderMaps(cfg);
  return Array.from(canonical.values());
}
/**
 * Looks up a realtime voice provider by id or alias.
 *
 * @param providerId - Provider id or alias; normalized before the lookup.
 * @param cfg - Optional config forwarded to the provider resolver.
 * @returns The matching provider, or `undefined` when the id is missing,
 *   blank, or unknown.
 */
export function getRealtimeVoiceProvider(
  providerId: string | undefined,
  cfg?: OpenClawConfig,
): RealtimeVoiceProviderPlugin | undefined {
  const lookupKey = normalizeRealtimeVoiceProviderId(providerId);
  return lookupKey ? buildProviderMaps(cfg).aliases.get(lookupKey) : undefined;
}
/**
 * Maps a provider id or alias to the id declared by the matching provider.
 *
 * @param providerId - Provider id or alias, possibly missing.
 * @param cfg - Optional config forwarded to the provider resolver.
 * @returns The matching provider's `id`; the normalized input when no provider
 *   matches; `undefined` when the input is missing or blank.
 */
export function canonicalizeRealtimeVoiceProviderId(
  providerId: string | undefined,
  cfg?: OpenClawConfig,
): RealtimeVoiceProviderId | undefined {
  const requestedId = normalizeRealtimeVoiceProviderId(providerId);
  if (requestedId) {
    const registered = getRealtimeVoiceProvider(requestedId, cfg);
    return registered?.id ?? requestedId;
  }
  return undefined;
}

View File

@@ -0,0 +1,66 @@
import type { OpenClawConfig } from "../config/config.js";
/** Identifier of a realtime voice provider (plain string alias). */
export type RealtimeVoiceProviderId = string;
/** Speaker role attached to a transcript callback. */
export type RealtimeVoiceRole = "user" | "assistant";
/** Reason passed to the bridge's `onClose` callback. */
export type RealtimeVoiceCloseReason = "completed" | "error";
/**
 * Function-style tool definition that can be passed along in a bridge create request.
 *
 * NOTE(review): `parameters` looks like a JSON-Schema object description — confirm
 * against the consuming provider.
 */
export type RealtimeVoiceTool = {
type: "function";
name: string;
description: string;
parameters: {
type: "object";
properties: Record<string, unknown>;
required?: string[];
};
};
/** Payload delivered to `onToolCall`; `args` is untyped and must be narrowed by the receiver. */
export type RealtimeVoiceToolCallEvent = {
itemId: string;
// Presumably the id to echo back via `submitToolResult` — confirm with the provider.
callId: string;
name: string;
args: unknown;
};
/**
 * Callbacks supplied when creating a voice bridge. `onAudio` and `onClearAudio` are
 * required; the rest are optional.
 *
 * NOTE(review): exact firing conditions are provider-defined and not visible here.
 */
export type RealtimeVoiceBridgeCallbacks = {
// Receives an audio Buffer; the parameter name suggests mu-law encoding — confirm.
onAudio: (muLaw: Buffer) => void;
// Presumably asks the receiver to drop already-queued audio — confirm.
onClearAudio: () => void;
onMark?: (markName: string) => void;
// Receives transcript text with its speaker role and a final/non-final flag.
onTranscript?: (role: RealtimeVoiceRole, text: string, isFinal: boolean) => void;
onToolCall?: (event: RealtimeVoiceToolCallEvent) => void;
onReady?: () => void;
onError?: (error: Error) => void;
onClose?: (reason: RealtimeVoiceCloseReason) => void;
};
/** Provider-specific configuration bag; values are left as `unknown` at this layer. */
export type RealtimeVoiceProviderConfig = Record<string, unknown>;
/** Context for resolving a provider's config: the global config (required) plus the raw provider config. */
export type RealtimeVoiceProviderResolveConfigContext = {
cfg: OpenClawConfig;
rawConfig: RealtimeVoiceProviderConfig;
};
/** Context for a configured-check: the provider config plus an optional global config. */
export type RealtimeVoiceProviderConfiguredContext = {
cfg?: OpenClawConfig;
providerConfig: RealtimeVoiceProviderConfig;
};
/** Request for creating a voice bridge: the bridge callbacks plus provider config and optional instructions/tools. */
export type RealtimeVoiceBridgeCreateRequest = RealtimeVoiceBridgeCallbacks & {
providerConfig: RealtimeVoiceProviderConfig;
// Optional instruction text; presumably a system-style prompt for the voice model — confirm.
instructions?: string;
// Optional tool definitions made available to the bridge.
tools?: RealtimeVoiceTool[];
};
/**
 * Handle for a realtime voice bridge. `sendUserMessage` and `triggerGreeting` are
 * optional members, so callers must check for them before calling.
 *
 * NOTE(review): connection lifecycle, audio encoding, and timestamp units are
 * provider-defined and not specified by this type — confirm against the provider.
 */
export type RealtimeVoiceBridge = {
// Asynchronous; resolves with no value.
connect(): Promise<void>;
// Accepts an audio chunk as a Buffer; returns nothing.
sendAudio(audio: Buffer): void;
setMediaTimestamp(ts: number): void;
sendUserMessage?(text: string): void;
triggerGreeting?(instructions?: string): void;
// Supplies the result for a tool call identified by `callId`; `result` is untyped.
submitToolResult(callId: string, result: unknown): void;
acknowledgeMark(): void;
close(): void;
// Synchronous connection-state query.
isConnected(): boolean;
};

View File

@@ -27,6 +27,8 @@ export const createTestRegistry = (channels: TestChannelRegistration[] = []): Pl
})),
providers: [],
speechProviders: [],
realtimeTranscriptionProviders: [],
realtimeVoiceProviders: [],
mediaUnderstandingProviders: [],
imageGenerationProviders: [],
webFetchProviders: [],