feat: expose talk-capable realtime providers

This commit is contained in:
Peter Steinberger
2026-05-05 20:59:23 +01:00
parent c90c68c636
commit 7225a2678e
4 changed files with 79 additions and 5 deletions

View File

@@ -65,6 +65,27 @@ describe("buildGoogleRealtimeVoiceProvider", () => {
delete process.env.GOOGLE_API_KEY;
});
it("declares realtime Talk capabilities for catalog selection", () => {
  // The same two mono formats are expected for both audio directions, so
  // spell each one out once and reuse it below.
  const ulaw8k = { encoding: "g711_ulaw", sampleRateHz: 8000, channels: 1 };
  const pcm24k = { encoding: "pcm16", sampleRateHz: 24000, channels: 1 };

  const { capabilities } = buildGoogleRealtimeVoiceProvider();

  // toEqual compares recursively, so the complete capability descriptor
  // (transports, formats and feature flags) is pinned by this one assertion.
  expect(capabilities).toEqual({
    transports: ["provider-websocket", "gateway-relay"],
    inputAudioFormats: [ulaw8k, pcm24k],
    outputAudioFormats: [ulaw8k, pcm24k],
    supportsBrowserSession: true,
    supportsBargeIn: true,
    supportsToolCalls: true,
    supportsVideoFrames: true,
    supportsSessionResumption: true,
  });
});
it("normalizes provider config and cfg model-provider key fallback", () => {
const provider = buildGoogleRealtimeVoiceProvider();
const resolved = provider.resolveConfig?.({
@@ -294,7 +315,7 @@ describe("buildGoogleRealtimeVoiceProvider", () => {
});
expect(session).toMatchObject({
provider: "google",
transport: "json-pcm-websocket",
transport: "provider-websocket",
protocol: "google-live-bidi",
clientSecret: "auth_tokens/browser-session",
websocketUrl:

View File

@@ -32,6 +32,7 @@ import {
convertPcmToMulaw8k,
mulawToPcm,
REALTIME_VOICE_AUDIO_FORMAT_G711_ULAW_8KHZ,
REALTIME_VOICE_AUDIO_FORMAT_PCM16_24KHZ,
REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
resamplePcm,
} from "openclaw/plugin-sdk/realtime-voice";
@@ -877,7 +878,7 @@ async function createGoogleRealtimeBrowserSession(
return {
provider: "google",
transport: "json-pcm-websocket",
transport: "provider-websocket",
protocol: "google-live-bidi",
clientSecret,
websocketUrl: GOOGLE_REALTIME_BROWSER_WEBSOCKET_URL,
@@ -900,6 +901,22 @@ export function buildGoogleRealtimeVoiceProvider(): RealtimeVoiceProviderPlugin
label: "Google Live Voice",
defaultModel: GOOGLE_REALTIME_DEFAULT_MODEL,
autoSelectOrder: 20,
capabilities: {
transports: ["provider-websocket", "gateway-relay"],
inputAudioFormats: [
REALTIME_VOICE_AUDIO_FORMAT_G711_ULAW_8KHZ,
REALTIME_VOICE_AUDIO_FORMAT_PCM16_24KHZ,
],
outputAudioFormats: [
REALTIME_VOICE_AUDIO_FORMAT_G711_ULAW_8KHZ,
REALTIME_VOICE_AUDIO_FORMAT_PCM16_24KHZ,
],
supportsBrowserSession: true,
supportsBargeIn: true,
supportsToolCalls: true,
supportsVideoFrames: true,
supportsSessionResumption: true,
},
resolveConfig: ({ cfg, rawConfig }) => normalizeProviderConfig(rawConfig, cfg),
isConfigured: ({ providerConfig }) =>
Boolean(normalizeProviderConfig(providerConfig).apiKey || resolveEnvApiKey()),

View File

@@ -114,6 +114,25 @@ describe("buildOpenAIRealtimeVoiceProvider", () => {
vi.unstubAllEnvs();
});
it("declares realtime Talk capabilities for catalog selection", () => {
  // Input and output are expected to list identical formats in the same
  // order; build that list once and spread a copy into each direction.
  const expectedAudioFormats = [
    { encoding: "g711_ulaw", sampleRateHz: 8000, channels: 1 },
    { encoding: "pcm16", sampleRateHz: 24000, channels: 1 },
  ];
  const expectedCapabilities = {
    transports: ["webrtc", "gateway-relay"],
    inputAudioFormats: [...expectedAudioFormats],
    outputAudioFormats: [...expectedAudioFormats],
    supportsBrowserSession: true,
    supportsBargeIn: true,
    supportsToolCalls: true,
  };

  const { capabilities } = buildOpenAIRealtimeVoiceProvider();

  // Recursive equality against the full expected descriptor.
  expect(capabilities).toEqual(expectedCapabilities);
});
it("adds OpenClaw attribution headers to native realtime websocket requests", () => {
vi.stubEnv("OPENCLAW_VERSION", "2026.3.22");
const provider = buildOpenAIRealtimeVoiceProvider();
@@ -192,7 +211,7 @@ describe("buildOpenAIRealtimeVoiceProvider", () => {
});
expect(session).toMatchObject({
provider: "openai",
transport: "webrtc-sdp",
transport: "webrtc",
clientSecret: "client-secret-123",
offerUrl: "https://api.openai.com/v1/realtime/calls",
});

View File

@@ -20,7 +20,10 @@ import type {
RealtimeVoiceProviderPlugin,
RealtimeVoiceTool,
} from "openclaw/plugin-sdk/realtime-voice";
import { REALTIME_VOICE_AUDIO_FORMAT_G711_ULAW_8KHZ } from "openclaw/plugin-sdk/realtime-voice";
import {
REALTIME_VOICE_AUDIO_FORMAT_G711_ULAW_8KHZ,
REALTIME_VOICE_AUDIO_FORMAT_PCM16_24KHZ,
} from "openclaw/plugin-sdk/realtime-voice";
import {
normalizeResolvedSecretInputString,
normalizeSecretInputString,
@@ -857,7 +860,7 @@ async function createOpenAIRealtimeBrowserSession(
const offerHeaders = resolveOpenAIRealtimeBrowserOfferHeaders();
return {
provider: "openai",
transport: "webrtc-sdp",
transport: "webrtc",
clientSecret,
offerUrl: "https://api.openai.com/v1/realtime/calls",
...(offerHeaders ? { offerHeaders } : {}),
@@ -873,6 +876,20 @@ export function buildOpenAIRealtimeVoiceProvider(): RealtimeVoiceProviderPlugin
label: "OpenAI Realtime Voice",
defaultModel: OPENAI_REALTIME_DEFAULT_MODEL,
autoSelectOrder: 10,
capabilities: {
transports: ["webrtc", "gateway-relay"],
inputAudioFormats: [
REALTIME_VOICE_AUDIO_FORMAT_G711_ULAW_8KHZ,
REALTIME_VOICE_AUDIO_FORMAT_PCM16_24KHZ,
],
outputAudioFormats: [
REALTIME_VOICE_AUDIO_FORMAT_G711_ULAW_8KHZ,
REALTIME_VOICE_AUDIO_FORMAT_PCM16_24KHZ,
],
supportsBrowserSession: true,
supportsBargeIn: true,
supportsToolCalls: true,
},
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
isConfigured: ({ providerConfig }) =>
hasOpenAIRealtimeApiKeyInput(normalizeProviderConfig(providerConfig).apiKey),