diff --git a/extensions/openai/realtime-transcription-provider.test.ts b/extensions/openai/realtime-transcription-provider.test.ts index b947ce87608..30b0ed6530e 100644 --- a/extensions/openai/realtime-transcription-provider.test.ts +++ b/extensions/openai/realtime-transcription-provider.test.ts @@ -1,13 +1,7 @@ -import { afterEach, describe, expect, it } from "vitest"; +import { describe, expect, it } from "vitest"; import { buildOpenAIRealtimeTranscriptionProvider } from "./realtime-transcription-provider.js"; describe("buildOpenAIRealtimeTranscriptionProvider", () => { - const originalEnv = { ...process.env }; - - afterEach(() => { - process.env = { ...originalEnv }; - }); - it("normalizes OpenAI config defaults", () => { const provider = buildOpenAIRealtimeTranscriptionProvider(); const resolved = provider.resolveConfig?.({ @@ -26,15 +20,19 @@ describe("buildOpenAIRealtimeTranscriptionProvider", () => { }); }); - it("reads provider-owned env fallbacks", () => { - process.env.REALTIME_TRANSCRIPTION_MODEL = "gpt-4o-transcribe"; - process.env.SILENCE_DURATION_MS = "900"; - process.env.VAD_THRESHOLD = "0.45"; - + it("keeps provider-owned transcription settings configurable via raw provider config", () => { const provider = buildOpenAIRealtimeTranscriptionProvider(); const resolved = provider.resolveConfig?.({ cfg: {} as never, - rawConfig: {}, + rawConfig: { + providers: { + openai: { + model: "gpt-4o-transcribe", + silenceDurationMs: 900, + vadThreshold: 0.45, + }, + }, + }, }); expect(resolved).toEqual({ diff --git a/extensions/openai/realtime-transcription-provider.ts b/extensions/openai/realtime-transcription-provider.ts index 984f0dcc847..d4fd8d09350 100644 --- a/extensions/openai/realtime-transcription-provider.ts +++ b/extensions/openai/realtime-transcription-provider.ts @@ -57,21 +57,9 @@ function normalizeProviderConfig( value: raw?.openaiApiKey, path: "plugins.entries.voice-call.config.streaming.openaiApiKey", }), - model: - trimToUndefined(raw?.model) ?? - trimToUndefined(raw?.sttModel) ?? - trimToUndefined(process.env.REALTIME_TRANSCRIPTION_MODEL) ?? - trimToUndefined(process.env.STREAMING_STT_MODEL), - silenceDurationMs: - asNumber(raw?.silenceDurationMs) ?? - (typeof process.env.SILENCE_DURATION_MS === "string" - ? Number.parseInt(process.env.SILENCE_DURATION_MS, 10) - : undefined), - vadThreshold: - asNumber(raw?.vadThreshold) ?? - (typeof process.env.VAD_THRESHOLD === "string" - ? Number.parseFloat(process.env.VAD_THRESHOLD) - : undefined), + model: trimToUndefined(raw?.model) ?? trimToUndefined(raw?.sttModel), + silenceDurationMs: asNumber(raw?.silenceDurationMs), + vadThreshold: asNumber(raw?.vadThreshold), }; } diff --git a/extensions/openai/realtime-voice-provider.test.ts b/extensions/openai/realtime-voice-provider.test.ts index 92803299695..ea2b1c82a8a 100644 --- a/extensions/openai/realtime-voice-provider.test.ts +++ b/extensions/openai/realtime-voice-provider.test.ts @@ -1,24 +1,22 @@ -import { afterEach, describe, expect, it } from "vitest"; +import { describe, expect, it } from "vitest"; import { buildOpenAIRealtimeVoiceProvider } from "./realtime-voice-provider.js"; describe("buildOpenAIRealtimeVoiceProvider", () => { - const originalEnv = { ...process.env }; - - afterEach(() => { - process.env = { ...originalEnv }; - }); - - it("normalizes provider-owned env fallbacks", () => { - process.env.REALTIME_VOICE_MODEL = "gpt-realtime"; - process.env.REALTIME_VOICE_VOICE = "verse"; - process.env.REALTIME_VOICE_TEMPERATURE = "0.6"; - process.env.SILENCE_DURATION_MS = "850"; - process.env.VAD_THRESHOLD = "0.35"; - + it("normalizes provider-owned voice settings from raw provider config", () => { const provider = buildOpenAIRealtimeVoiceProvider(); const resolved = provider.resolveConfig?.({ cfg: {} as never, - rawConfig: {}, + rawConfig: { + providers: { + openai: { + model: "gpt-realtime", + voice: "verse", + temperature: 0.6, + silenceDurationMs: 850, + vadThreshold: 0.35, + }, + }, + }, }); expect(resolved).toEqual({ diff --git a/extensions/openai/realtime-voice-provider.ts b/extensions/openai/realtime-voice-provider.ts index 687b5098a89..40f7259fda8 100644 --- a/extensions/openai/realtime-voice-provider.ts +++ b/extensions/openai/realtime-voice-provider.ts @@ -103,25 +103,11 @@ function normalizeProviderConfig( value: raw?.apiKey, path: "plugins.entries.voice-call.config.realtime.providers.openai.apiKey", }), - model: trimToUndefined(raw?.model) ?? trimToUndefined(process.env.REALTIME_VOICE_MODEL), - voice: (trimToUndefined(raw?.voice) ?? trimToUndefined(process.env.REALTIME_VOICE_VOICE)) as - | OpenAIRealtimeVoice - | undefined, - temperature: - asNumber(raw?.temperature) ?? - (typeof process.env.REALTIME_VOICE_TEMPERATURE === "string" - ? Number.parseFloat(process.env.REALTIME_VOICE_TEMPERATURE) - : undefined), - vadThreshold: - asNumber(raw?.vadThreshold) ?? - (typeof process.env.VAD_THRESHOLD === "string" - ? Number.parseFloat(process.env.VAD_THRESHOLD) - : undefined), - silenceDurationMs: - asNumber(raw?.silenceDurationMs) ?? - (typeof process.env.SILENCE_DURATION_MS === "string" - ? Number.parseInt(process.env.SILENCE_DURATION_MS, 10) - : undefined), + model: trimToUndefined(raw?.model), + voice: trimToUndefined(raw?.voice) as OpenAIRealtimeVoice | undefined, + temperature: asNumber(raw?.temperature), + vadThreshold: asNumber(raw?.vadThreshold), + silenceDurationMs: asNumber(raw?.silenceDurationMs), prefixPaddingMs: asNumber(raw?.prefixPaddingMs), azureEndpoint: trimToUndefined(raw?.azureEndpoint), azureDeployment: trimToUndefined(raw?.azureDeployment), diff --git a/extensions/voice-call/src/config.test.ts b/extensions/voice-call/src/config.test.ts index 966ef8cdf97..1dbb1269f41 100644 --- a/extensions/voice-call/src/config.test.ts +++ b/extensions/voice-call/src/config.test.ts @@ -273,12 +273,6 @@ describe("normalizeVoiceCallConfig", () => { }); describe("resolveVoiceCallConfig", () => { - const originalEnv = { ...process.env }; - - afterEach(() => { - process.env = { ...originalEnv }; - }); - it("keeps legacy streaming OpenAI fields inside providers.openai without forcing provider selection", () => { const resolved = resolveVoiceCallConfig({ enabled: true, @@ -301,14 +295,13 @@ describe("resolveVoiceCallConfig", () => { }); }); - it("maps realtime instructions from the legacy env hook without altering provider selection", () => { - process.env.REALTIME_VOICE_INSTRUCTIONS = "Stay concise."; - + it("preserves configured realtime instructions without env indirection", () => { const resolved = resolveVoiceCallConfig({ enabled: true, provider: "twilio", realtime: { enabled: true, + instructions: "Stay concise.", }, }); diff --git a/extensions/voice-call/src/config.ts b/extensions/voice-call/src/config.ts index 021b7042cf0..29b3e5a4eda 100644 --- a/extensions/voice-call/src/config.ts +++ b/extensions/voice-call/src/config.ts @@ -605,12 +605,6 @@ export function resolveVoiceCallConfig(config: VoiceCallConfigInput): VoiceCallC resolved.streaming = mergeLegacyStreamingOpenAICompat(resolved.streaming); resolved.realtime = mergeLegacyRealtimeOpenAICompat(resolved.realtime); - if ( - typeof resolved.realtime.instructions !== "string" && - typeof process.env.REALTIME_VOICE_INSTRUCTIONS === "string" - ) { - resolved.realtime.instructions = process.env.REALTIME_VOICE_INSTRUCTIONS; - } return normalizeVoiceCallConfig(resolved); }