From 155f4300babdff2f0e506f9ea878088fd80f5415 Mon Sep 17 00:00:00 2001
From: Vincent Koc <vincentkoc@ieee.org>
Date: Sun, 5 Apr 2026 08:14:41 +0100
Subject: [PATCH] fix(voice-call): use full config for realtime transcription
 (#61224)

* fix(voice-call): use full config for realtime transcription

* fix(changelog): note voice-call transcription regression

* Update CHANGELOG.md
---
 CHANGELOG.md                              |  1 +
 extensions/voice-call/src/runtime.test.ts | 27 +++++++++++++++++++++++
 extensions/voice-call/src/runtime.ts      |  1 +
 extensions/voice-call/src/webhook.ts      |  6 ++++-
 4 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ab10a5f3148..00d4035d78f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -51,6 +51,7 @@ Docs: https://docs.openclaw.ai
 - Status/usage: let `/status` and `session_status` fall back to transcript token totals when the session meta store stayed at zero, so LM Studio, Ollama, DashScope, and similar OpenAI-compatible providers stop showing `Context: 0/...`. (#55041) Thanks @jjjojoj.
 - Providers/Z.AI: preserve explicitly registered `glm-5-*` variants like `glm-5-turbo` instead of intercepting them with the generic GLM-5 forward-compat shim. (#48185) Thanks @haoyu-haoyu.
 - Live model switching: only treat explicit user-driven model changes as pending live switches, so fallback rotation, heartbeat overrides, and compaction no longer trip `LiveSessionModelSwitchError` before making an API call. (#60266) Thanks @kiranvk-2011.
+- Voice-call/OpenAI: pass full plugin config into realtime transcription provider resolution so streaming calls can discover the bundled OpenAI realtime transcription provider again. Fixes #60936. Thanks @sliekens and @vincentkoc.
 - Plugins/OpenAI: enable `gpt-image-1` reference-image edits through `/images/edits` multipart uploads, and stop inferring unsupported resolution overrides when no explicit `size` or `resolution` is provided.
 - Gateway/startup: default `gateway.mode` to `local` when unset, detect PID recycling in gateway lock files on Windows and macOS, and show startup progress so healthy restarts stop getting blocked by stale locks. (#54801, #60085, #59843)
 - Mobile pairing/Android: tighten secure endpoint handling so Tailscale and public remote setup reject cleartext endpoints, private LAN pairing still works, merged-role approvals mint both node and operator device tokens, and bootstrap tokens survive node auto-pair until operator approval finishes. (#60128, #60208, #60221)
diff --git a/extensions/voice-call/src/runtime.test.ts b/extensions/voice-call/src/runtime.test.ts
index ffe9093c4e2..ecb7ab2cb07 100644
--- a/extensions/voice-call/src/runtime.test.ts
+++ b/extensions/voice-call/src/runtime.test.ts
@@ -1,3 +1,4 @@
+import type { OpenClawConfig } from "openclaw/plugin-sdk/core";
 import { beforeEach, describe, expect, it, vi } from "vitest";
 import type { VoiceCallConfig } from "./config.js";
 import type { CoreConfig } from "./core-bridge.js";
@@ -10,6 +11,7 @@ const mocks = vi.hoisted(() => ({
   webhookStart: vi.fn(),
   webhookStop: vi.fn(),
   webhookGetMediaStreamHandler: vi.fn(),
+  webhookCtorArgs: [] as unknown[][],
   startTunnel: vi.fn(),
   setupTailscaleExposure: vi.fn(),
   cleanupTailscaleExposure: vi.fn(),
@@ -28,6 +30,9 @@ vi.mock("./manager.js", () => ({
 
 vi.mock("./webhook.js", () => ({
   VoiceCallWebhookServer: class {
+    constructor(...args: unknown[]) {
+      mocks.webhookCtorArgs.push(args);
+    }
     start = mocks.webhookStart;
     stop = mocks.webhookStop;
     getMediaStreamHandler = mocks.webhookGetMediaStreamHandler;
@@ -58,6 +63,7 @@ describe("createVoiceCallRuntime lifecycle", () => {
     mocks.webhookStart.mockResolvedValue("http://127.0.0.1:3334/voice/webhook");
     mocks.webhookStop.mockResolvedValue(undefined);
     mocks.webhookGetMediaStreamHandler.mockReturnValue(undefined);
+    mocks.webhookCtorArgs.length = 0;
     mocks.startTunnel.mockResolvedValue(null);
     mocks.setupTailscaleExposure.mockResolvedValue(null);
     mocks.cleanupTailscaleExposure.mockResolvedValue(undefined);
@@ -106,4 +112,25 @@ describe("createVoiceCallRuntime lifecycle", () => {
     expect(mocks.cleanupTailscaleExposure).toHaveBeenCalledTimes(1);
     expect(mocks.webhookStop).toHaveBeenCalledTimes(1);
   });
+
+  it("passes fullConfig to the webhook server for streaming provider resolution", async () => {
+    const coreConfig = { messages: { tts: { provider: "openai" } } } as CoreConfig;
+    const fullConfig = {
+      plugins: {
+        entries: {
+          openai: { enabled: true },
+        },
+      },
+    } as OpenClawConfig;
+    // Create the runtime; the mocked VoiceCallWebhookServer records its constructor arguments.
+    await createVoiceCallRuntime({
+      config: createBaseConfig(),
+      coreConfig,
+      fullConfig,
+      agentRuntime: {} as never,
+    });
+    // toBe() is an identity check: slots 3 and 4 must hold the exact objects passed in above.
+    expect(mocks.webhookCtorArgs[0]?.[3]).toBe(coreConfig);
+    expect(mocks.webhookCtorArgs[0]?.[4]).toBe(fullConfig);
+  });
 });
diff --git a/extensions/voice-call/src/runtime.ts b/extensions/voice-call/src/runtime.ts
index 731073985ec..dc463b6ebbf 100644
--- a/extensions/voice-call/src/runtime.ts
+++ b/extensions/voice-call/src/runtime.ts
@@ -231,6 +231,7 @@ export async function createVoiceCallRuntime(params: {
     manager,
     provider,
     coreConfig,
+    (fullConfig ?? (coreConfig as OpenClawConfig)) as OpenClawConfig,
     agentRuntime,
   );
   if (realtimeProvider) {
diff --git a/extensions/voice-call/src/webhook.ts b/extensions/voice-call/src/webhook.ts
index ef64cfcc4d3..5327066e9a8 100644
--- a/extensions/voice-call/src/webhook.ts
+++ b/extensions/voice-call/src/webhook.ts
@@ -84,6 +84,7 @@ export class VoiceCallWebhookServer {
   private manager: CallManager;
   private provider: VoiceCallProvider;
   private coreConfig: CoreConfig | null;
+  private fullConfig: OpenClawConfig | null;
   private agentRuntime: CoreAgentDeps | null;
   private stopStaleCallReaper: (() => void) | null = null;
   private readonly webhookInFlightLimiter = createWebhookInFlightLimiter();
@@ -100,12 +101,14 @@ export class VoiceCallWebhookServer {
     manager: CallManager,
     provider: VoiceCallProvider,
     coreConfig?: CoreConfig,
+    fullConfig?: OpenClawConfig,
     agentRuntime?: CoreAgentDeps,
   ) {
     this.config = normalizeVoiceCallConfig(config);
     this.manager = manager;
     this.provider = provider;
     this.coreConfig = coreConfig ?? null;
+    this.fullConfig = fullConfig ?? null;
     this.agentRuntime = agentRuntime ?? null;
   }
 
@@ -159,7 +162,8 @@ export class VoiceCallWebhookServer {
    */
   private async initializeMediaStreaming(): Promise<void> {
     const streaming = this.config.streaming;
-    const pluginConfig = this.coreConfig as unknown as OpenClawConfig | undefined;
+    const pluginConfig =
+      this.fullConfig ?? (this.coreConfig as unknown as OpenClawConfig | undefined);
     const { getRealtimeTranscriptionProvider, listRealtimeTranscriptionProviders } =
       await import("./realtime-transcription.runtime.js");
     const resolution = resolveConfiguredCapabilityProvider({