fix(voice-call): use full config for realtime transcription (#61224)

* fix(voice-call): use full config for realtime transcription

* fix(changelog): note voice-call transcription regression

* Update CHANGELOG.md
This commit is contained in:
Vincent Koc
2026-04-05 08:14:41 +01:00
committed by GitHub
parent 42bc411c46
commit 155f4300ba
4 changed files with 34 additions and 1 deletion

View File

@@ -51,6 +51,7 @@ Docs: https://docs.openclaw.ai
- Status/usage: let `/status` and `session_status` fall back to transcript token totals when the session meta store stayed at zero, so LM Studio, Ollama, DashScope, and similar OpenAI-compatible providers stop showing `Context: 0/...`. (#55041) Thanks @jjjojoj.
- Providers/Z.AI: preserve explicitly registered `glm-5-*` variants like `glm-5-turbo` instead of intercepting them with the generic GLM-5 forward-compat shim. (#48185) Thanks @haoyu-haoyu.
- Live model switching: only treat explicit user-driven model changes as pending live switches, so fallback rotation, heartbeat overrides, and compaction no longer trip `LiveSessionModelSwitchError` before making an API call. (#60266) Thanks @kiranvk-2011.
- Voice-call/OpenAI: pass full plugin config into realtime transcription provider resolution so streaming calls can discover the bundled OpenAI realtime transcription provider again. Fixes #60936. Thanks @sliekens and @vincentkoc.
- Plugins/OpenAI: enable `gpt-image-1` reference-image edits through `/images/edits` multipart uploads, and stop inferring unsupported resolution overrides when no explicit `size` or `resolution` is provided.
- Gateway/startup: default `gateway.mode` to `local` when unset, detect PID recycling in gateway lock files on Windows and macOS, and show startup progress so healthy restarts stop getting blocked by stale locks. (#54801, #60085, #59843)
- Mobile pairing/Android: tighten secure endpoint handling so Tailscale and public remote setup reject cleartext endpoints, private LAN pairing still works, merged-role approvals mint both node and operator device tokens, and bootstrap tokens survive node auto-pair until operator approval finishes. (#60128, #60208, #60221)

View File

@@ -1,3 +1,4 @@
import type { OpenClawConfig } from "openclaw/plugin-sdk/core";
import { beforeEach, describe, expect, it, vi } from "vitest";
import type { VoiceCallConfig } from "./config.js";
import type { CoreConfig } from "./core-bridge.js";
@@ -10,6 +11,7 @@ const mocks = vi.hoisted(() => ({
webhookStart: vi.fn(),
webhookStop: vi.fn(),
webhookGetMediaStreamHandler: vi.fn(),
webhookCtorArgs: [] as unknown[][],
startTunnel: vi.fn(),
setupTailscaleExposure: vi.fn(),
cleanupTailscaleExposure: vi.fn(),
@@ -28,6 +30,9 @@ vi.mock("./manager.js", () => ({
vi.mock("./webhook.js", () => ({
VoiceCallWebhookServer: class {
constructor(...args: unknown[]) {
mocks.webhookCtorArgs.push(args);
}
start = mocks.webhookStart;
stop = mocks.webhookStop;
getMediaStreamHandler = mocks.webhookGetMediaStreamHandler;
@@ -58,6 +63,7 @@ describe("createVoiceCallRuntime lifecycle", () => {
mocks.webhookStart.mockResolvedValue("http://127.0.0.1:3334/voice/webhook");
mocks.webhookStop.mockResolvedValue(undefined);
mocks.webhookGetMediaStreamHandler.mockReturnValue(undefined);
mocks.webhookCtorArgs.length = 0;
mocks.startTunnel.mockResolvedValue(null);
mocks.setupTailscaleExposure.mockResolvedValue(null);
mocks.cleanupTailscaleExposure.mockResolvedValue(undefined);
@@ -106,4 +112,25 @@ describe("createVoiceCallRuntime lifecycle", () => {
expect(mocks.cleanupTailscaleExposure).toHaveBeenCalledTimes(1);
expect(mocks.webhookStop).toHaveBeenCalledTimes(1);
});
it("passes fullConfig to the webhook server for streaming provider resolution", async () => {
  // Two distinct sentinel objects so the identity assertions below are meaningful.
  const bridgeConfig = {
    messages: { tts: { provider: "openai" } },
  } as CoreConfig;
  const pluginTreeConfig = {
    plugins: { entries: { openai: { enabled: true } } },
  } as OpenClawConfig;

  await createVoiceCallRuntime({
    config: createBaseConfig(),
    coreConfig: bridgeConfig,
    fullConfig: pluginTreeConfig,
    agentRuntime: {} as never,
  });

  // Positions 3 and 4 of the mocked webhook-server constructor call must carry
  // the exact coreConfig and fullConfig objects we passed to the runtime.
  const firstCtorCall = mocks.webhookCtorArgs[0];
  expect(firstCtorCall?.[3]).toBe(bridgeConfig);
  expect(firstCtorCall?.[4]).toBe(pluginTreeConfig);
});
});

View File

@@ -231,6 +231,7 @@ export async function createVoiceCallRuntime(params: {
manager,
provider,
coreConfig,
(fullConfig ?? (coreConfig as OpenClawConfig)) as OpenClawConfig,
agentRuntime,
);
if (realtimeProvider) {

View File

@@ -84,6 +84,7 @@ export class VoiceCallWebhookServer {
private manager: CallManager;
private provider: VoiceCallProvider;
private coreConfig: CoreConfig | null;
private fullConfig: OpenClawConfig | null;
private agentRuntime: CoreAgentDeps | null;
private stopStaleCallReaper: (() => void) | null = null;
private readonly webhookInFlightLimiter = createWebhookInFlightLimiter();
@@ -100,12 +101,14 @@ export class VoiceCallWebhookServer {
manager: CallManager,
provider: VoiceCallProvider,
coreConfig?: CoreConfig,
fullConfig?: OpenClawConfig,
agentRuntime?: CoreAgentDeps,
) {
this.config = normalizeVoiceCallConfig(config);
this.manager = manager;
this.provider = provider;
this.coreConfig = coreConfig ?? null;
this.fullConfig = fullConfig ?? null;
this.agentRuntime = agentRuntime ?? null;
}
@@ -159,7 +162,8 @@ export class VoiceCallWebhookServer {
*/
private async initializeMediaStreaming(): Promise<void> {
const streaming = this.config.streaming;
const pluginConfig = this.coreConfig as unknown as OpenClawConfig | undefined;
const pluginConfig =
this.fullConfig ?? (this.coreConfig as unknown as OpenClawConfig | undefined);
const { getRealtimeTranscriptionProvider, listRealtimeTranscriptionProviders } =
await import("./realtime-transcription.runtime.js");
const resolution = resolveConfiguredCapabilityProvider({