mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-11 01:01:13 +00:00
fix(voice-call): use full config for realtime transcription (#61224)
* fix(voice-call): use full config for realtime transcription
* fix(changelog): note voice-call transcription regression
* Update CHANGELOG.md
This commit is contained in:
@@ -51,6 +51,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Status/usage: let `/status` and `session_status` fall back to transcript token totals when the session meta store stayed at zero, so LM Studio, Ollama, DashScope, and similar OpenAI-compatible providers stop showing `Context: 0/...`. (#55041) Thanks @jjjojoj.
|
||||
- Providers/Z.AI: preserve explicitly registered `glm-5-*` variants like `glm-5-turbo` instead of intercepting them with the generic GLM-5 forward-compat shim. (#48185) Thanks @haoyu-haoyu.
|
||||
- Live model switching: only treat explicit user-driven model changes as pending live switches, so fallback rotation, heartbeat overrides, and compaction no longer trip `LiveSessionModelSwitchError` before making an API call. (#60266) Thanks @kiranvk-2011.
|
||||
- Voice-call/OpenAI: pass full plugin config into realtime transcription provider resolution so streaming calls can discover the bundled OpenAI realtime transcription provider again. Fixes #60936. Thanks @sliekens and @vincentkoc.
|
||||
- Plugins/OpenAI: enable `gpt-image-1` reference-image edits through `/images/edits` multipart uploads, and stop inferring unsupported resolution overrides when no explicit `size` or `resolution` is provided.
|
||||
- Gateway/startup: default `gateway.mode` to `local` when unset, detect PID recycling in gateway lock files on Windows and macOS, and show startup progress so healthy restarts stop getting blocked by stale locks. (#54801, #60085, #59843)
|
||||
- Mobile pairing/Android: tighten secure endpoint handling so Tailscale and public remote setup reject cleartext endpoints, private LAN pairing still works, merged-role approvals mint both node and operator device tokens, and bootstrap tokens survive node auto-pair until operator approval finishes. (#60128, #60208, #60221)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import type { OpenClawConfig } from "openclaw/plugin-sdk/core";
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { VoiceCallConfig } from "./config.js";
|
||||
import type { CoreConfig } from "./core-bridge.js";
|
||||
@@ -10,6 +11,7 @@ const mocks = vi.hoisted(() => ({
|
||||
webhookStart: vi.fn(),
|
||||
webhookStop: vi.fn(),
|
||||
webhookGetMediaStreamHandler: vi.fn(),
|
||||
webhookCtorArgs: [] as unknown[][],
|
||||
startTunnel: vi.fn(),
|
||||
setupTailscaleExposure: vi.fn(),
|
||||
cleanupTailscaleExposure: vi.fn(),
|
||||
@@ -28,6 +30,9 @@ vi.mock("./manager.js", () => ({
|
||||
|
||||
vi.mock("./webhook.js", () => ({
|
||||
VoiceCallWebhookServer: class {
|
||||
constructor(...args: unknown[]) {
|
||||
mocks.webhookCtorArgs.push(args);
|
||||
}
|
||||
start = mocks.webhookStart;
|
||||
stop = mocks.webhookStop;
|
||||
getMediaStreamHandler = mocks.webhookGetMediaStreamHandler;
|
||||
@@ -58,6 +63,7 @@ describe("createVoiceCallRuntime lifecycle", () => {
|
||||
mocks.webhookStart.mockResolvedValue("http://127.0.0.1:3334/voice/webhook");
|
||||
mocks.webhookStop.mockResolvedValue(undefined);
|
||||
mocks.webhookGetMediaStreamHandler.mockReturnValue(undefined);
|
||||
mocks.webhookCtorArgs.length = 0;
|
||||
mocks.startTunnel.mockResolvedValue(null);
|
||||
mocks.setupTailscaleExposure.mockResolvedValue(null);
|
||||
mocks.cleanupTailscaleExposure.mockResolvedValue(undefined);
|
||||
@@ -106,4 +112,25 @@ describe("createVoiceCallRuntime lifecycle", () => {
|
||||
expect(mocks.cleanupTailscaleExposure).toHaveBeenCalledTimes(1);
|
||||
expect(mocks.webhookStop).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("passes fullConfig to the webhook server for streaming provider resolution", async () => {
|
||||
const coreConfig = { messages: { tts: { provider: "openai" } } } as CoreConfig;
|
||||
const fullConfig = {
|
||||
plugins: {
|
||||
entries: {
|
||||
openai: { enabled: true },
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
|
||||
await createVoiceCallRuntime({
|
||||
config: createBaseConfig(),
|
||||
coreConfig,
|
||||
fullConfig,
|
||||
agentRuntime: {} as never,
|
||||
});
|
||||
|
||||
expect(mocks.webhookCtorArgs[0]?.[3]).toBe(coreConfig);
|
||||
expect(mocks.webhookCtorArgs[0]?.[4]).toBe(fullConfig);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -231,6 +231,7 @@ export async function createVoiceCallRuntime(params: {
|
||||
manager,
|
||||
provider,
|
||||
coreConfig,
|
||||
(fullConfig ?? (coreConfig as OpenClawConfig)) as OpenClawConfig,
|
||||
agentRuntime,
|
||||
);
|
||||
if (realtimeProvider) {
|
||||
|
||||
@@ -84,6 +84,7 @@ export class VoiceCallWebhookServer {
|
||||
private manager: CallManager;
|
||||
private provider: VoiceCallProvider;
|
||||
private coreConfig: CoreConfig | null;
|
||||
private fullConfig: OpenClawConfig | null;
|
||||
private agentRuntime: CoreAgentDeps | null;
|
||||
private stopStaleCallReaper: (() => void) | null = null;
|
||||
private readonly webhookInFlightLimiter = createWebhookInFlightLimiter();
|
||||
@@ -100,12 +101,14 @@ export class VoiceCallWebhookServer {
|
||||
manager: CallManager,
|
||||
provider: VoiceCallProvider,
|
||||
coreConfig?: CoreConfig,
|
||||
fullConfig?: OpenClawConfig,
|
||||
agentRuntime?: CoreAgentDeps,
|
||||
) {
|
||||
this.config = normalizeVoiceCallConfig(config);
|
||||
this.manager = manager;
|
||||
this.provider = provider;
|
||||
this.coreConfig = coreConfig ?? null;
|
||||
this.fullConfig = fullConfig ?? null;
|
||||
this.agentRuntime = agentRuntime ?? null;
|
||||
}
|
||||
|
||||
@@ -159,7 +162,8 @@ export class VoiceCallWebhookServer {
|
||||
*/
|
||||
private async initializeMediaStreaming(): Promise<void> {
|
||||
const streaming = this.config.streaming;
|
||||
const pluginConfig = this.coreConfig as unknown as OpenClawConfig | undefined;
|
||||
const pluginConfig =
|
||||
this.fullConfig ?? (this.coreConfig as unknown as OpenClawConfig | undefined);
|
||||
const { getRealtimeTranscriptionProvider, listRealtimeTranscriptionProviders } =
|
||||
await import("./realtime-transcription.runtime.js");
|
||||
const resolution = resolveConfiguredCapabilityProvider({
|
||||
|
||||
Reference in New Issue
Block a user