/**
 * Live smoke checks for the browser-side realtime Talk transports.
 *
 * Three independent probes run in one headless Chromium instance:
 *  - OpenAI Realtime over WebRTC (needs OPENAI_API_KEY),
 *  - Google Live over a browser WebSocket (needs GEMINI_API_KEY or GOOGLE_API_KEY),
 *  - the gateway-relay browser adapter, driven against a stub client.
 *
 * Each probe reports a SmokeResult; the process exit code is set to 1 when any
 * probe fails (a missing API key counts as a failure).
 */
import { mkdtemp, rm, writeFile } from "node:fs/promises";
import { tmpdir } from "node:os";
import path from "node:path";
import { GoogleGenAI, Modality } from "@google/genai";
import { chromium, type Browser } from "playwright";
import { createServer, type ViteDevServer } from "vite";

// Model/voice selection. `||` (not `??`) is intentional: an env var that is set
// but blank after trimming must also fall back to the default.
const OPENAI_REALTIME_MODEL =
  process.env.OPENCLAW_REALTIME_OPENAI_MODEL?.trim() || "gpt-realtime-1.5";
const OPENAI_REALTIME_VOICE = process.env.OPENCLAW_REALTIME_OPENAI_VOICE?.trim() || "alloy";
const GOOGLE_REALTIME_MODEL =
  process.env.OPENCLAW_REALTIME_GOOGLE_MODEL?.trim() ||
  "gemini-2.5-flash-native-audio-preview-12-2025";
const GOOGLE_REALTIME_VOICE = process.env.OPENCLAW_REALTIME_GOOGLE_VOICE?.trim() || "Kore";
const GOOGLE_LIVE_WS_URL =
  "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContentConstrained";

/** Outcome of one smoke probe; `details` carries diagnostics or the error text. */
type SmokeResult = {
  name: string;
  ok: boolean;
  details?: Record<string, unknown>;
};

/** Shape of the snapshot the relay smoke page publishes on `window.__relaySmokeResult`. */
type RelaySmokeSnapshot = {
  error?: string;
  requests?: Array<{ method?: string }>;
  statuses?: Array<{ status?: string }>;
  transcripts?: Array<{ role?: string; text?: string }>;
};

/** Returns the trimmed value of an environment variable, or `undefined` when unset or blank. */
function getEnv(name: string): string | undefined {
  const value = process.env[name]?.trim();
  return value ? value : undefined;
}

/** Reduces an unknown thrown value to a one-line message. */
function shortError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Reads a response body as text, truncated to 600 characters (plus "...") for error messages. */
async function readBoundedText(response: Response): Promise<string> {
  const text = await response.text();
  return text.length > 600 ? `${text.slice(0, 600)}...` : text;
}

/** Prints one probe result as `<name>: ok|failed` followed by its details object. */
function printResult(result: SmokeResult): void {
  console.log(`${result.name}: ${result.ok ? "ok" : "failed"}`, result.details ?? {});
}

/** Locale-aware comparator that treats `undefined` as the empty string. */
function compareStrings(left: string | undefined, right: string | undefined): number {
  return (left ?? "").localeCompare(right ?? "");
}

/**
 * Requests an OpenAI Realtime client secret for the configured model and voice.
 *
 * @param apiKey OpenAI API key sent as the bearer token.
 * @returns The secret string, read from `value` or from `client_secret.value`.
 * @throws Error when the HTTP call fails or no secret value is present in the payload.
 */
async function createOpenAIClientSecret(apiKey: string): Promise<string> {
  const response = await fetch("https://api.openai.com/v1/realtime/client_secrets", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${apiKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      session: {
        type: "realtime",
        model: OPENAI_REALTIME_MODEL,
        audio: {
          output: { voice: OPENAI_REALTIME_VOICE },
        },
      },
    }),
  });
  if (!response.ok) {
    throw new Error(
      `OpenAI Realtime client secret failed (${response.status}): ${await readBoundedText(
        response,
      )}`,
    );
  }
  const payload = (await response.json()) as Record<string, unknown>;
  // Accept both payload shapes: a top-level `value` or a nested `client_secret.value`.
  const nested =
    payload.client_secret && typeof payload.client_secret === "object"
      ? (payload.client_secret as Record<string, unknown>)
      : undefined;
  const value = typeof payload.value === "string" ? payload.value : undefined;
  const nestedValue = typeof nested?.value === "string" ? nested.value : undefined;
  const secret = value ?? nestedValue;
  if (!secret) {
    throw new Error("OpenAI Realtime client secret response did not include a value");
  }
  return secret;
}

/**
 * Negotiates an OpenAI Realtime WebRTC session from inside a browser page.
 *
 * The probe passes when the SDP answer contains an audio section and was applied
 * as the remote description; the observed connection state is reported in
 * `details` but does not affect `ok`.
 *
 * @param browser Launched Playwright browser to create the context in.
 * @param apiKey OpenAI API key used to mint the client secret.
 * @returns A SmokeResult; never throws (failures are captured in `details.error`).
 */
async function smokeOpenAIWebRtc(browser: Browser, apiKey: string): Promise<SmokeResult> {
  try {
    const clientSecret = await createOpenAIClientSecret(apiKey);
    const context = await browser.newContext({
      permissions: ["microphone"],
    });
    try {
      const page = await context.newPage();
      const result = await page.evaluate(
        async ({ clientSecret: secret }) => {
          let media: MediaStream;
          if (navigator.mediaDevices?.getUserMedia) {
            media = await navigator.mediaDevices.getUserMedia({ audio: true });
          } else {
            // No capture API available: synthesize an audio track from an oscillator.
            const audioContext = new AudioContext();
            const destination = audioContext.createMediaStreamDestination();
            const oscillator = audioContext.createOscillator();
            oscillator.connect(destination);
            oscillator.start();
            media = destination.stream;
          }
          const peer = new RTCPeerConnection();
          for (const track of media.getAudioTracks()) {
            peer.addTrack(track, media);
          }
          const channel = peer.createDataChannel("oai-events");
          // Settles on the first of: connected/failed state, data channel open,
          // or a 12 s timeout (which reports whatever state the peer is in).
          const connectionState = new Promise<string>((resolve) => {
            const timeout = window.setTimeout(() => resolve(peer.connectionState), 12_000);
            peer.addEventListener("connectionstatechange", () => {
              if (peer.connectionState === "connected" || peer.connectionState === "failed") {
                window.clearTimeout(timeout);
                resolve(peer.connectionState);
              }
            });
            channel.addEventListener("open", () => {
              window.clearTimeout(timeout);
              resolve(peer.connectionState || "data-channel-open");
            });
          });
          const offer = await peer.createOffer();
          await peer.setLocalDescription(offer);
          const response = await fetch("https://api.openai.com/v1/realtime/calls", {
            method: "POST",
            body: offer.sdp,
            headers: {
              Authorization: `Bearer ${secret}`,
              "Content-Type": "application/sdp",
            },
          });
          if (!response.ok) {
            throw new Error(`OpenAI Realtime SDP offer failed (${response.status})`);
          }
          const answer = await response.text();
          await peer.setRemoteDescription({ type: "answer", sdp: answer });
          const state = await connectionState;
          peer.close();
          media.getTracks().forEach((track) => track.stop());
          return {
            answerHasAudio: answer.includes("m=audio"),
            remoteDescriptionApplied: peer.remoteDescription?.type === "answer",
            connectionState: state,
          };
        },
        { clientSecret },
      );
      return {
        name: "openai-webrtc-browser",
        ok: result.answerHasAudio && result.remoteDescriptionApplied,
        details: {
          model: OPENAI_REALTIME_MODEL,
          answerHasAudio: result.answerHasAudio,
          remoteDescriptionApplied: result.remoteDescriptionApplied,
          connectionState: result.connectionState,
        },
      };
    } finally {
      // Close the context on failure paths too, so a failed probe does not leak
      // its page into the probes that share this browser.
      await context.close();
    }
  } catch (error) {
    return { name: "openai-webrtc-browser", ok: false, details: { error: shortError(error) } };
  }
}

/**
 * Creates a single-use Google Live auth token constrained to the configured
 * model, voice and audio response modality.
 *
 * The token expires after 30 minutes and must be used to open a new session
 * within 60 seconds.
 *
 * @param apiKey Gemini/Google API key.
 * @returns The trimmed token name.
 * @throws Error when the response carries no token name.
 */
async function createGoogleLiveToken(apiKey: string): Promise<string> {
  const ai = new GoogleGenAI({
    apiKey,
    httpOptions: { apiVersion: "v1alpha" },
  });
  const now = Date.now();
  const token = await ai.authTokens.create({
    config: {
      uses: 1,
      expireTime: new Date(now + 30 * 60 * 1000).toISOString(),
      newSessionExpireTime: new Date(now + 60 * 1000).toISOString(),
      liveConnectConstraints: {
        model: GOOGLE_REALTIME_MODEL,
        config: {
          responseModalities: [Modality.AUDIO],
          speechConfig: {
            voiceConfig: {
              prebuiltVoiceConfig: { voiceName: GOOGLE_REALTIME_VOICE },
            },
          },
          systemInstruction: "OpenClaw browser Talk live smoke.",
          inputAudioTranscription: {},
          outputAudioTranscription: {},
        },
      },
    },
  });
  const name = token.name?.trim();
  if (!name) {
    throw new Error("Google Live auth token response did not include a token name");
  }
  return name;
}

/**
 * Opens the Google Live WebSocket from a browser page with a freshly minted
 * token and waits for the server's `setupComplete` message.
 *
 * @param browser Launched Playwright browser to create the page in.
 * @param apiKey Gemini/Google API key used to mint the token.
 * @returns A SmokeResult; never throws (failures are captured in `details.error`).
 */
async function smokeGoogleLiveBrowserWs(browser: Browser, apiKey: string): Promise<SmokeResult> {
  try {
    const token = await createGoogleLiveToken(apiKey);
    const page = await browser.newPage();
    try {
      // Defines a pass-through `__name` global in the page.
      // NOTE(review): presumably a shim for the `__name` helper that esbuild-based
      // runners inject into functions serialized by page.evaluate — confirm.
      await page.evaluate("globalThis.__name = (fn) => fn");
      const result = await page.evaluate(
        async ({ model, tokenName, websocketUrl }) => {
          // Collected diagnostics, embedded into timeout/close error messages.
          const debug: {
            opened: boolean;
            messages: string[];
            close?: { code: number; reason: string };
            error: boolean;
          } = { opened: false, messages: [], error: false };
          const dataToText = async (data: unknown): Promise<string> => {
            if (typeof data === "string") {
              return data;
            }
            if (data instanceof Blob) {
              return await data.text();
            }
            if (data instanceof ArrayBuffer) {
              return new TextDecoder().decode(data);
            }
            return String(data);
          };
          const url = new URL(websocketUrl);
          url.searchParams.set("access_token", tokenName);
          const ws = new WebSocket(url.toString());
          const done = new Promise<{ setupComplete: boolean; readyState: number }>(
            (resolve, reject) => {
              const timeout = window.setTimeout(
                () => reject(new Error(`Google Live setup timed out: ${JSON.stringify(debug)}`)),
                15_000,
              );
              ws.addEventListener("open", () => {
                debug.opened = true;
                ws.send(
                  JSON.stringify({
                    setup: {
                      model: model.startsWith("models/") ? model : `models/${model}`,
                      generationConfig: { responseModalities: ["AUDIO"] },
                      inputAudioTranscription: {},
                      outputAudioTranscription: {},
                    },
                  }),
                );
              });
              ws.addEventListener("message", (event) => {
                void (async () => {
                  const text = await dataToText(event.data);
                  debug.messages.push(text.slice(0, 300));
                  const message = JSON.parse(text) as { setupComplete?: unknown };
                  if (!message.setupComplete) {
                    return;
                  }
                  window.clearTimeout(timeout);
                  resolve({ setupComplete: true, readyState: ws.readyState });
                })().catch((error) => {
                  window.clearTimeout(timeout);
                  reject(error);
                });
              });
              ws.addEventListener("error", () => {
                debug.error = true;
                window.clearTimeout(timeout);
                reject(new Error("Google Live browser WebSocket errored"));
              });
              ws.addEventListener("close", (event) => {
                debug.close = { code: event.code, reason: event.reason };
                // A normal (1000) close is not a failure; anything else is.
                if (event.code !== 1000) {
                  window.clearTimeout(timeout);
                  reject(
                    new Error(`Google Live browser WebSocket closed: ${JSON.stringify(debug)}`),
                  );
                }
              });
            },
          );
          const value = await done;
          ws.close(1000);
          return value;
        },
        {
          model: GOOGLE_REALTIME_MODEL,
          tokenName: token,
          websocketUrl: GOOGLE_LIVE_WS_URL,
        },
      );
      return {
        name: "google-live-browser-ws",
        ok: result.setupComplete === true,
        details: { model: GOOGLE_REALTIME_MODEL, setupComplete: result.setupComplete === true },
      };
    } finally {
      // Close the page even when setup fails or times out.
      await page.close();
    }
  } catch (error) {
    return { name: "google-live-browser-ws", ok: false, details: { error: shortError(error) } };
  }
}

/**
 * Exercises `GatewayRelayRealtimeTalkTransport` in a real browser against a stub
 * gateway client.
 *
 * A temporary Vite project is generated whose entry module starts the
 * transport, emits a scripted sequence of relay events (ready, transcripts,
 * audio, mark, toolCall), and publishes the recorded requests, statuses and
 * transcripts on `window.__relaySmokeResult`. The probe passes when all
 * expected relay methods were requested, the "listening" and "thinking"
 * statuses were seen, and both scripted transcripts arrived.
 *
 * @param browser Launched Playwright browser to create the context in.
 * @returns A SmokeResult; failures inside the probe are captured in `details.error`.
 */
async function smokeGatewayRelayBrowser(browser: Browser): Promise<SmokeResult> {
  let server: ViteDevServer | undefined;
  const dir = await mkdtemp(path.join(tmpdir(), "openclaw-realtime-talk-"));
  try {
    // Forward slashes so the path can be embedded in a Vite `/@fs/` import on Windows.
    const repoRoot = process.cwd().replaceAll("\\", "/");
    // The page must load the entry module; an empty document would never set
    // `__relaySmokeDone` and the probe could only time out.
    await writeFile(
      path.join(dir, "index.html"),
      '<!doctype html><html><body><script type="module" src="/main.ts"></script></body></html>',
    );
    await writeFile(
      path.join(dir, "main.ts"),
      `
import { GatewayRelayRealtimeTalkTransport } from "/@fs/${repoRoot}/ui/src/ui/chat/realtime-talk-gateway-relay.ts";

const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
const listeners = new Set();
const requests = [];
const statuses = [];
const transcripts = [];

function emit(event) {
  for (const listener of [...listeners]) {
    listener(event);
  }
}

function base64ZeroPcm(bytes) {
  let text = "";
  for (let index = 0; index < bytes; index += 1) {
    text += String.fromCharCode(0);
  }
  return btoa(text);
}

const client = {
  addEventListener(listener) {
    listeners.add(listener);
    return () => listeners.delete(listener);
  },
  async request(method, params) {
    requests.push({ method, params });
    if (method === "chat.send") {
      const runId = params.idempotencyKey || "run-smoke";
      window.setTimeout(() => {
        emit({
          event: "chat",
          payload: { runId, state: "final", message: { text: "relay consult ok" } },
        });
      }, 50);
      return { runId };
    }
    return { ok: true };
  },
};

try {
  const transport = new GatewayRelayRealtimeTalkTransport(
    {
      provider: "smoke",
      transport: "gateway-relay",
      relaySessionId: "relay-live-smoke",
      audio: {
        inputEncoding: "pcm16",
        inputSampleRateHz: 24000,
        outputEncoding: "pcm16",
        outputSampleRateHz: 24000,
      },
    },
    {
      client,
      sessionKey: "main",
      callbacks: {
        onStatus: (status, detail) => statuses.push({ status, detail }),
        onTranscript: (entry) => transcripts.push(entry),
      },
    },
  );
  await transport.start();
  emit({
    event: "talk.realtime.relay",
    payload: { relaySessionId: "relay-live-smoke", type: "ready" },
  });
  emit({
    event: "talk.realtime.relay",
    payload: { relaySessionId: "relay-live-smoke", type: "transcript", role: "user", text: "relay user", final: true },
  });
  emit({
    event: "talk.realtime.relay",
    payload: { relaySessionId: "relay-live-smoke", type: "transcript", role: "assistant", text: "relay assistant", final: false },
  });
  emit({
    event: "talk.realtime.relay",
    payload: { relaySessionId: "relay-live-smoke", type: "audio", audioBase64: base64ZeroPcm(480) },
  });
  const processor = transport.inputProcessor;
  processor?.onaudioprocess?.({
    inputBuffer: { getChannelData: () => new Float32Array(160).fill(0.01) },
  });
  emit({
    event: "talk.realtime.relay",
    payload: { relaySessionId: "relay-live-smoke", type: "mark" },
  });
  emit({
    event: "talk.realtime.relay",
    payload: {
      relaySessionId: "relay-live-smoke",
      type: "toolCall",
      callId: "call-smoke",
      name: "openclaw_agent_consult",
      args: { question: "confirm relay consult path" },
    },
  });
  await delay(400);
  transport.stop();
  await delay(100);
  window.__relaySmokeResult = { requests, statuses, transcripts };
  window.__relaySmokeDone = true;
} catch (error) {
  window.__relaySmokeResult = {
    error: error instanceof Error ? error.message : String(error),
    requests,
    statuses,
    transcripts,
  };
  window.__relaySmokeDone = true;
}
`,
    );
    // NOTE(review): the entry module imports a repo file through `/@fs/` from
    // outside `root`; confirm Vite's `server.fs.allow` default permits that here.
    server = await createServer({
      root: dir,
      logLevel: "silent",
      server: { host: "127.0.0.1", port: 0 },
    });
    await server.listen();
    const address = server.httpServer?.address();
    if (!address || typeof address === "string") {
      throw new Error("Vite did not expose a local port");
    }
    const url = `http://127.0.0.1:${address.port}/`;
    const context = await browser.newContext({ permissions: ["microphone"] });
    let result: RelaySmokeSnapshot;
    try {
      await context.grantPermissions(["microphone"], { origin: url });
      const page = await context.newPage();
      await page.goto(url);
      await page.waitForFunction(() => globalThis.__relaySmokeDone === true, undefined, {
        timeout: 15_000,
      });
      result = (await page.evaluate(() => globalThis.__relaySmokeResult)) as RelaySmokeSnapshot;
    } finally {
      // Close the context on timeouts and navigation errors too.
      await context.close();
    }
    if (result.error) {
      throw new Error(result.error);
    }
    const methods = new Set((result.requests ?? []).map((request) => request.method));
    const statusNames = new Set((result.statuses ?? []).map((entry) => entry.status));
    const transcriptTexts = new Set((result.transcripts ?? []).map((entry) => entry.text));
    const expectedMethods = [
      "talk.realtime.relayAudio",
      "talk.realtime.relayMark",
      "talk.realtime.relayToolResult",
      "talk.realtime.relayStop",
    ];
    const ok =
      expectedMethods.every((method) => methods.has(method)) &&
      statusNames.has("listening") &&
      statusNames.has("thinking") &&
      transcriptTexts.has("relay user") &&
      transcriptTexts.has("relay assistant");
    return {
      name: "gateway-relay-browser-adapter",
      ok,
      details: {
        methods: [...methods].toSorted(compareStrings),
        statuses: [...statusNames].toSorted(compareStrings),
        transcripts: [...transcriptTexts].toSorted(compareStrings),
      },
    };
  } catch (error) {
    return {
      name: "gateway-relay-browser-adapter",
      ok: false,
      details: { error: shortError(error) },
    };
  } finally {
    // Remove the temp project even if stopping the dev server throws.
    try {
      await server?.close();
    } finally {
      await rm(dir, { recursive: true, force: true });
    }
  }
}

/**
 * Runs all probes sequentially in one headless Chromium instance, prints each
 * result, and sets `process.exitCode` to 1 when any probe failed.
 */
async function main(): Promise<void> {
  const openAIKey = getEnv("OPENAI_API_KEY");
  const googleKey = getEnv("GEMINI_API_KEY") ?? getEnv("GOOGLE_API_KEY");
  const browser = await chromium.launch({
    headless: true,
    args: [
      "--autoplay-policy=no-user-gesture-required",
      "--no-sandbox",
      "--use-fake-device-for-media-stream",
      "--use-fake-ui-for-media-stream",
    ],
  });
  const results: SmokeResult[] = [];
  try {
    if (!openAIKey) {
      results.push({
        name: "openai-webrtc-browser",
        ok: false,
        details: { error: "OPENAI_API_KEY missing" },
      });
    } else {
      results.push(await smokeOpenAIWebRtc(browser, openAIKey));
    }
    if (!googleKey) {
      results.push({
        name: "google-live-browser-ws",
        ok: false,
        details: { error: "GEMINI_API_KEY or GOOGLE_API_KEY missing" },
      });
    } else {
      results.push(await smokeGoogleLiveBrowserWs(browser, googleKey));
    }
    results.push(await smokeGatewayRelayBrowser(browser));
  } finally {
    await browser.close();
  }
  for (const result of results) {
    printResult(result);
  }
  if (results.some((result) => !result.ok)) {
    process.exitCode = 1;
  }
}

await main();