Fix Google Meet realtime interruption playback (#72524)

Fixes #72523.

Remote proof:
- CI run 24980529154 passed on 29f825bea5.
- Blacksmith Testbox tbx_01kq6tsgbaxgstxmtearwy9n4w passed focused formatting, Google Meet tests, Google realtime provider tests, and extension test typecheck.

Thanks @BsnizND.

Co-authored-by: BSnizND <199837910+BsnizND@users.noreply.github.com>
This commit is contained in:
BsnizND
2026-04-26 23:49:10 -07:00
committed by GitHub
parent 8811112ab3
commit 2785be2604
10 changed files with 373 additions and 23 deletions

View File

@@ -24,6 +24,7 @@ Docs: https://docs.openclaw.ai
- Web search: route plugin-scoped web_search SecretRefs through the active runtime config snapshot so provider execution receives resolved credentials across app/runtime paths, including `plugins.entries.brave.config.webSearch.apiKey`. Fixes #68690. Thanks @VACInc.
- Voice Call: allow SecretRef-backed Twilio auth tokens and call-specific OpenAI/ElevenLabs TTS API keys through the plugin config surface. Fixes #68690. Thanks @joshavant.
- Google Meet: clean stale chrome-node realtime audio bridges by URL before rejoining, expose active node bridge inspection, and tolerate transient node input pull failures instead of dropping the Meet session. Fixes #72371. (#72372) Thanks @BsnizND.
- Google Meet: clear queued Gemini Live playback when realtime interruptions arrive, restart Chrome command-pair audio output after clears, and expose Google Live interruption/VAD config knobs for Meet and Voice Call realtime bridges. Fixes #72523. (#72524) Thanks @BsnizND.
- Matrix/E2EE: stabilize recovery and broken-device QA flows while avoiding Matrix device-cleanup sync races that could leave shutdown-time crypto work running. Thanks @gumadeiras.
- Cron: treat isolated run-level agent failures as job errors even when no reply payload is produced, synthesizing a safe error payload so model/provider failures increment error counters and trigger failure notifications instead of clearing as successful. Fixes #43604; carries forward #43631. Thanks @SPFAdvisors.
- Cron: preserve exact `NO_REPLY` tool results from isolated jobs with empty final assistant turns as quiet successes instead of surfacing incomplete-turn errors. Fixes #68452; carries forward #68453. Thanks @anyech.

View File

@@ -308,6 +308,9 @@ Gemini Live API for backend audio bridges such as Voice Call and Google Meet.
| VAD start sensitivity | `...google.startSensitivity` | (unset) |
| VAD end sensitivity | `...google.endSensitivity` | (unset) |
| Silence duration | `...google.silenceDurationMs` | (unset) |
| Activity handling | `...google.activityHandling` | Google default (`start-of-activity-interrupts`) |
| Turn coverage | `...google.turnCoverage` | Google default (`only-activity`) |
| Disable auto VAD | `...google.automaticActivityDetectionDisabled` | `false` |
| API key | `...google.apiKey` | Falls back to `models.providers.google.apiKey`, `GEMINI_API_KEY`, or `GOOGLE_API_KEY` |
Example Voice Call realtime config:
@@ -326,6 +329,8 @@ Example Voice Call realtime config:
google: {
model: "gemini-2.5-flash-native-audio-preview-12-2025",
voice: "Kore",
activityHandling: "start-of-activity-interrupts",
turnCoverage: "only-activity",
},
},
},

View File

@@ -217,6 +217,7 @@ type TestBridgeProcess = {
killed: boolean;
kill: ReturnType<typeof vi.fn>;
on: EventEmitter["on"];
emit: EventEmitter["emit"];
};
describe("google-meet plugin", () => {
@@ -1881,6 +1882,7 @@ describe("google-meet plugin", () => {
let callbacks:
| {
onAudio: (audio: Buffer) => void;
onClearAudio: () => void;
onMark?: (markName: string) => void;
onToolCall?: (event: {
itemId: string;
@@ -1916,6 +1918,7 @@ describe("google-meet plugin", () => {
};
const inputStdout = new PassThrough();
const outputStdinWrites: Buffer[] = [];
const replacementOutputStdinWrites: Buffer[] = [];
const makeProcess = (stdio: {
stdin?: { write(chunk: unknown): unknown } | null;
stdout?: { on(event: "data", listener: (chunk: unknown) => void): unknown } | null;
@@ -1937,9 +1940,20 @@ describe("google-meet plugin", () => {
done();
},
});
const replacementOutputStdin = new Writable({
write(chunk, _encoding, done) {
replacementOutputStdinWrites.push(Buffer.from(chunk));
done();
},
});
const inputProcess = makeProcess({ stdout: inputStdout, stdin: null });
const outputProcess = makeProcess({ stdin: outputStdin, stdout: null });
const spawnMock = vi.fn().mockReturnValueOnce(outputProcess).mockReturnValueOnce(inputProcess);
const replacementOutputProcess = makeProcess({ stdin: replacementOutputStdin, stdout: null });
const spawnMock = vi
.fn()
.mockReturnValueOnce(outputProcess)
.mockReturnValueOnce(inputProcess)
.mockReturnValueOnce(replacementOutputProcess);
const sessionStore: Record<string, unknown> = {};
const runtime = {
agent: {
@@ -1977,6 +1991,8 @@ describe("google-meet plugin", () => {
inputStdout.write(Buffer.from([1, 2, 3]));
callbacks?.onAudio(Buffer.from([4, 5]));
callbacks?.onMark?.("mark-1");
callbacks?.onClearAudio();
callbacks?.onAudio(Buffer.from([6, 7]));
callbacks?.onReady?.();
callbacks?.onToolCall?.({
itemId: "item-1",
@@ -1993,6 +2009,10 @@ describe("google-meet plugin", () => {
});
expect(sendAudio).toHaveBeenCalledWith(Buffer.from([1, 2, 3]));
expect(outputStdinWrites).toEqual([Buffer.from([4, 5])]);
expect(outputProcess.kill).toHaveBeenCalledWith("SIGTERM");
expect(replacementOutputStdinWrites).toEqual([Buffer.from([6, 7])]);
outputProcess.emit("error", new Error("stale output process failed after clear"));
expect(bridge.close).not.toHaveBeenCalled();
expect(bridge.acknowledgeMark).toHaveBeenCalled();
expect(bridge.triggerGreeting).not.toHaveBeenCalled();
handle.speak("Say exactly: hello from the meeting.");
@@ -2003,7 +2023,8 @@ describe("google-meet plugin", () => {
audioInputActive: true,
audioOutputActive: true,
lastInputBytes: 3,
lastOutputBytes: 2,
lastOutputBytes: 4,
clearCount: 1,
});
expect(callbacks).toMatchObject({
tools: [
@@ -2035,6 +2056,7 @@ describe("google-meet plugin", () => {
let callbacks:
| {
onAudio: (audio: Buffer) => void;
onClearAudio: () => void;
onToolCall?: (event: {
itemId: string;
callId: string;
@@ -2114,6 +2136,7 @@ describe("google-meet plugin", () => {
});
callbacks?.onAudio(Buffer.from([1, 2, 3]));
callbacks?.onClearAudio();
callbacks?.onReady?.();
callbacks?.onToolCall?.({
itemId: "item-1",
@@ -2138,6 +2161,19 @@ describe("google-meet plugin", () => {
}),
);
});
await vi.waitFor(() => {
expect(runtime.nodes.invoke).toHaveBeenCalledWith(
expect.objectContaining({
nodeId: "node-1",
command: "googlemeet.chrome",
params: {
action: "clearAudio",
bridgeId: "bridge-1",
},
timeoutMs: 5_000,
}),
);
});
await vi.waitFor(() => {
expect(bridge.submitToolResult).toHaveBeenCalledWith("tool-call-1", {
text: "Use the launch update.",
@@ -2166,6 +2202,7 @@ describe("google-meet plugin", () => {
audioOutputActive: true,
lastInputBytes: 3,
lastOutputBytes: 3,
clearCount: 1,
});
await handle.stop();

View File

@@ -40,6 +40,83 @@ vi.mock("node:child_process", async (importOriginal) => {
});
describe("google-meet node host bridge sessions", () => {
// Regression test for realtime interruption playback: a "clearAudio" command
// must replace the bridge's output process (dropping queued audio) without
// tearing down the bridge session when the stale output process later
// errors or exits.
it("clears output playback without closing the active bridge when the old output exits", async () => {
  const { handleGoogleMeetNodeHostCommand } = await import("./src/node-host.js");
  const originalPlatform = process.platform;
  children.length = 0;
  // NOTE(review): forces the darwin platform path so the mocked audio
  // commands are accepted — confirm against node-host platform handling.
  Object.defineProperty(process, "platform", { configurable: true, value: "darwin" });
  try {
    // Start a realtime bridge backed by mock record/playback commands.
    const start = JSON.parse(
      await handleGoogleMeetNodeHostCommand(
        JSON.stringify({
          action: "start",
          url: "https://meet.google.com/xyz-abcd-uvw",
          mode: "realtime",
          launch: false,
          audioInputCommand: ["mock-rec"],
          audioOutputCommand: ["mock-play"],
        }),
      ),
    );
    // Start spawns exactly one output child and one input child.
    expect(children).toHaveLength(2);
    const firstOutput = children[0];
    // Clearing audio restarts playback and reports an incremented clearCount.
    const cleared = JSON.parse(
      await handleGoogleMeetNodeHostCommand(
        JSON.stringify({
          action: "clearAudio",
          bridgeId: start.bridgeId,
        }),
      ),
    );
    expect(cleared).toEqual({ bridgeId: start.bridgeId, ok: true, clearCount: 1 });
    // A replacement output process was spawned...
    expect(children).toHaveLength(3);
    // ...and the stale one was asked to terminate.
    expect(firstOutput?.kill).toHaveBeenCalledWith("SIGTERM");
    // Failure/exit of the stale process must NOT close the session.
    firstOutput?.emit("error", new Error("stale output failed after clear"));
    firstOutput?.emit("exit", 0, "SIGTERM");
    const status = JSON.parse(
      await handleGoogleMeetNodeHostCommand(
        JSON.stringify({
          action: "status",
          bridgeId: start.bridgeId,
        }),
      ),
    );
    expect(status.bridge).toMatchObject({
      bridgeId: start.bridgeId,
      closed: false,
      clearCount: 1,
    });
    // Audio pushed after the clear goes only to the replacement process.
    const audio = Buffer.from([1, 2, 3]);
    await handleGoogleMeetNodeHostCommand(
      JSON.stringify({
        action: "pushAudio",
        bridgeId: start.bridgeId,
        base64: audio.toString("base64"),
      }),
    );
    expect(children[2]?.stdin?.write).toHaveBeenCalledWith(audio);
    expect(firstOutput?.stdin?.write).not.toHaveBeenCalled();
    await handleGoogleMeetNodeHostCommand(
      JSON.stringify({
        action: "stop",
        bridgeId: start.bridgeId,
      }),
    );
  } finally {
    // Restore the real platform value for subsequent tests.
    Object.defineProperty(process, "platform", { configurable: true, value: originalPlatform });
  }
});
it("lists active bridge sessions and hides closed sessions", async () => {
const { handleGoogleMeetNodeHostCommand } = await import("./src/node-host.js");
const originalPlatform = process.platform;

View File

@@ -15,6 +15,7 @@ type NodeBridgeSession = {
id: string;
url?: string;
mode?: string;
outputCommand: { command: string; args: string[] };
input?: ChildProcess;
output?: ChildProcess;
chunks: Buffer[];
@@ -23,9 +24,11 @@ type NodeBridgeSession = {
createdAt: string;
lastInputAt?: string;
lastOutputAt?: string;
lastClearAt?: string;
lastInputBytes: number;
lastOutputBytes: number;
closedAt?: string;
clearCount: number;
};
const sessions = new Map<string, NodeBridgeSession>();
@@ -110,6 +113,25 @@ function stopSession(session: NodeBridgeSession) {
wake(session);
}
/**
 * Stop the bridge session when its *current* output process dies.
 * Events from a process that has since been replaced (e.g. by clearAudio)
 * are ignored, so restarting playback does not tear down the session.
 */
function attachOutputProcessHandlers(session: NodeBridgeSession, outputProcess: ChildProcess) {
  const stopIfStillCurrent = () => {
    // A stale process (already swapped out of session.output) is harmless.
    if (session.output === outputProcess) {
      stopSession(session);
    }
  };
  outputProcess.on("exit", stopIfStillCurrent);
  outputProcess.on("error", stopIfStillCurrent);
}
/**
 * Spawn an audio playback process: stdin is piped so PCM can be streamed in,
 * stdout is discarded, and stderr is piped for diagnostics.
 */
function startOutputProcess(command: { command: string; args: string[] }) {
  const { command: executable, args } = command;
  return spawn(executable, args, { stdio: ["pipe", "ignore", "pipe"] });
}
function startCommandPair(params: {
inputCommand: string[];
outputCommand: string[];
@@ -122,16 +144,16 @@ function startCommandPair(params: {
id: `meet_node_${randomUUID()}`,
url: params.url,
mode: params.mode,
outputCommand: output,
chunks: [],
waiters: [],
closed: false,
createdAt: new Date().toISOString(),
lastInputBytes: 0,
lastOutputBytes: 0,
clearCount: 0,
};
const outputProcess = spawn(output.command, output.args, {
stdio: ["pipe", "ignore", "pipe"],
});
const outputProcess = startOutputProcess(output);
const inputProcess = spawn(input.command, input.args, {
stdio: ["ignore", "pipe", "pipe"],
});
@@ -148,9 +170,8 @@ function startCommandPair(params: {
wake(session);
});
inputProcess.on("exit", () => stopSession(session));
outputProcess.on("exit", () => stopSession(session));
attachOutputProcessHandlers(session, outputProcess);
inputProcess.on("error", () => stopSession(session));
outputProcess.on("error", () => stopSession(session));
sessions.set(session.id, session);
return session;
}
@@ -224,6 +245,25 @@ function pushAudio(params: Record<string, unknown>) {
return { bridgeId, ok: true };
}
function clearAudio(params: Record<string, unknown>) {
const bridgeId = readString(params.bridgeId);
if (!bridgeId) {
throw new Error("bridgeId required");
}
const session = sessions.get(bridgeId);
if (!session || session.closed) {
throw new Error(`bridge is not open: ${bridgeId}`);
}
const previousOutput = session.output;
const outputProcess = startOutputProcess(session.outputCommand);
session.output = outputProcess;
attachOutputProcessHandlers(session, outputProcess);
session.clearCount += 1;
session.lastClearAt = new Date().toISOString();
terminateChild(previousOutput);
return { bridgeId, ok: true, clearCount: session.clearCount };
}
function startChrome(params: Record<string, unknown>) {
const url = readString(params.url);
if (!url) {
@@ -317,8 +357,11 @@ function bridgeStatus(params: Record<string, unknown>) {
createdAt: session.createdAt,
lastInputAt: session.lastInputAt,
lastOutputAt: session.lastOutputAt,
lastClearAt: session.lastClearAt,
lastInputBytes: session.lastInputBytes,
lastOutputBytes: session.lastOutputBytes,
clearCount: session.clearCount,
queuedInputChunks: session.chunks.length,
}
: bridgeId
? { bridgeId, closed: true }
@@ -438,6 +481,9 @@ export async function handleGoogleMeetNodeHostCommand(paramsJSON?: string | null
case "pushAudio":
result = pushAudio(params);
break;
case "clearAudio":
result = clearAudio(params);
break;
case "stop":
result = stopChrome(params);
break;

View File

@@ -50,10 +50,12 @@ export async function startNodeRealtimeAudioBridge(params: {
let realtimeReady = false;
let lastInputAt: string | undefined;
let lastOutputAt: string | undefined;
let lastClearAt: string | undefined;
let lastInputBytes = 0;
let lastOutputBytes = 0;
let consecutiveInputErrors = 0;
let lastInputError: string | undefined;
let clearCount = 0;
const resolved = resolveGoogleMeetRealtimeProvider({
config: params.config,
fullConfig: params.fullConfig,
@@ -118,6 +120,26 @@ export async function startNodeRealtimeAudioBridge(params: {
void stop();
});
},
// Called when the realtime provider signals an interruption: clear any
// queued playback on the chrome-node bridge so stale model audio stops.
clearAudio: () => {
  lastClearAt = new Date().toISOString();
  clearCount += 1;
  // Fire-and-forget node invocation; the node host restarts its own
  // output process in response to the clearAudio action.
  void params.runtime.nodes
    .invoke({
      nodeId: params.nodeId,
      command: "googlemeet.chrome",
      params: {
        action: "clearAudio",
        bridgeId: params.bridgeId,
      },
      timeoutMs: 5_000,
    })
    .catch((error) => {
      params.logger.warn(
        `[google-meet] node audio clear failed: ${formatErrorMessage(error)}`,
      );
      // A failed clear indicates an unhealthy node bridge; tear down the
      // session rather than keep playing stale audio.
      void stop();
    });
},
},
onTranscript: (role, text, isFinal) => {
if (isFinal) {
@@ -230,10 +252,12 @@ export async function startNodeRealtimeAudioBridge(params: {
audioOutputActive: lastOutputBytes > 0,
lastInputAt,
lastOutputAt,
lastClearAt,
lastInputBytes,
lastOutputBytes,
consecutiveInputErrors,
lastInputError,
clearCount,
bridgeClosed: stopped,
}),
stop,

View File

@@ -91,9 +91,11 @@ export async function startCommandRealtimeAudioBridge(params: {
const spawnFn: SpawnFn =
params.spawn ??
((command, args, options) => spawn(command, args, options) as unknown as BridgeProcess);
const outputProcess = spawnFn(output.command, output.args, {
stdio: ["pipe", "ignore", "pipe"],
});
const spawnOutputProcess = () =>
spawnFn(output.command, output.args, {
stdio: ["pipe", "ignore", "pipe"],
});
let outputProcess = spawnOutputProcess();
const inputProcess = spawnFn(input.command, input.args, {
stdio: ["ignore", "pipe", "pipe"],
});
@@ -104,6 +106,8 @@ export async function startCommandRealtimeAudioBridge(params: {
let lastOutputAt: string | undefined;
let lastInputBytes = 0;
let lastOutputBytes = 0;
let lastClearAt: string | undefined;
let clearCount = 0;
const stop = async () => {
if (stopped) {
@@ -125,26 +129,53 @@ export async function startCommandRealtimeAudioBridge(params: {
params.logger.warn(`[google-meet] ${label} failed: ${formatErrorMessage(error)}`);
void stop();
};
// Wire failure/exit/diagnostic handlers to an output process. Events from a
// process that has since been replaced by clearOutputPlayback are ignored.
const attachOutputProcessHandlers = (proc: BridgeProcess) => {
  const isCurrent = () => proc === outputProcess;
  proc.on("error", (error) => {
    if (isCurrent()) {
      fail("audio output command")(error);
    }
  });
  proc.on("exit", (code, signal) => {
    if (!isCurrent() || stopped) {
      return;
    }
    params.logger.warn(
      `[google-meet] audio output command exited (${code ?? signal ?? "done"})`,
    );
    void stop();
  });
  proc.stderr?.on("data", (chunk) => {
    params.logger.debug?.(`[google-meet] audio output: ${String(chunk).trim()}`);
  });
};
// Drop queued playback by swapping in a fresh output process, then SIGTERM
// the old one. No-op once the bridge has stopped.
const clearOutputPlayback = () => {
  if (stopped) {
    return;
  }
  const staleOutput = outputProcess;
  // Reassign before killing: the stale process's handlers no-op once it is
  // no longer the current outputProcess.
  outputProcess = spawnOutputProcess();
  attachOutputProcessHandlers(outputProcess);
  clearCount += 1;
  lastClearAt = new Date().toISOString();
  params.logger.debug?.(
    `[google-meet] cleared realtime audio output buffer by restarting playback command`,
  );
  staleOutput.kill("SIGTERM");
};
inputProcess.on("error", fail("audio input command"));
outputProcess.on("error", fail("audio output command"));
inputProcess.on("exit", (code, signal) => {
if (!stopped) {
params.logger.warn(`[google-meet] audio input command exited (${code ?? signal ?? "done"})`);
void stop();
}
});
outputProcess.on("exit", (code, signal) => {
if (!stopped) {
params.logger.warn(`[google-meet] audio output command exited (${code ?? signal ?? "done"})`);
void stop();
}
});
attachOutputProcessHandlers(outputProcess);
inputProcess.stderr?.on("data", (chunk) => {
params.logger.debug?.(`[google-meet] audio input: ${String(chunk).trim()}`);
});
outputProcess.stderr?.on("data", (chunk) => {
params.logger.debug?.(`[google-meet] audio output: ${String(chunk).trim()}`);
});
const resolved = resolveGoogleMeetRealtimeProvider({
config: params.config,
@@ -167,6 +198,7 @@ export async function startCommandRealtimeAudioBridge(params: {
lastOutputBytes += muLaw.byteLength;
outputProcess.stdin?.write(muLaw);
},
clearAudio: clearOutputPlayback,
},
onTranscript: (role, text, isFinal) => {
if (isFinal) {
@@ -240,6 +272,8 @@ export async function startCommandRealtimeAudioBridge(params: {
lastOutputAt,
lastInputBytes,
lastOutputBytes,
lastClearAt,
clearCount,
bridgeClosed: stopped,
}),
stop,

View File

@@ -31,10 +31,13 @@ export type GoogleMeetChromeHealth = {
audioOutputActive?: boolean;
lastInputAt?: string;
lastOutputAt?: string;
lastClearAt?: string;
lastInputBytes?: number;
lastOutputBytes?: number;
consecutiveInputErrors?: number;
lastInputError?: string;
clearCount?: number;
queuedInputChunks?: number;
browserUrl?: string;
browserTitle?: string;
bridgeClosed?: boolean;

View File

@@ -77,6 +77,9 @@ describe("buildGoogleRealtimeVoiceProvider", () => {
temperature: 0.4,
silenceDurationMs: 700,
startSensitivity: "high",
activityHandling: "no_interruption",
turnCoverage: "turn_includes_only_activity",
automaticActivityDetectionDisabled: false,
},
},
},
@@ -92,6 +95,9 @@ describe("buildGoogleRealtimeVoiceProvider", () => {
silenceDurationMs: 700,
startSensitivity: "high",
endSensitivity: undefined,
activityHandling: "no-interruption",
turnCoverage: "only-activity",
automaticActivityDetectionDisabled: false,
enableAffectiveDialog: undefined,
thinkingLevel: undefined,
thinkingBudget: undefined,
@@ -107,6 +113,9 @@ describe("buildGoogleRealtimeVoiceProvider", () => {
voice: "Kore",
temperature: 0.3,
startSensitivity: "low",
endSensitivity: "low",
activityHandling: "no-interruption",
turnCoverage: "only-activity",
},
instructions: "Speak briefly.",
tools: [
@@ -144,6 +153,14 @@ describe("buildGoogleRealtimeVoiceProvider", () => {
},
},
outputAudioTranscription: {},
realtimeInputConfig: {
activityHandling: "NO_INTERRUPTION",
automaticActivityDetection: {
startOfSpeechSensitivity: "START_SENSITIVITY_LOW",
endOfSpeechSensitivity: "END_SENSITIVITY_LOW",
},
turnCoverage: "TURN_INCLUDES_ONLY_ACTIVITY",
},
tools: [
{
functionDeclarations: [
@@ -240,6 +257,28 @@ describe("buildGoogleRealtimeVoiceProvider", () => {
expect(session.sendRealtimeInput).toHaveBeenCalledWith({ audioStreamEnd: true });
});
it("can disable automatic VAD for manual activity signaling experiments", async () => {
const provider = buildGoogleRealtimeVoiceProvider();
const bridge = provider.createBridge({
providerConfig: {
apiKey: "gemini-key",
automaticActivityDetectionDisabled: true,
},
onAudio: vi.fn(),
onClearAudio: vi.fn(),
});
await bridge.connect();
expect(lastConnectParams().config).toMatchObject({
realtimeInputConfig: {
automaticActivityDetection: {
disabled: true,
},
},
});
});
it("sends text prompts as ordered client turns", async () => {
const provider = buildGoogleRealtimeVoiceProvider();
const bridge = provider.createBridge({

View File

@@ -1,8 +1,10 @@
import { randomUUID } from "node:crypto";
import {
ActivityHandling,
EndSensitivity,
Modality,
StartSensitivity,
TurnCoverage,
type FunctionDeclaration,
type FunctionResponse,
type LiveServerContent,
@@ -34,6 +36,8 @@ const DEFAULT_AUDIO_STREAM_END_SILENCE_MS = 700;
type GoogleRealtimeSensitivity = "low" | "high";
type GoogleRealtimeThinkingLevel = "minimal" | "low" | "medium" | "high";
type GoogleRealtimeActivityHandling = "start-of-activity-interrupts" | "no-interruption";
type GoogleRealtimeTurnCoverage = "only-activity" | "all-input" | "audio-activity-and-all-video";
type GoogleRealtimeVoiceProviderConfig = {
apiKey?: string;
@@ -45,6 +49,9 @@ type GoogleRealtimeVoiceProviderConfig = {
silenceDurationMs?: number;
startSensitivity?: GoogleRealtimeSensitivity;
endSensitivity?: GoogleRealtimeSensitivity;
activityHandling?: GoogleRealtimeActivityHandling;
turnCoverage?: GoogleRealtimeTurnCoverage;
automaticActivityDetectionDisabled?: boolean;
enableAffectiveDialog?: boolean;
thinkingLevel?: GoogleRealtimeThinkingLevel;
thinkingBudget?: number;
@@ -60,6 +67,9 @@ type GoogleRealtimeVoiceBridgeConfig = RealtimeVoiceBridgeCreateRequest & {
silenceDurationMs?: number;
startSensitivity?: GoogleRealtimeSensitivity;
endSensitivity?: GoogleRealtimeSensitivity;
activityHandling?: GoogleRealtimeActivityHandling;
turnCoverage?: GoogleRealtimeTurnCoverage;
automaticActivityDetectionDisabled?: boolean;
enableAffectiveDialog?: boolean;
thinkingLevel?: GoogleRealtimeThinkingLevel;
thinkingBudget?: number;
@@ -105,6 +115,40 @@ function asThinkingLevel(value: unknown): GoogleRealtimeThinkingLevel | undefine
: undefined;
}
/**
 * Normalize a user-supplied activity-handling setting to its canonical form.
 * Accepts underscore/hyphen variants plus common aliases; anything else
 * (including a missing value) yields `undefined`.
 */
function asActivityHandling(value: unknown): GoogleRealtimeActivityHandling | undefined {
  const normalized = normalizeOptionalString(value)?.toLowerCase().replaceAll("_", "-");
  if (normalized === undefined) {
    return undefined;
  }
  const interruptAliases = [
    "start-of-activity-interrupts",
    "start-of-activity-interrupt",
    "interrupt",
    "interrupts",
  ];
  if (interruptAliases.includes(normalized)) {
    return "start-of-activity-interrupts";
  }
  const noInterruptionAliases = ["no-interruption", "no-interruptions", "none"];
  if (noInterruptionAliases.includes(normalized)) {
    return "no-interruption";
  }
  return undefined;
}
/**
 * Normalize a user-supplied turn-coverage setting to its canonical form.
 * Both the short form and the full Google `turn-includes-*` spelling are
 * accepted (underscores treated as hyphens); unknown values yield `undefined`.
 */
function asTurnCoverage(value: unknown): GoogleRealtimeTurnCoverage | undefined {
  const normalized = normalizeOptionalString(value)?.toLowerCase().replaceAll("_", "-");
  if (normalized === undefined) {
    return undefined;
  }
  const aliases: Record<string, GoogleRealtimeTurnCoverage> = {
    "only-activity": "only-activity",
    "turn-includes-only-activity": "only-activity",
    "all-input": "all-input",
    "turn-includes-all-input": "all-input",
    "audio-activity-and-all-video": "audio-activity-and-all-video",
    "turn-includes-audio-activity-and-all-video": "audio-activity-and-all-video",
  };
  return aliases[normalized];
}
function resolveGoogleRealtimeProviderConfigRecord(
config: Record<string, unknown>,
): Record<string, unknown> | undefined {
@@ -140,6 +184,9 @@ function normalizeProviderConfig(
silenceDurationMs: asFiniteNumber(raw?.silenceDurationMs),
startSensitivity: asSensitivity(raw?.startSensitivity),
endSensitivity: asSensitivity(raw?.endSensitivity),
activityHandling: asActivityHandling(raw?.activityHandling),
turnCoverage: asTurnCoverage(raw?.turnCoverage),
automaticActivityDetectionDisabled: asBoolean(raw?.automaticActivityDetectionDisabled),
enableAffectiveDialog: asBoolean(raw?.enableAffectiveDialog),
thinkingLevel: asThinkingLevel(raw?.thinkingLevel),
thinkingBudget: asFiniteNumber(raw?.thinkingBudget),
@@ -176,6 +223,32 @@ function mapEndSensitivity(
}
}
/**
 * Map the canonical config value to the @google/genai ActivityHandling enum;
 * `undefined` passes through so the field is omitted from the session config.
 */
function mapActivityHandling(
  value: GoogleRealtimeActivityHandling | undefined,
): ActivityHandling | undefined {
  if (value === undefined) {
    return undefined;
  }
  return value === "no-interruption"
    ? ActivityHandling.NO_INTERRUPTION
    : ActivityHandling.START_OF_ACTIVITY_INTERRUPTS;
}
/**
 * Map the canonical config value to the @google/genai TurnCoverage enum;
 * `undefined` passes through so the field is omitted from the session config.
 */
function mapTurnCoverage(value: GoogleRealtimeTurnCoverage | undefined): TurnCoverage | undefined {
  if (value === undefined) {
    return undefined;
  }
  const mapping: Record<GoogleRealtimeTurnCoverage, TurnCoverage> = {
    "only-activity": TurnCoverage.TURN_INCLUDES_ONLY_ACTIVITY,
    "all-input": TurnCoverage.TURN_INCLUDES_ALL_INPUT,
    "audio-activity-and-all-video": TurnCoverage.TURN_INCLUDES_AUDIO_ACTIVITY_AND_ALL_VIDEO,
  };
  return mapping[value];
}
function buildThinkingConfig(config: GoogleRealtimeVoiceBridgeConfig): ThinkingConfig | undefined {
if (config.thinkingLevel) {
return { thinkingLevel: config.thinkingLevel.toUpperCase() as ThinkingConfig["thinkingLevel"] };
@@ -191,7 +264,12 @@ function buildRealtimeInputConfig(
): RealtimeInputConfig | undefined {
const startSensitivity = mapStartSensitivity(config.startSensitivity);
const endSensitivity = mapEndSensitivity(config.endSensitivity);
const activityHandling = mapActivityHandling(config.activityHandling);
const turnCoverage = mapTurnCoverage(config.turnCoverage);
const automaticActivityDetection = {
...(typeof config.automaticActivityDetectionDisabled === "boolean"
? { disabled: config.automaticActivityDetectionDisabled }
: {}),
...(startSensitivity ? { startOfSpeechSensitivity: startSensitivity } : {}),
...(endSensitivity ? { endOfSpeechSensitivity: endSensitivity } : {}),
...(typeof config.prefixPaddingMs === "number"
@@ -201,9 +279,12 @@ function buildRealtimeInputConfig(
? { silenceDurationMs: Math.max(0, Math.floor(config.silenceDurationMs)) }
: {}),
};
return Object.keys(automaticActivityDetection).length > 0
? { automaticActivityDetection }
: undefined;
const realtimeInputConfig = {
...(Object.keys(automaticActivityDetection).length > 0 ? { automaticActivityDetection } : {}),
...(activityHandling ? { activityHandling } : {}),
...(turnCoverage ? { turnCoverage } : {}),
};
return Object.keys(realtimeInputConfig).length > 0 ? realtimeInputConfig : undefined;
}
function buildFunctionDeclarations(tools: RealtimeVoiceTool[] | undefined): FunctionDeclaration[] {
@@ -519,6 +600,9 @@ export function buildGoogleRealtimeVoiceProvider(): RealtimeVoiceProviderPlugin
silenceDurationMs: config.silenceDurationMs,
startSensitivity: config.startSensitivity,
endSensitivity: config.endSensitivity,
activityHandling: config.activityHandling,
turnCoverage: config.turnCoverage,
automaticActivityDetectionDisabled: config.automaticActivityDetectionDisabled,
enableAffectiveDialog: config.enableAffectiveDialog,
thinkingLevel: config.thinkingLevel,
thinkingBudget: config.thinkingBudget,