From cab66c55561a683664df14452493bfe2c7e4e8fd Mon Sep 17 00:00:00 2001 From: Rohan Shiralkar Date: Sun, 26 Apr 2026 00:51:49 +0530 Subject: [PATCH] fix(google): honor models.providers.google.request.allowPrivateNetwork in TTS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Image generation and media understanding both thread the sanitized models.providers.google.request config (including allowPrivateNetwork) into resolveGoogleGenerativeAiHttpRequestConfig. Speech synthesis omitted that arg, so TTS always saw allowPrivateNetwork: false regardless of config — silently falling back to a different speech provider when the configured Google TTS endpoint resolved to a private/internal IP (proxies, custom backends, test mocks). Mirror the image-generation-provider pattern: thread request through synthesizeGoogleTtsPcm at both call sites (synthesize and synthesizeTelephony). Follow-up to #67216. --- extensions/google/speech-provider.test.ts | 29 +++++++++++++++++++++++ extensions/google/speech-provider.ts | 14 ++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/extensions/google/speech-provider.test.ts b/extensions/google/speech-provider.test.ts index f64e90364f4..752669d96e8 100644 --- a/extensions/google/speech-provider.test.ts +++ b/extensions/google/speech-provider.test.ts @@ -1,3 +1,4 @@ +import * as providerHttp from "openclaw/plugin-sdk/provider-http"; import { afterEach, describe, expect, it, vi } from "vitest"; import { buildGoogleSpeechProvider, __testing } from "./speech-provider.js"; @@ -315,4 +316,32 @@ describe("Google speech provider", () => { "Google TTS failed (429): Quota exceeded [code=RESOURCE_EXHAUSTED] [request_id=google_req_123]", ); }); + + it("honors configured private-network opt-in for Google TTS", async () => { + installGoogleTtsFetchMock(); + const postJsonRequestSpy = vi.spyOn(providerHttp, "postJsonRequest"); + + const provider = buildGoogleSpeechProvider(); + await 
provider.synthesize({ + text: "hello", + cfg: { + models: { + providers: { + google: { + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + request: { allowPrivateNetwork: true }, + models: [], + }, + }, + }, + }, + providerConfig: { apiKey: "google-test-key" }, + target: "audio-file", + timeoutMs: 12_345, + }); + + expect(postJsonRequestSpy).toHaveBeenCalledWith( + expect.objectContaining({ allowPrivateNetwork: true }), + ); + }); }); diff --git a/extensions/google/speech-provider.ts b/extensions/google/speech-provider.ts index a34a8907916..fccc7afe7b2 100644 --- a/extensions/google/speech-provider.ts +++ b/extensions/google/speech-provider.ts @@ -1,4 +1,8 @@ -import { assertOkOrThrowProviderError, postJsonRequest } from "openclaw/plugin-sdk/provider-http"; +import { + assertOkOrThrowProviderError, + postJsonRequest, + sanitizeConfiguredModelProviderRequest, +} from "openclaw/plugin-sdk/provider-http"; import type { OpenClawConfig } from "openclaw/plugin-sdk/provider-onboard"; import { normalizeResolvedSecretInputString } from "openclaw/plugin-sdk/secret-input"; import type { @@ -264,6 +268,7 @@ async function synthesizeGoogleTtsPcm(params: { text: string; apiKey: string; baseUrl?: string; + request?: ReturnType<typeof sanitizeConfiguredModelProviderRequest>; model: string; voiceName: string; audioProfile?: string; @@ -274,6 +279,7 @@ async function synthesizeGoogleTtsPcm(params: { resolveGoogleGenerativeAiHttpRequestConfig({ apiKey: params.apiKey, baseUrl: params.baseUrl, + request: params.request, capability: "audio", transport: "http", }); @@ -379,6 +385,9 @@ export function buildGoogleSpeechProvider(): SpeechProviderPlugin { text: req.text, apiKey, baseUrl: resolveGoogleTtsBaseUrl({ cfg: req.cfg, providerConfig: config }), + request: sanitizeConfiguredModelProviderRequest( + req.cfg?.models?.providers?.google?.request, + ), model: normalizeGoogleTtsModel(overrides.model ?? config.model), voiceName: normalizeGoogleTtsVoiceName(overrides.voiceName ??
config.voiceName), audioProfile: overrides.audioProfile ?? config.audioProfile, @@ -405,6 +414,9 @@ export function buildGoogleSpeechProvider(): SpeechProviderPlugin { text: req.text, apiKey, baseUrl: resolveGoogleTtsBaseUrl({ cfg: req.cfg, providerConfig: config }), + request: sanitizeConfiguredModelProviderRequest( + req.cfg?.models?.providers?.google?.request, + ), model: config.model, voiceName: config.voiceName, audioProfile: config.audioProfile,