From a43c1f880757e830b5fb4d3c7b5532a625f0e3e4 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 24 Apr 2026 19:32:40 +0100 Subject: [PATCH] refactor: share provider HTTP errors with google --- docs/plugins/sdk-provider-plugins.md | 8 ++- docs/plugins/sdk-subpaths.md | 6 +- extensions/elevenlabs/tts.ts | 2 +- extensions/google/google.live.test.ts | 66 +++++++++++++++++++ .../google/media-understanding-provider.ts | 4 +- ...media-understanding-provider.video.test.ts | 28 +++++++- extensions/google/speech-provider.test.ts | 33 ++++++++++ extensions/google/speech-provider.ts | 4 +- extensions/gradium/tts.ts | 2 +- extensions/openai/openai-tts.live.test.ts | 44 +++++++++++++ extensions/openai/tts.ts | 2 +- extensions/xai/tts.ts | 4 +- src/plugin-sdk/provider-http.ts | 10 +++ 13 files changed, 197 insertions(+), 16 deletions(-) create mode 100644 extensions/google/google.live.test.ts create mode 100644 extensions/openai/openai-tts.live.test.ts diff --git a/docs/plugins/sdk-provider-plugins.md b/docs/plugins/sdk-provider-plugins.md index fbff1dc0847..b34e9611b88 100644 --- a/docs/plugins/sdk-provider-plugins.md +++ b/docs/plugins/sdk-provider-plugins.md @@ -493,8 +493,10 @@ API key auth, and dynamic model resolution. ```typescript - import { postJsonRequest } from "openclaw/plugin-sdk/provider-http"; - import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/speech"; + import { + assertOkOrThrowProviderError, + postJsonRequest, + } from "openclaw/plugin-sdk/provider-http"; api.registerSpeechProvider({ id: "acme-ai", @@ -525,7 +527,7 @@ API key auth, and dynamic model resolution. ``` Use `assertOkOrThrowProviderError(...)` for provider HTTP failures so - speech plugins share capped error-body reads, JSON error parsing, and + plugins share capped error-body reads, JSON error parsing, and request-id suffixes. diff --git a/docs/plugins/sdk-subpaths.md b/docs/plugins/sdk-subpaths.md index 0635d4a0985..db67fd39c57 100644 --- a/docs/plugins/sdk-subpaths.md +++ b/docs/plugins/sdk-subpaths.md @@ -94,7 +94,7 @@ For the plugin authoring guide, see [Plugin SDK overview](/plugins/sdk-overview) | `plugin-sdk/provider-auth` | `createProviderApiKeyAuthMethod`, `ensureApiKeyFromOptionEnvOrPrompt`, `upsertAuthProfile`, `upsertApiKeyProfile`, `writeOAuthCredentials` | | `plugin-sdk/provider-model-shared` | `ProviderReplayFamily`, `buildProviderReplayFamilyHooks`, `normalizeModelCompat`, shared replay-policy builders, provider-endpoint helpers, and model-id normalization helpers such as `normalizeNativeXaiModelId` | | `plugin-sdk/provider-catalog-shared` | `findCatalogTemplate`, `buildSingleProviderApiKeyCatalog`, `supportsNativeStreamingUsageCompat`, `applyProviderNativeStreamingUsageCompat` | - | `plugin-sdk/provider-http` | Generic provider HTTP/endpoint capability helpers, including audio transcription multipart form helpers | + | `plugin-sdk/provider-http` | Generic provider HTTP/endpoint capability helpers, provider HTTP errors, and audio transcription multipart form helpers | | `plugin-sdk/provider-web-fetch-contract` | Narrow web-fetch config/selection contract helpers such as `enablePluginInConfig` and `WebFetchProviderPlugin` | | `plugin-sdk/provider-web-fetch` | Web-fetch provider registration/cache helpers | | `plugin-sdk/provider-web-search-config-contract` | Narrow web-search config/credential helpers for providers that do not need plugin-enable wiring | @@ -218,8 +218,8 @@ For the plugin authoring guide, see [Plugin SDK overview](/plugins/sdk-overview) | `plugin-sdk/media-understanding` | Media understanding provider types plus provider-facing image/audio helper exports | | `plugin-sdk/text-runtime` | Shared text/markdown/logging helpers such as assistant-visible-text stripping, markdown render/chunking/table helpers, redaction helpers, directive-tag helpers, and safe-text utilities | | `plugin-sdk/text-chunking` | Outbound text chunking helper | - | `plugin-sdk/speech` | Speech provider types plus provider-facing directive, registry, validation, and provider HTTP error helpers | - | `plugin-sdk/speech-core` | Shared speech provider types, registry, directive, normalization, and provider HTTP error helpers | + | `plugin-sdk/speech` | Speech provider types plus provider-facing directive, registry, validation, and speech helper exports | + | `plugin-sdk/speech-core` | Shared speech provider types, registry, directive, normalization, and speech helper exports | | `plugin-sdk/realtime-transcription` | Realtime transcription provider types, registry helpers, and shared WebSocket session helper | | `plugin-sdk/realtime-voice` | Realtime voice provider types and registry helpers | | `plugin-sdk/image-generation` | Image generation provider types | diff --git a/extensions/elevenlabs/tts.ts b/extensions/elevenlabs/tts.ts index c2384596780..d33dcf9e5b1 100644 --- a/extensions/elevenlabs/tts.ts +++ b/extensions/elevenlabs/tts.ts @@ -1,5 +1,5 @@ +import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/provider-http"; import { - assertOkOrThrowProviderError, normalizeApplyTextNormalization, normalizeLanguageCode, normalizeSeed, diff --git a/extensions/google/google.live.test.ts b/extensions/google/google.live.test.ts new file mode 100644 index 00000000000..efe156b6262 --- /dev/null +++ b/extensions/google/google.live.test.ts @@ -0,0 +1,66 @@ +import { describe, expect, it } from "vitest"; +import { isLiveTestEnabled } from "../../src/agents/live-test-helpers.js"; +import { + registerProviderPlugin, + requireRegisteredProvider, +} from "../../test/helpers/plugins/provider-registration.js"; +import { normalizeTranscriptForMatch } from "../../test/helpers/stt-live-audio.js"; +import plugin from "./index.js"; + +const GOOGLE_API_KEY = + process.env.GEMINI_API_KEY?.trim() || process.env.GOOGLE_API_KEY?.trim() || ""; +const LIVE = isLiveTestEnabled() && GOOGLE_API_KEY.length > 0; +const describeLive = LIVE ? describe : describe.skip; + +const registerGooglePlugin = () => + registerProviderPlugin({ + plugin, + id: "google", + name: "Google Provider", + }); + +describeLive("google plugin live", () => { + it("synthesizes speech through the registered provider", async () => { + const { speechProviders } = await registerGooglePlugin(); + const provider = requireRegisteredProvider(speechProviders, "google"); + + const audioFile = await provider.synthesize({ + text: "OpenClaw Google text to speech integration test OK.", + cfg: { plugins: { enabled: true } } as never, + providerConfig: { apiKey: GOOGLE_API_KEY }, + target: "audio-file", + timeoutMs: 90_000, + }); + + expect(audioFile.outputFormat).toBe("wav"); + expect(audioFile.fileExtension).toBe(".wav"); + expect(audioFile.audioBuffer.byteLength).toBeGreaterThan(512); + }, 120_000); + + it("transcribes synthesized speech through the media provider", async () => { + const { mediaProviders, speechProviders } = await registerGooglePlugin(); + const speechProvider = requireRegisteredProvider(speechProviders, "google"); + const mediaProvider = requireRegisteredProvider(mediaProviders, "google"); + + const phrase = "Testing Google audio transcription with OpenClaw."; + const audioFile = await speechProvider.synthesize({ + text: phrase, + cfg: { plugins: { enabled: true } } as never, + providerConfig: { apiKey: GOOGLE_API_KEY }, + target: "audio-file", + timeoutMs: 90_000, + }); + + const transcript = await mediaProvider.transcribeAudio?.({ + buffer: audioFile.audioBuffer, + fileName: "google-live.wav", + mime: "audio/wav", + apiKey: GOOGLE_API_KEY, + timeoutMs: 90_000, + }); + + const normalized = normalizeTranscriptForMatch(transcript?.text ?? ""); + expect(normalized).toContain("google"); + expect(normalized).toContain("openclaw"); + }, 180_000); +}); diff --git a/extensions/google/media-understanding-provider.ts b/extensions/google/media-understanding-provider.ts index 744a3b60978..0c688a217d8 100644 --- a/extensions/google/media-understanding-provider.ts +++ b/extensions/google/media-understanding-provider.ts @@ -8,7 +8,7 @@ import { type VideoDescriptionResult, } from "openclaw/plugin-sdk/media-understanding"; import { - assertOkOrThrowHttpError, + assertOkOrThrowProviderError, postJsonRequest, type ProviderRequestTransportOverrides, } from "openclaw/plugin-sdk/provider-http"; @@ -96,7 +96,7 @@ async function generateGeminiInlineDataText(params: { }); try { - await assertOkOrThrowHttpError(res, params.httpErrorLabel); + await assertOkOrThrowProviderError(res, params.httpErrorLabel); const payload = (await res.json()) as { candidates?: Array<{ diff --git a/extensions/google/media-understanding-provider.video.test.ts b/extensions/google/media-understanding-provider.video.test.ts index 69b845ae25e..5af2d6cd669 100644 --- a/extensions/google/media-understanding-provider.video.test.ts +++ b/extensions/google/media-understanding-provider.video.test.ts @@ -4,7 +4,7 @@ import { createRequestCaptureJsonFetch, installPinnedHostnameTestHooks, } from "../../src/media-understanding/audio.test-helpers.js"; -import { describeGeminiVideo } from "./media-understanding-provider.js"; +import { describeGeminiVideo, transcribeGeminiAudio } from "./media-understanding-provider.js"; import { resolveGoogleGenerativeAiHttpRequestConfig } from "./runtime-api.js"; installPinnedHostnameTestHooks(); @@ -129,4 +129,30 @@ describe("describeGeminiVideo", () => { "Google Generative AI baseUrl must use https://generativelanguage.googleapis.com", ); }); + + it("formats Google audio transcription HTTP errors with provider details", async () => { + await expect( + transcribeGeminiAudio({ + buffer: Buffer.from("audio-bytes"), + fileName: "clip.wav", + apiKey: "test-key", + timeoutMs: 1500, + fetchFn: async () => + new Response( + JSON.stringify({ + error: { + message: "Unsupported audio", + status: "INVALID_ARGUMENT", + }, + }), + { + status: 400, + headers: { "x-request-id": "google_audio_req" }, + }, + ), + }), + ).rejects.toThrow( + "Audio transcription failed (400): Unsupported audio [code=INVALID_ARGUMENT] [request_id=google_audio_req]", + ); + }); }); diff --git a/extensions/google/speech-provider.test.ts b/extensions/google/speech-provider.test.ts index 29ae0d57d9b..c55deb20610 100644 --- a/extensions/google/speech-provider.test.ts +++ b/extensions/google/speech-provider.test.ts @@ -245,4 +245,37 @@ describe("Google speech provider", () => { ]), ); }); + + it("formats Google TTS HTTP errors with provider details", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValue( + new Response( + JSON.stringify({ + error: { + message: "Quota exceeded", + status: "RESOURCE_EXHAUSTED", + }, + }), + { + status: 429, + headers: { "x-request-id": "google_req_123" }, + }, + ), + ), + ); + const provider = buildGoogleSpeechProvider(); + + await expect( + provider.synthesize({ + text: "Read this plainly.", + cfg: {}, + providerConfig: { apiKey: "google-test-key" }, + target: "audio-file", + timeoutMs: 10_000, + }), + ).rejects.toThrow( + "Google TTS failed (429): Quota exceeded [code=RESOURCE_EXHAUSTED] [request_id=google_req_123]", + ); + }); }); diff --git a/extensions/google/speech-provider.ts b/extensions/google/speech-provider.ts index 0c22fb18f95..ff91c57664a 100644 --- a/extensions/google/speech-provider.ts +++ b/extensions/google/speech-provider.ts @@ -1,4 +1,4 @@ -import { assertOkOrThrowHttpError, postJsonRequest } from "openclaw/plugin-sdk/provider-http"; +import { assertOkOrThrowProviderError, postJsonRequest } from "openclaw/plugin-sdk/provider-http"; import type { OpenClawConfig } from "openclaw/plugin-sdk/provider-onboard"; import { normalizeResolvedSecretInputString } from "openclaw/plugin-sdk/secret-input"; import type { @@ -281,7 +281,7 @@ async function synthesizeGoogleTtsPcm(params: { }); try { - await assertOkOrThrowHttpError(res, "Google TTS failed"); + await assertOkOrThrowProviderError(res, "Google TTS failed"); return extractGoogleSpeechPcm((await res.json()) as GoogleGenerateSpeechResponse); } finally { await release(); diff --git a/extensions/gradium/tts.ts b/extensions/gradium/tts.ts index c3528e403fc..725d74ff736 100644 --- a/extensions/gradium/tts.ts +++ b/extensions/gradium/tts.ts @@ -1,4 +1,4 @@ -import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/speech"; +import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/provider-http"; import { fetchWithSsrFGuard } from "openclaw/plugin-sdk/ssrf-runtime"; import { normalizeGradiumBaseUrl } from "./shared.js"; diff --git a/extensions/openai/openai-tts.live.test.ts b/extensions/openai/openai-tts.live.test.ts new file mode 100644 index 00000000000..6f4df3cfde6 --- /dev/null +++ b/extensions/openai/openai-tts.live.test.ts @@ -0,0 +1,44 @@ +import { describe, expect, it } from "vitest"; +import { isLiveTestEnabled } from "../../src/agents/live-test-helpers.js"; +import { buildOpenAISpeechProvider } from "./speech-provider.js"; + +const OPENAI_API_KEY = process.env.OPENAI_API_KEY?.trim() ?? ""; +const LIVE = isLiveTestEnabled() && OPENAI_API_KEY.length > 0; +const describeLive = LIVE ? describe : describe.skip; + +describeLive("openai tts live", () => { + it("synthesizes audio through the speech provider", async () => { + const speechProvider = buildOpenAISpeechProvider(); + + const voices = await speechProvider.listVoices?.({}); + expect(voices).toEqual(expect.arrayContaining([expect.objectContaining({ id: "alloy" })])); + + const providerConfig = { + apiKey: OPENAI_API_KEY, + baseUrl: "https://api.openai.com/v1", + model: "gpt-4o-mini-tts", + voice: "alloy", + }; + + const audioFile = await speechProvider.synthesize({ + text: "OpenClaw OpenAI text to speech integration test OK.", + cfg: { plugins: { enabled: true } } as never, + providerConfig, + target: "audio-file", + timeoutMs: 45_000, + }); + expect(audioFile.outputFormat).toBe("mp3"); + expect(audioFile.fileExtension).toBe(".mp3"); + expect(audioFile.audioBuffer.byteLength).toBeGreaterThan(512); + + const telephony = await speechProvider.synthesizeTelephony?.({ + text: "OpenClaw OpenAI telephony integration test OK.", + cfg: { plugins: { enabled: true } } as never, + providerConfig, + timeoutMs: 45_000, + }); + expect(telephony?.outputFormat).toBe("pcm"); + expect(telephony?.sampleRate).toBe(24_000); + expect(telephony?.audioBuffer.byteLength).toBeGreaterThan(512); + }, 60_000); +}); diff --git a/extensions/openai/tts.ts b/extensions/openai/tts.ts index 2b3cd58c4ba..5947648063b 100644 --- a/extensions/openai/tts.ts +++ b/extensions/openai/tts.ts @@ -1,8 +1,8 @@ +import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/provider-http"; import { captureHttpExchange, isDebugProxyGlobalFetchPatchInstalled, } from "openclaw/plugin-sdk/proxy-capture"; -import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/speech"; import { fetchWithSsrFGuard, ssrfPolicyFromHttpBaseUrlAllowedHostname, diff --git a/extensions/xai/tts.ts b/extensions/xai/tts.ts index 1430e9e8007..43c4f229a96 100644 --- a/extensions/xai/tts.ts +++ b/extensions/xai/tts.ts @@ -1,5 +1,5 @@ -import { postJsonRequest } from "openclaw/plugin-sdk/provider-http"; -import { assertOkOrThrowProviderError, trimToUndefined } from "openclaw/plugin-sdk/speech"; +import { assertOkOrThrowProviderError, postJsonRequest } from "openclaw/plugin-sdk/provider-http"; +import { trimToUndefined } from "openclaw/plugin-sdk/speech"; import { XAI_BASE_URL } from "./api.js"; export { XAI_BASE_URL }; diff --git a/src/plugin-sdk/provider-http.ts b/src/plugin-sdk/provider-http.ts index 4af656cc503..a6453228183 100644 --- a/src/plugin-sdk/provider-http.ts +++ b/src/plugin-sdk/provider-http.ts @@ -1,6 +1,16 @@ // Shared provider-facing HTTP helpers. Keep generic transport utilities here so // capability SDKs do not depend on each other. +export { + assertOkOrThrowProviderError, + createProviderHttpError, + extractProviderErrorDetail, + extractProviderRequestId, + formatProviderErrorPayload, + formatProviderHttpErrorMessage, + readResponseTextLimited, + truncateErrorDetail, +} from "../tts/provider-error-utils.js"; export { assertOkOrThrowHttpError, buildAudioTranscriptionFormData,