refactor: share provider HTTP errors with google

2026-05-06 05:50:43 +00:00 · 2026-04-24 19:32:40 +01:00
parent b1016c39fd
commit a43c1f8807
13 changed files with 197 additions and 16 deletions
--- a/docs/plugins/sdk-provider-plugins.md
+++ b/docs/plugins/sdk-provider-plugins.md
@@ -493,8 +493,10 @@ API key auth, and dynamic model resolution.
    <Tabs>
      <Tab title="Speech (TTS)">
        ```typescript
-        import { postJsonRequest } from "openclaw/plugin-sdk/provider-http";
-        import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/speech";
+        import {
+          assertOkOrThrowProviderError,
+          postJsonRequest,
+        } from "openclaw/plugin-sdk/provider-http";

        api.registerSpeechProvider({
          id: "acme-ai",
@@ -525,7 +527,7 @@ API key auth, and dynamic model resolution.
        ```

        Use `assertOkOrThrowProviderError(...)` for provider HTTP failures so
-        speech plugins share capped error-body reads, JSON error parsing, and
+        plugins share capped error-body reads, JSON error parsing, and
        request-id suffixes.
      </Tab>
      <Tab title="Realtime transcription">
--- a/docs/plugins/sdk-subpaths.md
+++ b/docs/plugins/sdk-subpaths.md
@@ -94,7 +94,7 @@ For the plugin authoring guide, see [Plugin SDK overview](/plugins/sdk-overview)
    | `plugin-sdk/provider-auth` | `createProviderApiKeyAuthMethod`, `ensureApiKeyFromOptionEnvOrPrompt`, `upsertAuthProfile`, `upsertApiKeyProfile`, `writeOAuthCredentials` |
    | `plugin-sdk/provider-model-shared` | `ProviderReplayFamily`, `buildProviderReplayFamilyHooks`, `normalizeModelCompat`, shared replay-policy builders, provider-endpoint helpers, and model-id normalization helpers such as `normalizeNativeXaiModelId` |
    | `plugin-sdk/provider-catalog-shared` | `findCatalogTemplate`, `buildSingleProviderApiKeyCatalog`, `supportsNativeStreamingUsageCompat`, `applyProviderNativeStreamingUsageCompat` |
-    | `plugin-sdk/provider-http` | Generic provider HTTP/endpoint capability helpers, including audio transcription multipart form helpers |
+    | `plugin-sdk/provider-http` | Generic provider HTTP/endpoint capability helpers, provider HTTP errors, and audio transcription multipart form helpers |
    | `plugin-sdk/provider-web-fetch-contract` | Narrow web-fetch config/selection contract helpers such as `enablePluginInConfig` and `WebFetchProviderPlugin` |
    | `plugin-sdk/provider-web-fetch` | Web-fetch provider registration/cache helpers |
    | `plugin-sdk/provider-web-search-config-contract` | Narrow web-search config/credential helpers for providers that do not need plugin-enable wiring |
@@ -218,8 +218,8 @@ For the plugin authoring guide, see [Plugin SDK overview](/plugins/sdk-overview)
    | `plugin-sdk/media-understanding` | Media understanding provider types plus provider-facing image/audio helper exports |
    | `plugin-sdk/text-runtime` | Shared text/markdown/logging helpers such as assistant-visible-text stripping, markdown render/chunking/table helpers, redaction helpers, directive-tag helpers, and safe-text utilities |
    | `plugin-sdk/text-chunking` | Outbound text chunking helper |
-    | `plugin-sdk/speech` | Speech provider types plus provider-facing directive, registry, validation, and provider HTTP error helpers |
-    | `plugin-sdk/speech-core` | Shared speech provider types, registry, directive, normalization, and provider HTTP error helpers |
+    | `plugin-sdk/speech` | Speech provider types plus provider-facing directive, registry, validation, and speech helper exports |
+    | `plugin-sdk/speech-core` | Shared speech provider types, registry, directive, normalization, and speech helper exports |
    | `plugin-sdk/realtime-transcription` | Realtime transcription provider types, registry helpers, and shared WebSocket session helper |
    | `plugin-sdk/realtime-voice` | Realtime voice provider types and registry helpers |
    | `plugin-sdk/image-generation` | Image generation provider types |
--- a/extensions/elevenlabs/tts.ts
+++ b/extensions/elevenlabs/tts.ts
@@ -1,5 +1,5 @@
+import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/provider-http";
 import {
-  assertOkOrThrowProviderError,
  normalizeApplyTextNormalization,
  normalizeLanguageCode,
  normalizeSeed,
--- a/extensions/google/google.live.test.ts
+++ b/extensions/google/google.live.test.ts
@@ -0,0 +1,66 @@
+import { describe, expect, it } from "vitest";
+import { isLiveTestEnabled } from "../../src/agents/live-test-helpers.js";
+import {
+  registerProviderPlugin,
+  requireRegisteredProvider,
+} from "../../test/helpers/plugins/provider-registration.js";
+import { normalizeTranscriptForMatch } from "../../test/helpers/stt-live-audio.js";
+import plugin from "./index.js";
+
+const GOOGLE_API_KEY =
+  process.env.GEMINI_API_KEY?.trim() || process.env.GOOGLE_API_KEY?.trim() || "";
+const LIVE = isLiveTestEnabled() && GOOGLE_API_KEY.length > 0;
+const describeLive = LIVE ? describe : describe.skip;
+
+const registerGooglePlugin = () =>
+  registerProviderPlugin({
+    plugin,
+    id: "google",
+    name: "Google Provider",
+  });
+
+describeLive("google plugin live", () => {
+  it("synthesizes speech through the registered provider", async () => {
+    const { speechProviders } = await registerGooglePlugin();
+    const provider = requireRegisteredProvider(speechProviders, "google");
+
+    const audioFile = await provider.synthesize({
+      text: "OpenClaw Google text to speech integration test OK.",
+      cfg: { plugins: { enabled: true } } as never,
+      providerConfig: { apiKey: GOOGLE_API_KEY },
+      target: "audio-file",
+      timeoutMs: 90_000,
+    });
+
+    expect(audioFile.outputFormat).toBe("wav");
+    expect(audioFile.fileExtension).toBe(".wav");
+    expect(audioFile.audioBuffer.byteLength).toBeGreaterThan(512);
+  }, 120_000);
+
+  it("transcribes synthesized speech through the media provider", async () => {
+    const { mediaProviders, speechProviders } = await registerGooglePlugin();
+    const speechProvider = requireRegisteredProvider(speechProviders, "google");
+    const mediaProvider = requireRegisteredProvider(mediaProviders, "google");
+
+    const phrase = "Testing Google audio transcription with OpenClaw.";
+    const audioFile = await speechProvider.synthesize({
+      text: phrase,
+      cfg: { plugins: { enabled: true } } as never,
+      providerConfig: { apiKey: GOOGLE_API_KEY },
+      target: "audio-file",
+      timeoutMs: 90_000,
+    });
+
+    const transcript = await mediaProvider.transcribeAudio?.({
+      buffer: audioFile.audioBuffer,
+      fileName: "google-live.wav",
+      mime: "audio/wav",
+      apiKey: GOOGLE_API_KEY,
+      timeoutMs: 90_000,
+    });
+
+    const normalized = normalizeTranscriptForMatch(transcript?.text ?? "");
+    expect(normalized).toContain("google");
+    expect(normalized).toContain("openclaw");
+  }, 180_000);
+});
--- a/extensions/google/media-understanding-provider.ts
+++ b/extensions/google/media-understanding-provider.ts
@@ -8,7 +8,7 @@ import {
  type VideoDescriptionResult,
 } from "openclaw/plugin-sdk/media-understanding";
 import {
-  assertOkOrThrowHttpError,
+  assertOkOrThrowProviderError,
  postJsonRequest,
  type ProviderRequestTransportOverrides,
 } from "openclaw/plugin-sdk/provider-http";
@@ -96,7 +96,7 @@ async function generateGeminiInlineDataText(params: {
  });

  try {
-    await assertOkOrThrowHttpError(res, params.httpErrorLabel);
+    await assertOkOrThrowProviderError(res, params.httpErrorLabel);

    const payload = (await res.json()) as {
      candidates?: Array<{
--- a/extensions/google/media-understanding-provider.video.test.ts
+++ b/extensions/google/media-understanding-provider.video.test.ts
@@ -4,7 +4,7 @@ import {
  createRequestCaptureJsonFetch,
  installPinnedHostnameTestHooks,
 } from "../../src/media-understanding/audio.test-helpers.js";
-import { describeGeminiVideo } from "./media-understanding-provider.js";
+import { describeGeminiVideo, transcribeGeminiAudio } from "./media-understanding-provider.js";
 import { resolveGoogleGenerativeAiHttpRequestConfig } from "./runtime-api.js";

 installPinnedHostnameTestHooks();
@@ -129,4 +129,30 @@ describe("describeGeminiVideo", () => {
      "Google Generative AI baseUrl must use https://generativelanguage.googleapis.com",
    );
  });
+
+  it("formats Google audio transcription HTTP errors with provider details", async () => {
+    await expect(
+      transcribeGeminiAudio({
+        buffer: Buffer.from("audio-bytes"),
+        fileName: "clip.wav",
+        apiKey: "test-key",
+        timeoutMs: 1500,
+        fetchFn: async () =>
+          new Response(
+            JSON.stringify({
+              error: {
+                message: "Unsupported audio",
+                status: "INVALID_ARGUMENT",
+              },
+            }),
+            {
+              status: 400,
+              headers: { "x-request-id": "google_audio_req" },
+            },
+          ),
+      }),
+    ).rejects.toThrow(
+      "Audio transcription failed (400): Unsupported audio [code=INVALID_ARGUMENT] [request_id=google_audio_req]",
+    );
+  });
 });
--- a/extensions/google/speech-provider.test.ts
+++ b/extensions/google/speech-provider.test.ts
@@ -245,4 +245,37 @@ describe("Google speech provider", () => {
      ]),
    );
  });
+
+  it("formats Google TTS HTTP errors with provider details", async () => {
+    vi.stubGlobal(
+      "fetch",
+      vi.fn().mockResolvedValue(
+        new Response(
+          JSON.stringify({
+            error: {
+              message: "Quota exceeded",
+              status: "RESOURCE_EXHAUSTED",
+            },
+          }),
+          {
+            status: 429,
+            headers: { "x-request-id": "google_req_123" },
+          },
+        ),
+      ),
+    );
+    const provider = buildGoogleSpeechProvider();
+
+    await expect(
+      provider.synthesize({
+        text: "Read this plainly.",
+        cfg: {},
+        providerConfig: { apiKey: "google-test-key" },
+        target: "audio-file",
+        timeoutMs: 10_000,
+      }),
+    ).rejects.toThrow(
+      "Google TTS failed (429): Quota exceeded [code=RESOURCE_EXHAUSTED] [request_id=google_req_123]",
+    );
+  });
 });
--- a/extensions/google/speech-provider.ts
+++ b/extensions/google/speech-provider.ts
@@ -1,4 +1,4 @@
-import { assertOkOrThrowHttpError, postJsonRequest } from "openclaw/plugin-sdk/provider-http";
+import { assertOkOrThrowProviderError, postJsonRequest } from "openclaw/plugin-sdk/provider-http";
 import type { OpenClawConfig } from "openclaw/plugin-sdk/provider-onboard";
 import { normalizeResolvedSecretInputString } from "openclaw/plugin-sdk/secret-input";
 import type {
@@ -281,7 +281,7 @@ async function synthesizeGoogleTtsPcm(params: {
  });

  try {
-    await assertOkOrThrowHttpError(res, "Google TTS failed");
+    await assertOkOrThrowProviderError(res, "Google TTS failed");
    return extractGoogleSpeechPcm((await res.json()) as GoogleGenerateSpeechResponse);
  } finally {
    await release();
--- a/extensions/gradium/tts.ts
+++ b/extensions/gradium/tts.ts
@@ -1,4 +1,4 @@
-import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/speech";
+import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/provider-http";
 import { fetchWithSsrFGuard } from "openclaw/plugin-sdk/ssrf-runtime";
 import { normalizeGradiumBaseUrl } from "./shared.js";

--- a/extensions/openai/openai-tts.live.test.ts
+++ b/extensions/openai/openai-tts.live.test.ts
@@ -0,0 +1,44 @@
+import { describe, expect, it } from "vitest";
+import { isLiveTestEnabled } from "../../src/agents/live-test-helpers.js";
+import { buildOpenAISpeechProvider } from "./speech-provider.js";
+
+const OPENAI_API_KEY = process.env.OPENAI_API_KEY?.trim() ?? "";
+const LIVE = isLiveTestEnabled() && OPENAI_API_KEY.length > 0;
+const describeLive = LIVE ? describe : describe.skip;
+
+describeLive("openai tts live", () => {
+  it("synthesizes audio through the speech provider", async () => {
+    const speechProvider = buildOpenAISpeechProvider();
+
+    const voices = await speechProvider.listVoices?.({});
+    expect(voices).toEqual(expect.arrayContaining([expect.objectContaining({ id: "alloy" })]));
+
+    const providerConfig = {
+      apiKey: OPENAI_API_KEY,
+      baseUrl: "https://api.openai.com/v1",
+      model: "gpt-4o-mini-tts",
+      voice: "alloy",
+    };
+
+    const audioFile = await speechProvider.synthesize({
+      text: "OpenClaw OpenAI text to speech integration test OK.",
+      cfg: { plugins: { enabled: true } } as never,
+      providerConfig,
+      target: "audio-file",
+      timeoutMs: 45_000,
+    });
+    expect(audioFile.outputFormat).toBe("mp3");
+    expect(audioFile.fileExtension).toBe(".mp3");
+    expect(audioFile.audioBuffer.byteLength).toBeGreaterThan(512);
+
+    const telephony = await speechProvider.synthesizeTelephony?.({
+      text: "OpenClaw OpenAI telephony integration test OK.",
+      cfg: { plugins: { enabled: true } } as never,
+      providerConfig,
+      timeoutMs: 45_000,
+    });
+    expect(telephony?.outputFormat).toBe("pcm");
+    expect(telephony?.sampleRate).toBe(24_000);
+    expect(telephony?.audioBuffer.byteLength).toBeGreaterThan(512);
+  }, 60_000);
+});
--- a/extensions/openai/tts.ts
+++ b/extensions/openai/tts.ts
@@ -1,8 +1,8 @@
+import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/provider-http";
 import {
  captureHttpExchange,
  isDebugProxyGlobalFetchPatchInstalled,
 } from "openclaw/plugin-sdk/proxy-capture";
-import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/speech";
 import {
  fetchWithSsrFGuard,
  ssrfPolicyFromHttpBaseUrlAllowedHostname,
--- a/extensions/xai/tts.ts
+++ b/extensions/xai/tts.ts
@@ -1,5 +1,5 @@
-import { postJsonRequest } from "openclaw/plugin-sdk/provider-http";
-import { assertOkOrThrowProviderError, trimToUndefined } from "openclaw/plugin-sdk/speech";
+import { assertOkOrThrowProviderError, postJsonRequest } from "openclaw/plugin-sdk/provider-http";
+import { trimToUndefined } from "openclaw/plugin-sdk/speech";
 import { XAI_BASE_URL } from "./api.js";
 export { XAI_BASE_URL };

--- a/src/plugin-sdk/provider-http.ts
+++ b/src/plugin-sdk/provider-http.ts
@@ -1,6 +1,16 @@
 // Shared provider-facing HTTP helpers. Keep generic transport utilities here so
 // capability SDKs do not depend on each other.

+export {
+  assertOkOrThrowProviderError,
+  createProviderHttpError,
+  extractProviderErrorDetail,
+  extractProviderRequestId,
+  formatProviderErrorPayload,
+  formatProviderHttpErrorMessage,
+  readResponseTextLimited,
+  truncateErrorDetail,
+} from "../tts/provider-error-utils.js";
 export {
  assertOkOrThrowHttpError,
  buildAudioTranscriptionFormData,