fix: guard speech provider fetches

2026-05-06 19:40:42 +00:00 · 2026-04-24 20:51:18 +01:00
parent 25ad66520b
commit 7425cb0549
3 changed files with 120 additions and 80 deletions
--- a/extensions/elevenlabs/speech-provider.ts
+++ b/extensions/elevenlabs/speech-provider.ts
@@ -18,6 +18,10 @@ import {
  requireInRange,
  trimToUndefined,
 } from "openclaw/plugin-sdk/speech";
+import {
+  fetchWithSsrFGuard,
+  ssrfPolicyFromHttpBaseUrlAllowedHostname,
+} from "openclaw/plugin-sdk/ssrf-runtime";
 import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime";
 import { resolveElevenLabsApiKeyWithProfileFallback } from "./config-api.js";
 import { isValidElevenLabsVoiceId, normalizeElevenLabsBaseUrl } from "./shared.js";
@@ -293,30 +297,40 @@ export async function listElevenLabsVoices(params: {
  apiKey: string;
  baseUrl?: string;
 }): Promise<SpeechVoiceOption[]> {
-  const res = await fetch(`${normalizeElevenLabsBaseUrl(params.baseUrl)}/v1/voices`, {
+  const normalizedBaseUrl = normalizeElevenLabsBaseUrl(params.baseUrl);
-    headers: {
+  const { response, release } = await fetchWithSsrFGuard({
-      "xi-api-key": params.apiKey,
+    url: `${normalizedBaseUrl}/v1/voices`,
+    init: {
+      headers: {
+        "xi-api-key": params.apiKey,
+      },
    },
+    policy: ssrfPolicyFromHttpBaseUrlAllowedHostname(normalizedBaseUrl),
+    auditContext: "elevenlabs.voices",
  });
-  await assertOkOrThrowProviderError(res, "ElevenLabs voices API error");
+  try {
-  const json = (await res.json()) as {
+    await assertOkOrThrowProviderError(response, "ElevenLabs voices API error");
-    voices?: Array<{
+    const json = (await response.json()) as {
-      voice_id?: string;
+      voices?: Array<{
-      name?: string;
+        voice_id?: string;
-      category?: string;
+        name?: string;
-      description?: string;
+        category?: string;
-    }>;
+        description?: string;
-  };
+      }>;
-  return Array.isArray(json.voices)
+    };
-    ? json.voices
+    return Array.isArray(json.voices)
-        .map((voice) => ({
+      ? json.voices
-          id: voice.voice_id?.trim() ?? "",
+          .map((voice) => ({
-          name: trimToUndefined(voice.name),
+            id: voice.voice_id?.trim() ?? "",
-          category: trimToUndefined(voice.category),
+            name: trimToUndefined(voice.name),
-          description: trimToUndefined(voice.description),
+            category: trimToUndefined(voice.category),
-        }))
+            description: trimToUndefined(voice.description),
-        .filter((voice) => voice.id.length > 0)
+          }))
-    : [];
+          .filter((voice) => voice.id.length > 0)
+      : [];
+  } finally {
+    await release();
+  }
 }
 export function buildElevenLabsSpeechProvider(): SpeechProviderPlugin {
--- a/extensions/microsoft/speech-provider.ts
+++ b/extensions/microsoft/speech-provider.ts
@@ -17,6 +17,10 @@ import type {
  SpeechVoiceOption,
 } from "openclaw/plugin-sdk/speech";
 import { asBoolean, asFiniteNumber, asObject, trimToUndefined } from "openclaw/plugin-sdk/speech";
+import {
+  fetchWithSsrFGuard,
+  ssrfPolicyFromHttpBaseUrlAllowedHostname,
+} from "openclaw/plugin-sdk/ssrf-runtime";
 import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/temp-path";
 import { edgeTTS, inferEdgeExtension } from "./tts.js";
@@ -138,39 +142,48 @@ export async function listMicrosoftVoices(): Promise<SpeechVoiceOption[]> {
    "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list" +
    `?trustedclienttoken=${TRUSTED_CLIENT_TOKEN}`;
  const headers = buildMicrosoftVoiceHeaders();
-  const response = await fetch(url, {
+  const { response, release } = await fetchWithSsrFGuard({
-    headers,
+    url,
+    init: {
+      headers,
+    },
+    policy: ssrfPolicyFromHttpBaseUrlAllowedHostname("https://speech.platform.bing.com"),
+    auditContext: "microsoft.speech.voices",
  });
-  if (!isDebugProxyGlobalFetchPatchInstalled()) {
+  try {
-    captureHttpExchange({
+    if (!isDebugProxyGlobalFetchPatchInstalled()) {
-      url,
+      captureHttpExchange({
-      method: "GET",
+        url,
-      requestHeaders: headers,
+        method: "GET",
-      response,
+        requestHeaders: headers,
-      transport: "http",
+        response,
-      meta: {
+        transport: "http",
-        provider: "microsoft",
+        meta: {
-        capability: "speech-voices",
+          provider: "microsoft",
-      },
+          capability: "speech-voices",
-    });
+        },
+      });
+    }
+    await assertOkOrThrowProviderError(response, "Microsoft voices API error");
+    const voices = (await response.json()) as MicrosoftVoiceListEntry[];
+    return Array.isArray(voices)
+      ? voices
+          .map((voice) => ({
+            id: voice.ShortName?.trim() ?? "",
+            name: trimToUndefined(voice.FriendlyName) ?? trimToUndefined(voice.ShortName),
+            category: voice.VoiceTag?.ContentCategories?.find((value) => value.trim().length > 0),
+            description: formatMicrosoftVoiceDescription(voice),
+            locale: trimToUndefined(voice.Locale),
+            gender: trimToUndefined(voice.Gender),
+            personalities: voice.VoiceTag?.VoicePersonalities?.filter(
+              (value): value is string => value.trim().length > 0,
+            ),
+          }))
+          .filter((voice) => voice.id.length > 0)
+      : [];
+  } finally {
+    await release();
  }
-  await assertOkOrThrowProviderError(response, "Microsoft voices API error");
-  const voices = (await response.json()) as MicrosoftVoiceListEntry[];
-  return Array.isArray(voices)
-    ? voices
-        .map((voice) => ({
-          id: voice.ShortName?.trim() ?? "",
-          name: trimToUndefined(voice.FriendlyName) ?? trimToUndefined(voice.ShortName),
-          category: voice.VoiceTag?.ContentCategories?.find((value) => value.trim().length > 0),
-          description: formatMicrosoftVoiceDescription(voice),
-          locale: trimToUndefined(voice.Locale),
-          gender: trimToUndefined(voice.Gender),
-          personalities: voice.VoiceTag?.VoicePersonalities?.filter(
-            (value): value is string => value.trim().length > 0,
-          ),
-        }))
-        .filter((voice) => voice.id.length > 0)
-    : [];
 }
 export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin {
--- a/extensions/minimax/tts.ts
+++ b/extensions/minimax/tts.ts
@@ -1,4 +1,8 @@
 import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/provider-http";
+import {
+  fetchWithSsrFGuard,
+  ssrfPolicyFromHttpBaseUrlAllowedHostname,
+} from "openclaw/plugin-sdk/ssrf-runtime";
 export const DEFAULT_MINIMAX_TTS_BASE_URL = "https://api.minimax.io";
@@ -51,38 +55,47 @@ export async function minimaxTTS(params: {
  const timeout = setTimeout(() => controller.abort(), timeoutMs);
  try {
-    const response = await fetch(`${baseUrl}/v1/t2a_v2`, {
+    const { response, release } = await fetchWithSsrFGuard({
-      method: "POST",
+      url: `${baseUrl}/v1/t2a_v2`,
-      headers: {
+      init: {
-        Authorization: `Bearer ${apiKey}`,
+        method: "POST",
-        "Content-Type": "application/json",
+        headers: {
+          Authorization: `Bearer ${apiKey}`,
+          "Content-Type": "application/json",
+        },
+        body: JSON.stringify({
+          model,
+          text,
+          voice_setting: {
+            voice_id: voiceId,
+            speed,
+            vol,
+            pitch,
+          },
+          audio_setting: {
+            format,
+            sample_rate: sampleRate,
+          },
+        }),
+        signal: controller.signal,
      },
-      body: JSON.stringify({
+      timeoutMs,
-        model,
+      policy: ssrfPolicyFromHttpBaseUrlAllowedHostname(baseUrl),
-        text,
+      auditContext: "minimax.tts",
-        voice_setting: {
-          voice_id: voiceId,
-          speed,
-          vol,
-          pitch,
-        },
-        audio_setting: {
-          format,
-          sample_rate: sampleRate,
-        },
-      }),
-      signal: controller.signal,
    });
+    try {
+      await assertOkOrThrowProviderError(response, "MiniMax TTS API error");
-    await assertOkOrThrowProviderError(response, "MiniMax TTS API error");
+      const body = (await response.json()) as { data?: { audio?: string } };
+      const hexAudio = body?.data?.audio;
+      if (!hexAudio) {
+        throw new Error("MiniMax TTS API returned no audio data");
+      }
-    const body = (await response.json()) as { data?: { audio?: string } };
+      return Buffer.from(hexAudio, "hex");
-    const hexAudio = body?.data?.audio;
+    } finally {
-    if (!hexAudio) {
+      await release();
-      throw new Error("MiniMax TTS API returned no audio data");
    }
-    return Buffer.from(hexAudio, "hex");
  } finally {
    clearTimeout(timeout);
  }