fix: guard speech provider fetches

This commit is contained in:
Peter Steinberger
2026-04-24 20:51:18 +01:00
parent 25ad66520b
commit 7425cb0549
3 changed files with 120 additions and 80 deletions

View File

@@ -18,6 +18,10 @@ import {
requireInRange, requireInRange,
trimToUndefined, trimToUndefined,
} from "openclaw/plugin-sdk/speech"; } from "openclaw/plugin-sdk/speech";
import {
fetchWithSsrFGuard,
ssrfPolicyFromHttpBaseUrlAllowedHostname,
} from "openclaw/plugin-sdk/ssrf-runtime";
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime"; import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime";
import { resolveElevenLabsApiKeyWithProfileFallback } from "./config-api.js"; import { resolveElevenLabsApiKeyWithProfileFallback } from "./config-api.js";
import { isValidElevenLabsVoiceId, normalizeElevenLabsBaseUrl } from "./shared.js"; import { isValidElevenLabsVoiceId, normalizeElevenLabsBaseUrl } from "./shared.js";
@@ -293,30 +297,40 @@ export async function listElevenLabsVoices(params: {
apiKey: string; apiKey: string;
baseUrl?: string; baseUrl?: string;
}): Promise<SpeechVoiceOption[]> { }): Promise<SpeechVoiceOption[]> {
const res = await fetch(`${normalizeElevenLabsBaseUrl(params.baseUrl)}/v1/voices`, { const normalizedBaseUrl = normalizeElevenLabsBaseUrl(params.baseUrl);
headers: { const { response, release } = await fetchWithSsrFGuard({
"xi-api-key": params.apiKey, url: `${normalizedBaseUrl}/v1/voices`,
init: {
headers: {
"xi-api-key": params.apiKey,
},
}, },
policy: ssrfPolicyFromHttpBaseUrlAllowedHostname(normalizedBaseUrl),
auditContext: "elevenlabs.voices",
}); });
await assertOkOrThrowProviderError(res, "ElevenLabs voices API error"); try {
const json = (await res.json()) as { await assertOkOrThrowProviderError(response, "ElevenLabs voices API error");
voices?: Array<{ const json = (await response.json()) as {
voice_id?: string; voices?: Array<{
name?: string; voice_id?: string;
category?: string; name?: string;
description?: string; category?: string;
}>; description?: string;
}; }>;
return Array.isArray(json.voices) };
? json.voices return Array.isArray(json.voices)
.map((voice) => ({ ? json.voices
id: voice.voice_id?.trim() ?? "", .map((voice) => ({
name: trimToUndefined(voice.name), id: voice.voice_id?.trim() ?? "",
category: trimToUndefined(voice.category), name: trimToUndefined(voice.name),
description: trimToUndefined(voice.description), category: trimToUndefined(voice.category),
})) description: trimToUndefined(voice.description),
.filter((voice) => voice.id.length > 0) }))
: []; .filter((voice) => voice.id.length > 0)
: [];
} finally {
await release();
}
} }
export function buildElevenLabsSpeechProvider(): SpeechProviderPlugin { export function buildElevenLabsSpeechProvider(): SpeechProviderPlugin {

View File

@@ -17,6 +17,10 @@ import type {
SpeechVoiceOption, SpeechVoiceOption,
} from "openclaw/plugin-sdk/speech"; } from "openclaw/plugin-sdk/speech";
import { asBoolean, asFiniteNumber, asObject, trimToUndefined } from "openclaw/plugin-sdk/speech"; import { asBoolean, asFiniteNumber, asObject, trimToUndefined } from "openclaw/plugin-sdk/speech";
import {
fetchWithSsrFGuard,
ssrfPolicyFromHttpBaseUrlAllowedHostname,
} from "openclaw/plugin-sdk/ssrf-runtime";
import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/temp-path"; import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/temp-path";
import { edgeTTS, inferEdgeExtension } from "./tts.js"; import { edgeTTS, inferEdgeExtension } from "./tts.js";
@@ -138,39 +142,48 @@ export async function listMicrosoftVoices(): Promise<SpeechVoiceOption[]> {
"https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list" + "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list" +
`?trustedclienttoken=${TRUSTED_CLIENT_TOKEN}`; `?trustedclienttoken=${TRUSTED_CLIENT_TOKEN}`;
const headers = buildMicrosoftVoiceHeaders(); const headers = buildMicrosoftVoiceHeaders();
const response = await fetch(url, { const { response, release } = await fetchWithSsrFGuard({
headers, url,
init: {
headers,
},
policy: ssrfPolicyFromHttpBaseUrlAllowedHostname("https://speech.platform.bing.com"),
auditContext: "microsoft.speech.voices",
}); });
if (!isDebugProxyGlobalFetchPatchInstalled()) { try {
captureHttpExchange({ if (!isDebugProxyGlobalFetchPatchInstalled()) {
url, captureHttpExchange({
method: "GET", url,
requestHeaders: headers, method: "GET",
response, requestHeaders: headers,
transport: "http", response,
meta: { transport: "http",
provider: "microsoft", meta: {
capability: "speech-voices", provider: "microsoft",
}, capability: "speech-voices",
}); },
});
}
await assertOkOrThrowProviderError(response, "Microsoft voices API error");
const voices = (await response.json()) as MicrosoftVoiceListEntry[];
return Array.isArray(voices)
? voices
.map((voice) => ({
id: voice.ShortName?.trim() ?? "",
name: trimToUndefined(voice.FriendlyName) ?? trimToUndefined(voice.ShortName),
category: voice.VoiceTag?.ContentCategories?.find((value) => value.trim().length > 0),
description: formatMicrosoftVoiceDescription(voice),
locale: trimToUndefined(voice.Locale),
gender: trimToUndefined(voice.Gender),
personalities: voice.VoiceTag?.VoicePersonalities?.filter(
(value): value is string => value.trim().length > 0,
),
}))
.filter((voice) => voice.id.length > 0)
: [];
} finally {
await release();
} }
await assertOkOrThrowProviderError(response, "Microsoft voices API error");
const voices = (await response.json()) as MicrosoftVoiceListEntry[];
return Array.isArray(voices)
? voices
.map((voice) => ({
id: voice.ShortName?.trim() ?? "",
name: trimToUndefined(voice.FriendlyName) ?? trimToUndefined(voice.ShortName),
category: voice.VoiceTag?.ContentCategories?.find((value) => value.trim().length > 0),
description: formatMicrosoftVoiceDescription(voice),
locale: trimToUndefined(voice.Locale),
gender: trimToUndefined(voice.Gender),
personalities: voice.VoiceTag?.VoicePersonalities?.filter(
(value): value is string => value.trim().length > 0,
),
}))
.filter((voice) => voice.id.length > 0)
: [];
} }
export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin { export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin {

View File

@@ -1,4 +1,8 @@
import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/provider-http"; import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/provider-http";
import {
fetchWithSsrFGuard,
ssrfPolicyFromHttpBaseUrlAllowedHostname,
} from "openclaw/plugin-sdk/ssrf-runtime";
export const DEFAULT_MINIMAX_TTS_BASE_URL = "https://api.minimax.io"; export const DEFAULT_MINIMAX_TTS_BASE_URL = "https://api.minimax.io";
@@ -51,38 +55,47 @@ export async function minimaxTTS(params: {
const timeout = setTimeout(() => controller.abort(), timeoutMs); const timeout = setTimeout(() => controller.abort(), timeoutMs);
try { try {
const response = await fetch(`${baseUrl}/v1/t2a_v2`, { const { response, release } = await fetchWithSsrFGuard({
method: "POST", url: `${baseUrl}/v1/t2a_v2`,
headers: { init: {
Authorization: `Bearer ${apiKey}`, method: "POST",
"Content-Type": "application/json", headers: {
Authorization: `Bearer ${apiKey}`,
"Content-Type": "application/json",
},
body: JSON.stringify({
model,
text,
voice_setting: {
voice_id: voiceId,
speed,
vol,
pitch,
},
audio_setting: {
format,
sample_rate: sampleRate,
},
}),
signal: controller.signal,
}, },
body: JSON.stringify({ timeoutMs,
model, policy: ssrfPolicyFromHttpBaseUrlAllowedHostname(baseUrl),
text, auditContext: "minimax.tts",
voice_setting: {
voice_id: voiceId,
speed,
vol,
pitch,
},
audio_setting: {
format,
sample_rate: sampleRate,
},
}),
signal: controller.signal,
}); });
try {
await assertOkOrThrowProviderError(response, "MiniMax TTS API error");
await assertOkOrThrowProviderError(response, "MiniMax TTS API error"); const body = (await response.json()) as { data?: { audio?: string } };
const hexAudio = body?.data?.audio;
if (!hexAudio) {
throw new Error("MiniMax TTS API returned no audio data");
}
const body = (await response.json()) as { data?: { audio?: string } }; return Buffer.from(hexAudio, "hex");
const hexAudio = body?.data?.audio; } finally {
if (!hexAudio) { await release();
throw new Error("MiniMax TTS API returned no audio data");
} }
return Buffer.from(hexAudio, "hex");
} finally { } finally {
clearTimeout(timeout); clearTimeout(timeout);
} }