mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:20:43 +00:00
230 lines
7.5 KiB
TypeScript
230 lines
7.5 KiB
TypeScript
import { normalizeResolvedSecretInputString } from "openclaw/plugin-sdk/secret-input";
|
|
import type {
|
|
SpeechDirectiveTokenParseContext,
|
|
SpeechProviderConfig,
|
|
SpeechProviderOverrides,
|
|
SpeechProviderPlugin,
|
|
} from "openclaw/plugin-sdk/speech-core";
|
|
import { asFiniteNumber, asObject, trimToUndefined } from "openclaw/plugin-sdk/speech-core";
|
|
import { volcengineTTS, type VolcengineTtsEncoding } from "./tts.js";
|
|
|
|
/** Voice id used when neither provider config nor VOLCENGINE_TTS_VOICE supplies one. */
const DEFAULT_VOICE = "en_female_anna_mars_bigtts";

/** Cluster used when neither provider config nor VOLCENGINE_TTS_CLUSTER supplies one. */
const DEFAULT_CLUSTER = "volcano_tts";

/** Resource id used when neither provider config nor VOLCENGINE_TTS_RESOURCE_ID supplies one. */
const DEFAULT_RESOURCE_ID = "seed-tts-1.0";

/** App key used when neither provider config nor VOLCENGINE_TTS_APP_KEY supplies one. */
const DEFAULT_APP_KEY = "aGjiRDfUWi";
|
|
|
|
/**
 * Voice ids advertised by this provider. Each id starts with a locale prefix
 * (`en_` / `zh_`) followed by a gender segment; `listVoices` derives its
 * display metadata from that naming scheme.
 */
const VOLCENGINE_VOICES: ReadonlyArray<string> = [
  // English voices
  "en_female_anna_mars_bigtts",
  "en_male_adam_mars_bigtts",
  "en_female_sarah_mars_bigtts",
  "en_male_smith_mars_bigtts",
  // Chinese voices
  "zh_female_cancan_mars_bigtts",
  "zh_female_qingxinnvsheng_mars_bigtts",
  "zh_female_linjia_mars_bigtts",
  "zh_male_wennuanahu_moon_bigtts",
  "zh_male_shaonianzixin_moon_bigtts",
  "zh_female_shuangkuaisisi_moon_bigtts",
];
|
|
|
|
/**
 * Normalized Volcengine provider settings.
 *
 * Credentials are optional at this level: a request needs either an API key
 * or the legacy `appId` + `token` pair, and both may also come from
 * environment variables at call time.
 */
type VolcengineTtsProviderConfig = {
  /** API key credential (preferred over the legacy app-id/token pair). */
  apiKey?: string;
  /** Legacy credential: application id, used together with `token`. */
  appId?: string;
  /** Legacy credential: access token, used together with `appId`. */
  token?: string;
  /** Voice id; always populated (falls back to the module default). */
  voice: string;
  /** Cluster name; always populated (falls back to the module default). */
  cluster: string;
  /** Resource id; always populated (falls back to the module default). */
  resourceId: string;
  /** App key; always populated (falls back to the module default). */
  appKey: string;
  /** Optional endpoint override forwarded to the TTS client. */
  baseUrl?: string;
  /** Optional default speed ratio forwarded to the TTS client. */
  speedRatio?: number;
  /** Optional default emotion forwarded to the TTS client. */
  emotion?: string;
};
|
|
|
|
/**
 * Per-request overrides understood by this provider. Any field left
 * undefined falls back to the corresponding provider-config value when a
 * synthesis request is built.
 */
type VolcengineTtsProviderOverrides = {
  /** Voice id to use instead of the configured voice. */
  voice?: string;
  /** Speed ratio to use instead of the configured one. */
  speedRatio?: number;
  /** Emotion to use instead of the configured one. */
  emotion?: string;
};
|
|
|
|
/**
 * Builds a normalized provider config from the raw TTS configuration object.
 *
 * The Volcengine section is looked up at `rawConfig.providers.volcengine`
 * first and at `rawConfig.volcengine` second. For `voice`, `cluster`,
 * `resourceId`, `appKey` and `baseUrl`, the precedence is: config value, then
 * the matching `VOLCENGINE_TTS_*` environment variable, then (except for
 * `baseUrl`) the module default.
 *
 * @param rawConfig Raw configuration object; may be empty.
 * @returns The normalized provider config.
 */
function normalizeVolcengineProviderConfig(
  rawConfig: Record<string, unknown>,
): VolcengineTtsProviderConfig {
  const section =
    asObject(asObject(rawConfig.providers)?.volcengine) ?? asObject(rawConfig.volcengine);

  // Config value wins; the environment variable is only consulted when the
  // config value is missing or blank.
  const configOrEnv = (configured: unknown, envName: string): string | undefined =>
    trimToUndefined(configured) ?? trimToUndefined(process.env[envName]);

  const apiKey = normalizeResolvedSecretInputString({
    value: section?.apiKey,
    path: "messages.tts.providers.volcengine.apiKey",
  });
  const appId = trimToUndefined(section?.appId);
  const token = normalizeResolvedSecretInputString({
    value: section?.token,
    path: "messages.tts.providers.volcengine.token",
  });

  return {
    apiKey,
    appId,
    token,
    voice: configOrEnv(section?.voice, "VOLCENGINE_TTS_VOICE") ?? DEFAULT_VOICE,
    cluster: configOrEnv(section?.cluster, "VOLCENGINE_TTS_CLUSTER") ?? DEFAULT_CLUSTER,
    resourceId:
      configOrEnv(section?.resourceId, "VOLCENGINE_TTS_RESOURCE_ID") ?? DEFAULT_RESOURCE_ID,
    appKey: configOrEnv(section?.appKey, "VOLCENGINE_TTS_APP_KEY") ?? DEFAULT_APP_KEY,
    baseUrl: configOrEnv(section?.baseUrl, "VOLCENGINE_TTS_BASE_URL"),
    speedRatio: asFiniteNumber(section?.speedRatio),
    emotion: trimToUndefined(section?.emotion),
  };
}
|
|
|
|
/**
 * Picks the API key to use for a request.
 *
 * @param configApiKey Key from provider config; returned as-is whenever it is
 *   not null/undefined.
 * @returns The config key, else the trimmed `VOLCENGINE_TTS_API_KEY`, else the
 *   trimmed `BYTEPLUS_SEED_SPEECH_API_KEY`, else `undefined`.
 */
function resolveSeedSpeechApiKey(configApiKey?: string): string | undefined {
  if (configApiKey != null) {
    return configApiKey;
  }
  const volcengineKey = trimToUndefined(process.env.VOLCENGINE_TTS_API_KEY);
  if (volcengineKey != null) {
    return volcengineKey;
  }
  return trimToUndefined(process.env.BYTEPLUS_SEED_SPEECH_API_KEY);
}
|
|
|
|
/**
 * Re-reads an already-resolved provider config, filling any missing field
 * from environment variables and module defaults.
 *
 * The fallbacks come from `normalizeVolcengineProviderConfig({})`, i.e. the
 * values that function produces when no config section is present.
 *
 * Both secret fields (`apiKey` and `token`) go through
 * `normalizeResolvedSecretInputString`, matching how
 * `normalizeVolcengineProviderConfig` treats them. Previously `token` was read
 * with `trimToUndefined`, so the two secrets were handled differently.
 * NOTE(review): this assumes the secret normalizer returns plain strings
 * unchanged apart from normalization — confirm against the plugin SDK.
 *
 * @param config Provider config as handed to the plugin callbacks.
 * @returns A fully populated provider config.
 */
function readProviderConfig(config: SpeechProviderConfig): VolcengineTtsProviderConfig {
  const normalized = normalizeVolcengineProviderConfig({});
  return {
    apiKey:
      normalizeResolvedSecretInputString({
        value: config.apiKey,
        path: "messages.tts.providers.volcengine.apiKey",
      }) ?? normalized.apiKey,
    appId: trimToUndefined(config.appId) ?? normalized.appId,
    token:
      normalizeResolvedSecretInputString({
        value: config.token,
        path: "messages.tts.providers.volcengine.token",
      }) ?? normalized.token,
    voice: trimToUndefined(config.voice) ?? normalized.voice,
    cluster: trimToUndefined(config.cluster) ?? normalized.cluster,
    resourceId: trimToUndefined(config.resourceId) ?? normalized.resourceId,
    appKey: trimToUndefined(config.appKey) ?? normalized.appKey,
    baseUrl: trimToUndefined(config.baseUrl) ?? normalized.baseUrl,
    speedRatio: asFiniteNumber(config.speedRatio) ?? normalized.speedRatio,
    emotion: trimToUndefined(config.emotion) ?? normalized.emotion,
  };
}
|
|
|
|
/**
 * Extracts the override fields this provider understands from a generic
 * override bag.
 *
 * @param overrides Per-request overrides, or `undefined` when none were given.
 * @returns An empty object when `overrides` is falsy; otherwise an object with
 *   `voice`, `speedRatio` and `emotion` keys, each `undefined` when the input
 *   value is missing or unusable.
 */
function readVolcengineOverrides(
  overrides: SpeechProviderOverrides | undefined,
): VolcengineTtsProviderOverrides {
  if (overrides) {
    const { voice, speedRatio, emotion } = overrides;
    return {
      voice: trimToUndefined(voice),
      speedRatio: asFiniteNumber(speedRatio),
      emotion: trimToUndefined(emotion),
    };
  }
  return {};
}
|
|
|
|
/**
 * Interprets one directive token for this provider.
 *
 * Recognized keys:
 * - `voice`, `volcengine_voice`, `volcenginevoice`: set the voice override
 *   (requires `policy.allowVoice`).
 * - `speed`, `speedratio`, `speed_ratio`: set the speed ratio; the value must
 *   be a finite number in [0.2, 3.0] (requires `policy.allowVoiceSettings`).
 * - `emotion`: set the emotion override (requires `policy.allowVoiceSettings`).
 *
 * A recognized key blocked by policy is reported as handled with no
 * overrides, so the token is consumed but has no effect.
 *
 * @param ctx Token key/value plus the current overrides and policy.
 * @returns `handled: false` for unknown keys; otherwise `handled: true` with
 *   either merged overrides or a warning for an invalid speed value.
 */
function parseDirectiveToken(ctx: SpeechDirectiveTokenParseContext): {
  handled: boolean;
  overrides?: SpeechProviderOverrides;
  warnings?: string[];
} {
  const { key, value } = ctx;

  if (key === "voice" || key === "volcengine_voice" || key === "volcenginevoice") {
    if (ctx.policy.allowVoice) {
      return { handled: true, overrides: { ...ctx.currentOverrides, voice: value } };
    }
    return { handled: true };
  }

  if (key === "speed" || key === "speedratio" || key === "speed_ratio") {
    if (!ctx.policy.allowVoiceSettings) {
      return { handled: true };
    }
    const speedRatio = Number(value);
    const isValid = Number.isFinite(speedRatio) && speedRatio >= 0.2 && speedRatio <= 3.0;
    if (isValid) {
      return { handled: true, overrides: { ...ctx.currentOverrides, speedRatio } };
    }
    return { handled: true, warnings: [`invalid Volcengine speedRatio "${value}"`] };
  }

  if (key === "emotion") {
    if (ctx.policy.allowVoiceSettings) {
      return { handled: true, overrides: { ...ctx.currentOverrides, emotion: value } };
    }
    return { handled: true };
  }

  return { handled: false };
}
|
|
|
|
/**
 * Resolves the legacy app-id/token credential pair: the provider-config value
 * wins, otherwise the VOLCENGINE_TTS_APPID / VOLCENGINE_TTS_TOKEN environment
 * variable is used. Environment values are trimmed so a blank or
 * whitespace-only variable counts as unset, matching how the other
 * environment variables in this module are read.
 */
function resolveVolcengineLegacyCredentials(
  cfg: Pick<VolcengineTtsProviderConfig, "appId" | "token">,
): { appId: string | undefined; token: string | undefined } {
  return {
    appId: cfg.appId || trimToUndefined(process.env.VOLCENGINE_TTS_APPID),
    token: cfg.token || trimToUndefined(process.env.VOLCENGINE_TTS_TOKEN),
  };
}

/**
 * Creates the Volcengine speech provider plugin.
 *
 * The provider authenticates with either an API key (see
 * `resolveSeedSpeechApiKey`) or the legacy app-id + token pair. `isConfigured`
 * and `synthesize` share one credential lookup so they cannot disagree about
 * whether credentials are present.
 *
 * @returns The plugin object registered under the id `volcengine`.
 */
export function buildVolcengineSpeechProvider(): SpeechProviderPlugin {
  return {
    id: "volcengine",
    label: "Volcengine",
    autoSelectOrder: 90,
    aliases: ["bytedance", "doubao"],
    voices: VOLCENGINE_VOICES,
    resolveConfig: ({ rawConfig }) => normalizeVolcengineProviderConfig(rawConfig),
    parseDirectiveToken,

    // Voice metadata is derived from the id: the display name is the segment
    // after the locale/gender prefix, up to the next underscore.
    listVoices: async () =>
      VOLCENGINE_VOICES.map((v) => ({
        id: v,
        name: v.replace(/^(?:en|zh)_(female|male)_/, "").replace(/_.*$/, ""),
        locale: v.startsWith("en_") ? "en-US" : "zh-CN",
        gender: v.includes("_female_") ? "female" : "male",
      })),

    // Configured when an API key is available, or when BOTH legacy
    // credentials are available.
    isConfigured: ({ providerConfig }) => {
      const cfg = readProviderConfig(providerConfig);
      if (resolveSeedSpeechApiKey(cfg.apiKey)) {
        return true;
      }
      const { appId, token } = resolveVolcengineLegacyCredentials(cfg);
      return Boolean(appId && token);
    },

    synthesize: async (req) => {
      const cfg = readProviderConfig(req.providerConfig);
      const overrides = readVolcengineOverrides(req.providerOverrides);
      const apiKey = resolveSeedSpeechApiKey(cfg.apiKey);
      const { appId, token } = resolveVolcengineLegacyCredentials(cfg);

      if (!apiKey && (!appId || !token)) {
        throw new Error(
          "Volcengine TTS credentials missing. Set VOLCENGINE_TTS_API_KEY, " +
            "BYTEPLUS_SEED_SPEECH_API_KEY, or legacy VOLCENGINE_TTS_APPID and VOLCENGINE_TTS_TOKEN.",
        );
      }

      // Voice notes are requested as Ogg/Opus; every other target gets MP3.
      const isVoiceNote = req.target === "voice-note";
      const encoding: VolcengineTtsEncoding = isVoiceNote ? "ogg_opus" : "mp3";

      const audioBuffer = await volcengineTTS({
        text: req.text,
        apiKey,
        appId,
        token,
        // Per-request overrides take precedence over provider config.
        voice: overrides.voice ?? cfg.voice,
        cluster: cfg.cluster,
        resourceId: cfg.resourceId,
        appKey: cfg.appKey,
        baseUrl: cfg.baseUrl,
        speedRatio: overrides.speedRatio ?? cfg.speedRatio,
        emotion: overrides.emotion ?? cfg.emotion,
        encoding,
        timeoutMs: req.timeoutMs,
      });

      return {
        audioBuffer,
        outputFormat: encoding === "ogg_opus" ? "opus" : "mp3",
        fileExtension: encoding === "ogg_opus" ? ".opus" : ".mp3",
        voiceCompatible: isVoiceNote,
      };
    },
  };
}
|