mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 17:00:50 +00:00
Add Volcengine/BytePlus Seed Speech as a bundled TTS provider with current API-key auth, legacy AppID/token fallback, native Ogg/Opus voice-note output, and MP3 audio-file output. Co-authored-by: Peter Steinberger <steipete@gmail.com>
273 lines
8.4 KiB
TypeScript
273 lines
8.4 KiB
TypeScript
import { beforeEach, describe, expect, it, vi } from "vitest";
|
|
import { buildVolcengineSpeechProvider } from "./speech-provider.js";
|
|
import { volcengineTTS } from "./tts.js";
|
|
|
|
const { fetchWithSsrFGuardMock } = vi.hoisted(() => ({
|
|
fetchWithSsrFGuardMock: vi.fn(),
|
|
}));
|
|
|
|
vi.mock("openclaw/plugin-sdk/ssrf-runtime", () => ({
|
|
fetchWithSsrFGuard: fetchWithSsrFGuardMock,
|
|
}));
|
|
|
|
function makeProviderConfig(overrides?: Record<string, unknown>) {
|
|
return {
|
|
apiKey: "test-api-key",
|
|
voice: "en_female_anna_mars_bigtts",
|
|
...overrides,
|
|
};
|
|
}
|
|
|
|
function makeLegacyProviderConfig(overrides?: Record<string, unknown>) {
|
|
return {
|
|
appId: "test-app-id",
|
|
token: "test-token",
|
|
voice: "zh_female_xiaohe_uranus_bigtts",
|
|
cluster: "volcano_tts",
|
|
...overrides,
|
|
};
|
|
}
|
|
|
|
function clearTtsEnv() {
|
|
delete process.env.BYTEPLUS_API_KEY;
|
|
delete process.env.BYTEPLUS_SEED_SPEECH_API_KEY;
|
|
delete process.env.VOLCENGINE_TTS_API_KEY;
|
|
delete process.env.VOLCENGINE_TTS_APPID;
|
|
delete process.env.VOLCENGINE_TTS_TOKEN;
|
|
}
|
|
|
|
describe("Volcengine speech provider", () => {
|
|
const provider = buildVolcengineSpeechProvider();
|
|
|
|
beforeEach(() => {
|
|
fetchWithSsrFGuardMock.mockReset();
|
|
});
|
|
|
|
it("has correct id, label, and aliases", () => {
|
|
expect(provider.id).toBe("volcengine");
|
|
expect(provider.label).toBe("Volcengine");
|
|
expect(provider.aliases).toContain("bytedance");
|
|
expect(provider.aliases).toContain("doubao");
|
|
});
|
|
|
|
it("reports configured when an API key is present in providerConfig", () => {
|
|
expect(provider.isConfigured({ providerConfig: makeProviderConfig(), timeoutMs: 30000 })).toBe(
|
|
true,
|
|
);
|
|
});
|
|
|
|
it("reports configured for legacy appId and token in providerConfig", () => {
|
|
expect(
|
|
provider.isConfigured({ providerConfig: makeLegacyProviderConfig(), timeoutMs: 30000 }),
|
|
).toBe(true);
|
|
});
|
|
|
|
it("reports not configured when credentials are missing", () => {
|
|
const oldBytePlusKey = process.env.BYTEPLUS_API_KEY;
|
|
const oldSeedKey = process.env.BYTEPLUS_SEED_SPEECH_API_KEY;
|
|
const oldApiKey = process.env.VOLCENGINE_TTS_API_KEY;
|
|
const oldAppId = process.env.VOLCENGINE_TTS_APPID;
|
|
const oldToken = process.env.VOLCENGINE_TTS_TOKEN;
|
|
clearTtsEnv();
|
|
try {
|
|
expect(provider.isConfigured({ providerConfig: {}, timeoutMs: 30000 })).toBe(false);
|
|
} finally {
|
|
if (oldBytePlusKey) {
|
|
process.env.BYTEPLUS_API_KEY = oldBytePlusKey;
|
|
}
|
|
if (oldSeedKey) {
|
|
process.env.BYTEPLUS_SEED_SPEECH_API_KEY = oldSeedKey;
|
|
}
|
|
if (oldApiKey) {
|
|
process.env.VOLCENGINE_TTS_API_KEY = oldApiKey;
|
|
}
|
|
if (oldAppId) {
|
|
process.env.VOLCENGINE_TTS_APPID = oldAppId;
|
|
}
|
|
if (oldToken) {
|
|
process.env.VOLCENGINE_TTS_TOKEN = oldToken;
|
|
}
|
|
}
|
|
});
|
|
|
|
it("falls back to env vars for credentials", () => {
|
|
const oldBytePlusKey = process.env.BYTEPLUS_API_KEY;
|
|
const oldSeedKey = process.env.BYTEPLUS_SEED_SPEECH_API_KEY;
|
|
const oldApiKey = process.env.VOLCENGINE_TTS_API_KEY;
|
|
const oldAppId = process.env.VOLCENGINE_TTS_APPID;
|
|
const oldToken = process.env.VOLCENGINE_TTS_TOKEN;
|
|
clearTtsEnv();
|
|
process.env.BYTEPLUS_SEED_SPEECH_API_KEY = "env-api-key";
|
|
try {
|
|
expect(provider.isConfigured({ providerConfig: {}, timeoutMs: 30000 })).toBe(true);
|
|
} finally {
|
|
if (oldBytePlusKey) {
|
|
process.env.BYTEPLUS_API_KEY = oldBytePlusKey;
|
|
}
|
|
if (oldSeedKey) {
|
|
process.env.BYTEPLUS_SEED_SPEECH_API_KEY = oldSeedKey;
|
|
} else {
|
|
delete process.env.BYTEPLUS_SEED_SPEECH_API_KEY;
|
|
}
|
|
if (oldApiKey) {
|
|
process.env.VOLCENGINE_TTS_API_KEY = oldApiKey;
|
|
}
|
|
if (oldAppId) {
|
|
process.env.VOLCENGINE_TTS_APPID = oldAppId;
|
|
} else {
|
|
delete process.env.VOLCENGINE_TTS_APPID;
|
|
}
|
|
if (oldToken) {
|
|
process.env.VOLCENGINE_TTS_TOKEN = oldToken;
|
|
} else {
|
|
delete process.env.VOLCENGINE_TTS_TOKEN;
|
|
}
|
|
}
|
|
});
|
|
|
|
it("lists voices with locale and gender", async () => {
|
|
const voices = await provider.listVoices!({});
|
|
expect(voices.length).toBeGreaterThan(0);
|
|
expect(voices[0]).toMatchObject({ locale: "en-US" });
|
|
expect(voices[0].gender).toBeDefined();
|
|
});
|
|
|
|
it("sends the documented Seed Speech API key payload and returns voice-note Opus metadata", async () => {
|
|
const release = vi.fn();
|
|
fetchWithSsrFGuardMock.mockResolvedValue({
|
|
response: new Response(
|
|
JSON.stringify({
|
|
code: 0,
|
|
data: Buffer.from("voice-audio").toString("base64"),
|
|
}),
|
|
),
|
|
release,
|
|
});
|
|
|
|
const result = await provider.synthesize({
|
|
text: "hello",
|
|
cfg: {},
|
|
providerConfig: makeProviderConfig({ emotion: "happy", speedRatio: 1.2 }),
|
|
target: "voice-note",
|
|
providerOverrides: { voice: "zh_male_aojiao_mars_bigtts", speedRatio: 0.9 },
|
|
timeoutMs: 1234,
|
|
});
|
|
|
|
expect(result.audioBuffer.toString()).toBe("voice-audio");
|
|
expect(result.outputFormat).toBe("opus");
|
|
expect(result.fileExtension).toBe(".opus");
|
|
expect(result.voiceCompatible).toBe(true);
|
|
|
|
const call = fetchWithSsrFGuardMock.mock.calls[0]?.[0];
|
|
expect(call).toMatchObject({
|
|
url: "https://voice.ap-southeast-1.bytepluses.com/api/v3/tts/unidirectional",
|
|
timeoutMs: 1234,
|
|
policy: { hostnameAllowlist: ["voice.ap-southeast-1.bytepluses.com"] },
|
|
auditContext: "volcengine.tts",
|
|
});
|
|
expect(call.init.headers["X-Api-Key"]).toBe("test-api-key");
|
|
expect(call.init.headers["X-Api-Resource-Id"]).toBe("seed-tts-1.0");
|
|
expect(call.init.headers["X-Api-App-Key"]).toBe("aGjiRDfUWi");
|
|
const body = JSON.parse(call.init.body);
|
|
expect(body.req_params).toMatchObject({
|
|
text: "hello",
|
|
speaker: "zh_male_aojiao_mars_bigtts",
|
|
speed_ratio: 0.9,
|
|
emotion: "happy",
|
|
audio_params: {
|
|
format: "ogg_opus",
|
|
sample_rate: 24000,
|
|
},
|
|
});
|
|
expect(release).toHaveBeenCalledTimes(1);
|
|
});
|
|
});
|
|
|
|
describe("volcengineTTS", () => {
|
|
beforeEach(() => {
|
|
fetchWithSsrFGuardMock.mockReset();
|
|
});
|
|
|
|
it("joins streamed Seed Speech audio frames", async () => {
|
|
const release = vi.fn();
|
|
fetchWithSsrFGuardMock.mockResolvedValue({
|
|
response: new Response(
|
|
[
|
|
JSON.stringify({ code: 0, message: "" }),
|
|
JSON.stringify({ code: 0, data: Buffer.from("audio-1").toString("base64") }),
|
|
JSON.stringify({ code: 0, data: Buffer.from("audio-2").toString("base64") }),
|
|
JSON.stringify({ code: 20000000, message: "ok", data: null }),
|
|
].join("\n"),
|
|
),
|
|
release,
|
|
});
|
|
|
|
const audio = await volcengineTTS({
|
|
text: "hello",
|
|
apiKey: "secret-api-key",
|
|
voice: "zh_female_xiaohe_uranus_bigtts",
|
|
encoding: "mp3",
|
|
timeoutMs: 1000,
|
|
});
|
|
|
|
expect(audio.toString()).toBe("audio-1audio-2");
|
|
expect(release).toHaveBeenCalledTimes(1);
|
|
});
|
|
|
|
it("reports Seed Speech provider errors without exposing credentials", async () => {
|
|
const release = vi.fn();
|
|
fetchWithSsrFGuardMock.mockResolvedValue({
|
|
response: new Response(
|
|
JSON.stringify({ header: { code: 45000000, message: "speaker permission denied" } }),
|
|
{ status: 403 },
|
|
),
|
|
release,
|
|
});
|
|
|
|
let error: unknown;
|
|
try {
|
|
await volcengineTTS({
|
|
text: "hello",
|
|
apiKey: "secret-api-key",
|
|
timeoutMs: 1000,
|
|
});
|
|
} catch (err) {
|
|
error = err;
|
|
}
|
|
|
|
expect(error).toBeInstanceOf(Error);
|
|
expect((error as Error).message).toBe(
|
|
"BytePlus Seed Speech TTS error 45000000: speaker permission denied",
|
|
);
|
|
expect((error as Error).message).not.toContain("secret-api-key");
|
|
expect(release).toHaveBeenCalledTimes(1);
|
|
});
|
|
|
|
it("reports provider errors without exposing credentials", async () => {
|
|
const release = vi.fn();
|
|
fetchWithSsrFGuardMock.mockResolvedValue({
|
|
response: new Response(JSON.stringify({ code: 3001, message: "load grant failed" }), {
|
|
status: 401,
|
|
}),
|
|
release,
|
|
});
|
|
|
|
let error: unknown;
|
|
try {
|
|
await volcengineTTS({
|
|
text: "hello",
|
|
appId: "app-id",
|
|
token: "secret-token",
|
|
timeoutMs: 1000,
|
|
});
|
|
} catch (err) {
|
|
error = err;
|
|
}
|
|
|
|
expect(error).toBeInstanceOf(Error);
|
|
expect((error as Error).message).toBe("Volcengine TTS error 3001: load grant failed");
|
|
expect((error as Error).message).not.toContain("secret-token");
|
|
expect(release).toHaveBeenCalledTimes(1);
|
|
});
|
|
});
|