fix(tts): route WhatsApp MP3 TTS as voice notes

This commit is contained in:
Peter Steinberger
2026-04-26 03:25:55 +01:00
parent 90cd9fce85
commit 9b91040053
4 changed files with 34 additions and 36 deletions

View File

@@ -71,7 +71,12 @@ async function expectTtsPayloadResult(params: {
text: string;
target: "voice-note" | "audio-file";
audioAsVoice: true | undefined;
providerResult?: MockSpeechSynthesisResult;
mediaExtension?: string;
}) {
if (params.providerResult) {
synthesizeMock.mockResolvedValueOnce(params.providerResult);
}
const cfg = createTtsConfig(params.prefsName);
let mediaDir: string | undefined;
try {
@@ -84,7 +89,7 @@ async function expectTtsPayloadResult(params: {
expect(synthesizeMock).toHaveBeenCalledWith(expect.objectContaining({ target: params.target }));
expect(result.audioAsVoice).toBe(params.audioAsVoice);
expect(result.mediaUrl).toMatch(/voice-\d+\.ogg$/);
expect(result.mediaUrl).toMatch(new RegExp(`voice-\\d+\\.${params.mediaExtension ?? "ogg"}$`));
mediaDir = result.mediaUrl ? path.dirname(result.mediaUrl) : undefined;
} finally {
@@ -118,35 +123,26 @@ describe("speech-core native voice-note routing", () => {
});
});
it("marks Feishu voice-note TTS for channel-side transcoding when provider returns mp3", async () => {
synthesizeMock.mockResolvedValueOnce({
audioBuffer: Buffer.from("mp3"),
outputFormat: "mp3",
fileExtension: ".mp3",
voiceCompatible: false,
});
const cfg = createTtsConfig("openclaw-speech-core-tts-feishu-mp3-test");
let mediaDir: string | undefined;
try {
const result = await maybeApplyTtsToPayload({
payload: { text: "This Feishu reply should be transcoded by the channel." },
cfg,
channel: "feishu",
kind: "final",
it.each(["feishu", "whatsapp"] as const)(
"marks %s voice-note TTS for channel-side transcoding when provider returns mp3",
async (channel) => {
expect(_test.supportsTranscodedVoiceNoteTts(channel)).toBe(true);
await expectTtsPayloadResult({
channel,
prefsName: `openclaw-speech-core-tts-${channel}-mp3-test`,
text: `This ${channel} reply should be transcoded by the channel.`,
target: "voice-note",
audioAsVoice: true,
mediaExtension: "mp3",
providerResult: {
audioBuffer: Buffer.from("mp3"),
outputFormat: "mp3",
fileExtension: ".mp3",
voiceCompatible: false,
},
});
expect(synthesizeMock).toHaveBeenCalledWith(
expect.objectContaining({ target: "voice-note" }),
);
expect(result.audioAsVoice).toBe(true);
expect(result.mediaUrl).toMatch(/voice-\d+\.mp3$/);
mediaDir = result.mediaUrl ? path.dirname(result.mediaUrl) : undefined;
} finally {
if (mediaDir) {
rmSync(mediaDir, { recursive: true, force: true });
}
}
});
},
);
it("keeps non-native voice-note channels as regular audio files", async () => {
await expectTtsPayloadResult({

View File

@@ -640,7 +640,7 @@ export function setLastTtsAttempt(entry: TtsStatusEntry | undefined): void {
}
const OPUS_CHANNELS = new Set(["telegram", "feishu", "whatsapp", "matrix", "discord"]);
const TRANSCODED_VOICE_NOTE_CHANNELS = new Set(["feishu"]);
const TRANSCODED_VOICE_NOTE_CHANNELS = new Set(["feishu", "whatsapp"]);
function resolveChannelId(channel: string | undefined): ChannelId | null {
return channel ? normalizeChannelId(channel) : null;