diff --git a/extensions/speech-core/src/tts.test.ts b/extensions/speech-core/src/tts.test.ts index 8cc9120b61d..47bf6bca631 100644 --- a/extensions/speech-core/src/tts.test.ts +++ b/extensions/speech-core/src/tts.test.ts @@ -2,10 +2,7 @@ import { rmSync } from "node:fs"; import path from "node:path"; import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; import type { ReplyPayload } from "openclaw/plugin-sdk/reply-runtime"; -import type { - SpeechProviderPlugin, - SpeechSynthesisRequest, -} from "openclaw/plugin-sdk/speech-core"; +import type { SpeechProviderPlugin, SpeechSynthesisRequest } from "openclaw/plugin-sdk/speech-core"; import { afterEach, describe, expect, it, vi } from "vitest"; type MockSpeechSynthesisResult = Awaited>; @@ -53,23 +50,38 @@ vi.mock("../api.js", async () => { }; }); -const { maybeApplyTtsToPayload } = await import("./tts.js"); +const { _test, maybeApplyTtsToPayload } = await import("./tts.js"); -describe("speech-core Discord voice-note routing", () => { +const nativeVoiceNoteChannels = ["discord", "feishu", "matrix", "telegram", "whatsapp"] as const; + +function createTtsConfig(prefsName: string): OpenClawConfig { + return { + messages: { + tts: { + enabled: true, + provider: "mock", + prefsPath: `/tmp/${prefsName}.json`, + }, + }, + }; +} + +describe("speech-core native voice-note routing", () => { afterEach(() => { synthesizeMock.mockClear(); }); + it("keeps native voice-note channel support centralized", () => { + for (const channel of nativeVoiceNoteChannels) { + expect(_test.supportsNativeVoiceNoteTts(channel)).toBe(true); + expect(_test.supportsNativeVoiceNoteTts(channel.toUpperCase())).toBe(true); + } + expect(_test.supportsNativeVoiceNoteTts("slack")).toBe(false); + expect(_test.supportsNativeVoiceNoteTts(undefined)).toBe(false); + }); + it("marks Discord auto TTS replies as native voice messages", async () => { - const cfg: OpenClawConfig = { - messages: { - tts: { - enabled: true, - provider: "mock", - prefsPath: "/tmp/openclaw-speech-core-tts-test.json", - }, - }, - }; + const cfg = createTtsConfig("openclaw-speech-core-tts-test"); const payload: ReplyPayload = { text: "This Discord reply should be delivered as a native voice note.", }; @@ -96,4 +108,33 @@ describe("speech-core Discord voice-note routing", () => { } } }); + + it("keeps non-native voice-note channels as regular audio files", async () => { + const cfg = createTtsConfig("openclaw-speech-core-tts-slack-test"); + const payload: ReplyPayload = { + text: "Slack replies should be delivered as regular audio attachments.", + }; + + let mediaDir: string | undefined; + try { + const result = await maybeApplyTtsToPayload({ + payload, + cfg, + channel: "slack", + kind: "final", + }); + + expect(synthesizeMock).toHaveBeenCalledWith( + expect.objectContaining({ target: "audio-file" }), + ); + expect(result.audioAsVoice).toBeUndefined(); + expect(result.mediaUrl).toMatch(/voice-\d+\.ogg$/); + + mediaDir = result.mediaUrl ? path.dirname(result.mediaUrl) : undefined; + } finally { + if (mediaDir) { + rmSync(mediaDir, { recursive: true, force: true }); + } + } + }); }); diff --git a/extensions/speech-core/src/tts.ts b/extensions/speech-core/src/tts.ts index 52bdb5a30a4..3aa444a75ac 100644 --- a/extensions/speech-core/src/tts.ts +++ b/extensions/speech-core/src/tts.ts @@ -599,6 +599,11 @@ function resolveChannelId(channel: string | undefined): ChannelId | null { return channel ? normalizeChannelId(channel) : null; } +function supportsNativeVoiceNoteTts(channel: string | undefined): boolean { + const channelId = resolveChannelId(channel); + return channelId !== null && OPUS_CHANNELS.has(channelId); +} + export function resolveTtsProviderOrder(primary: TtsProvider, cfg?: OpenClawConfig): TtsProvider[] { const normalizedPrimary = canonicalizeSpeechProviderId(primary, cfg) ?? primary; const ordered = new Set([normalizedPrimary]); @@ -807,8 +812,7 @@ export async function synthesizeSpeech(params: { } const { config, providers } = setup; - const channelId = resolveChannelId(params.channel); - const target = channelId && OPUS_CHANNELS.has(channelId) ? "voice-note" : "audio-file"; + const target = supportsNativeVoiceNoteTts(params.channel) ? "voice-note" : "audio-file"; const errors: string[] = []; const attemptedProviders: string[] = []; @@ -1161,9 +1165,8 @@ export async function maybeApplyTtsToPayload(params: { latencyMs: result.latencyMs, }; - const channelId = resolveChannelId(params.channel); const shouldVoice = - channelId !== null && OPUS_CHANNELS.has(channelId) && result.voiceCompatible === true; + supportsNativeVoiceNoteTts(params.channel) && result.voiceCompatible === true; return { ...nextPayload, mediaUrl: result.audioPath, @@ -1189,6 +1192,7 @@ export async function maybeApplyTtsToPayload(params: { export const _test = { parseTtsDirectives, resolveModelOverridePolicy, + supportsNativeVoiceNoteTts, summarizeText, getResolvedSpeechProviderConfig, formatTtsProviderError,