refactor: centralize speech voice-note channel routing

2026-04-12 09:41:11 +00:00 · 2026-04-10 14:59:54 +01:00
parent 77bdf2f44d
commit f621fb4aba
2 changed files with 64 additions and 19 deletions
--- a/extensions/speech-core/src/tts.test.ts
+++ b/extensions/speech-core/src/tts.test.ts
@@ -2,10 +2,7 @@ import { rmSync } from "node:fs";
 import path from "node:path";
 import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
 import type { ReplyPayload } from "openclaw/plugin-sdk/reply-runtime";
-import type {
-  SpeechProviderPlugin,
-  SpeechSynthesisRequest,
-} from "openclaw/plugin-sdk/speech-core";
+import type { SpeechProviderPlugin, SpeechSynthesisRequest } from "openclaw/plugin-sdk/speech-core";
 import { afterEach, describe, expect, it, vi } from "vitest";

 type MockSpeechSynthesisResult = Awaited<ReturnType<SpeechProviderPlugin["synthesize"]>>;
@@ -53,23 +50,38 @@ vi.mock("../api.js", async () => {
  };
 });

-const { maybeApplyTtsToPayload } = await import("./tts.js");
+const { _test, maybeApplyTtsToPayload } = await import("./tts.js");

-describe("speech-core Discord voice-note routing", () => {
+const nativeVoiceNoteChannels = ["discord", "feishu", "matrix", "telegram", "whatsapp"] as const;
+
+// Test fixture: enables TTS with the "mock" provider and keeps prefs at
+// /tmp/<prefsName>.json.
+function createTtsConfig(prefsName: string): OpenClawConfig {
+  const prefsPath = `/tmp/${prefsName}.json`;
+  const config: OpenClawConfig = {
+    messages: {
+      tts: { enabled: true, provider: "mock", prefsPath },
+    },
+  };
+  return config;
+}
+
+describe("speech-core native voice-note routing", () => {
  afterEach(() => {
    synthesizeMock.mockClear();
  });

+  it("keeps native voice-note channel support centralized", () => {
+    // Native channel ids must be accepted in either letter case.
+    const accepted = nativeVoiceNoteChannels.flatMap((id) => [id, id.toUpperCase()]);
+    accepted.forEach((name) => expect(_test.supportsNativeVoiceNoteTts(name)).toBe(true));
+    // A channel outside the list, or no channel at all, must be rejected.
+    const rejected = ["slack", undefined];
+    rejected.forEach((name) => expect(_test.supportsNativeVoiceNoteTts(name)).toBe(false));
+  });
+
  it("marks Discord auto TTS replies as native voice messages", async () => {
-    const cfg: OpenClawConfig = {
-      messages: {
-        tts: {
-          enabled: true,
-          provider: "mock",
-          prefsPath: "/tmp/openclaw-speech-core-tts-test.json",
-        },
-      },
-    };
+    const cfg = createTtsConfig("openclaw-speech-core-tts-test");
    const payload: ReplyPayload = {
      text: "This Discord reply should be delivered as a native voice note.",
    };
@@ -96,4 +108,33 @@ describe("speech-core Discord voice-note routing", () => {
      }
    }
  });
+
+  it("keeps non-native voice-note channels as regular audio files", async () => {
+    const cfg = createTtsConfig("openclaw-speech-core-tts-slack-test");
+    const payload: ReplyPayload = {
+      text: "Slack replies should be delivered as regular audio attachments.",
+    };
+
+    let mediaDir: string | undefined;
+    try {
+      const result = await maybeApplyTtsToPayload({
+        payload,
+        cfg,
+        channel: "slack",
+        kind: "final",
+      });
+      // Record the output dir before asserting so a failed expectation still cleans up.
+      mediaDir = result.mediaUrl ? path.dirname(result.mediaUrl) : undefined;
+
+      expect(synthesizeMock).toHaveBeenCalledWith(
+        expect.objectContaining({ target: "audio-file" }),
+      );
+      expect(result.audioAsVoice).toBeUndefined();
+      expect(result.mediaUrl).toMatch(/voice-\d+\.ogg$/);
+    } finally {
+      if (mediaDir) {
+        rmSync(mediaDir, { recursive: true, force: true });
+      }
+    }
+  });
 });
--- a/extensions/speech-core/src/tts.ts
+++ b/extensions/speech-core/src/tts.ts
@@ -599,6 +599,11 @@ function resolveChannelId(channel: string | undefined): ChannelId | null {
  return channel ? normalizeChannelId(channel) : null;
 }

+function supportsNativeVoiceNoteTts(channel: string | undefined): boolean {
+  const resolved = resolveChannelId(channel); // null e.g. when no channel was given
+  return resolved === null ? false : OPUS_CHANNELS.has(resolved);
+}
+
 export function resolveTtsProviderOrder(primary: TtsProvider, cfg?: OpenClawConfig): TtsProvider[] {
  const normalizedPrimary = canonicalizeSpeechProviderId(primary, cfg) ?? primary;
  const ordered = new Set<TtsProvider>([normalizedPrimary]);
@@ -807,8 +812,7 @@ export async function synthesizeSpeech(params: {
  }

  const { config, providers } = setup;
-  const channelId = resolveChannelId(params.channel);
-  const target = channelId && OPUS_CHANNELS.has(channelId) ? "voice-note" : "audio-file";
+  const target = supportsNativeVoiceNoteTts(params.channel) ? "voice-note" : "audio-file";

  const errors: string[] = [];
  const attemptedProviders: string[] = [];
@@ -1161,9 +1165,8 @@ export async function maybeApplyTtsToPayload(params: {
      latencyMs: result.latencyMs,
    };

-    const channelId = resolveChannelId(params.channel);
    const shouldVoice =
-      channelId !== null && OPUS_CHANNELS.has(channelId) && result.voiceCompatible === true;
+      supportsNativeVoiceNoteTts(params.channel) && result.voiceCompatible === true;
    return {
      ...nextPayload,
      mediaUrl: result.audioPath,
@@ -1189,6 +1192,7 @@ export async function maybeApplyTtsToPayload(params: {
 export const _test = {
  parseTtsDirectives,
  resolveModelOverridePolicy,
+  supportsNativeVoiceNoteTts,
  summarizeText,
  getResolvedSpeechProviderConfig,
  formatTtsProviderError,