speech-core: route Discord auto TTS as voice notes

2026-06-16 20:23:05 +00:00 · 2026-04-10 13:07:56 +08:00
parent 6286810388
commit b3d7fd166a
2 changed files with 99 additions and 1 deletions
--- a/extensions/speech-core/src/tts.test.ts
+++ b/extensions/speech-core/src/tts.test.ts
@@ -0,0 +1,98 @@
+import { rmSync } from "node:fs";
+import path from "node:path";
+import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
+import type { ReplyPayload } from "openclaw/plugin-sdk/reply-runtime";
+import type {
+  SpeechProviderPlugin,
+  SpeechSynthesisRequest,
+  SpeechSynthesisResult,
+} from "openclaw/plugin-sdk/speech-core";
+import { afterEach, describe, expect, it, vi } from "vitest";
+
+const synthesizeMock = vi.hoisted(() =>
+  vi.fn(
+    async (request: SpeechSynthesisRequest): Promise<SpeechSynthesisResult> => ({
+      audioBuffer: Buffer.from("voice"),
+      fileExtension: ".ogg",
+      outputFormat: "ogg",
+      voiceCompatible: request.target === "voice-note",
+    }),
+  ),
+);
+
+const listSpeechProvidersMock = vi.hoisted(() => vi.fn());
+const getSpeechProviderMock = vi.hoisted(() => vi.fn());
+
+vi.mock("openclaw/plugin-sdk/channel-targets", () => ({
+  normalizeChannelId: (channel: string | undefined) => channel?.trim().toLowerCase() ?? null,
+}));
+
+vi.mock("../api.js", async () => {
+  const actual = await vi.importActual<typeof import("../api.js")>("../api.js");
+  const mockProvider: SpeechProviderPlugin = {
+    id: "mock",
+    label: "Mock",
+    autoSelectOrder: 1,
+    isConfigured: () => true,
+    synthesize: synthesizeMock,
+  };
+  listSpeechProvidersMock.mockImplementation(() => [mockProvider]);
+  getSpeechProviderMock.mockImplementation((providerId: string) =>
+    providerId === "mock" ? mockProvider : null,
+  );
+  return {
+    ...actual,
+    canonicalizeSpeechProviderId: (providerId: string | undefined) =>
+      providerId?.trim().toLowerCase() || undefined,
+    normalizeSpeechProviderId: (providerId: string | undefined) =>
+      providerId?.trim().toLowerCase() || undefined,
+    getSpeechProvider: getSpeechProviderMock,
+    listSpeechProviders: listSpeechProvidersMock,
+    scheduleCleanup: vi.fn(),
+  };
+});
+
+const { maybeApplyTtsToPayload } = await import("./tts.js");
+
+describe("speech-core Discord voice-note routing", () => {
+  afterEach(() => {
+    synthesizeMock.mockClear();
+  });
+
+  it("marks Discord auto TTS replies as native voice messages", async () => {
+    const cfg: OpenClawConfig = {
+      messages: {
+        tts: {
+          enabled: true,
+          provider: "mock",
+          prefsPath: "/tmp/openclaw-speech-core-tts-test.json",
+        },
+      },
+    };
+    const payload: ReplyPayload = {
+      text: "This Discord reply should be delivered as a native voice note.",
+    };
+
+    let mediaDir: string | undefined;
+    try {
+      const result = await maybeApplyTtsToPayload({
+        payload,
+        cfg,
+        channel: "discord",
+        kind: "final",
+      });
+
+      expect(synthesizeMock).toHaveBeenCalledWith(
+        expect.objectContaining({ target: "voice-note" }),
+      );
+      expect(result.audioAsVoice).toBe(true);
+      expect(result.mediaUrl).toMatch(/voice-\d+\.ogg$/);
+
+      mediaDir = result.mediaUrl ? path.dirname(result.mediaUrl) : undefined;
+    } finally {
+      if (mediaDir) {
+        rmSync(mediaDir, { recursive: true, force: true });
+      }
+    }
+  });
+});
--- a/extensions/speech-core/src/tts.ts
+++ b/extensions/speech-core/src/tts.ts
@@ -593,7 +593,7 @@ export function setLastTtsAttempt(entry: TtsStatusEntry | undefined): void {
  lastTtsAttempt = entry;
 }

-const OPUS_CHANNELS = new Set(["telegram", "feishu", "whatsapp", "matrix"]);
+const OPUS_CHANNELS = new Set(["telegram", "feishu", "whatsapp", "matrix", "discord"]);

 function resolveChannelId(channel: string | undefined): ChannelId | null {
  return channel ? normalizeChannelId(channel) : null;