refactor: centralize speech voice-note channel routing

This commit is contained in:
Peter Steinberger
2026-04-10 14:59:54 +01:00
parent 77bdf2f44d
commit f621fb4aba
2 changed files with 64 additions and 19 deletions

View File

@@ -2,10 +2,7 @@ import { rmSync } from "node:fs";
import path from "node:path";
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
import type { ReplyPayload } from "openclaw/plugin-sdk/reply-runtime";
import type {
SpeechProviderPlugin,
SpeechSynthesisRequest,
} from "openclaw/plugin-sdk/speech-core";
import type { SpeechProviderPlugin, SpeechSynthesisRequest } from "openclaw/plugin-sdk/speech-core";
import { afterEach, describe, expect, it, vi } from "vitest";
type MockSpeechSynthesisResult = Awaited<ReturnType<SpeechProviderPlugin["synthesize"]>>;
@@ -53,23 +50,38 @@ vi.mock("../api.js", async () => {
};
});
const { maybeApplyTtsToPayload } = await import("./tts.js");
const { _test, maybeApplyTtsToPayload } = await import("./tts.js");
describe("speech-core Discord voice-note routing", () => {
// Channel ids that the tests below expect `supportsNativeVoiceNoteTts` to accept.
const nativeVoiceNoteChannels = ["discord", "feishu", "matrix", "telegram", "whatsapp"] as const;
/**
 * Builds a config with TTS enabled on the "mock" provider.
 *
 * @param prefsName Base name (no extension) of the prefs file; the resulting
 *   `prefsPath` is `/tmp/<prefsName>.json`.
 * @returns A config object whose only populated section is `messages.tts`.
 */
function createTtsConfig(prefsName: string): OpenClawConfig {
  const prefsPath = `/tmp/${prefsName}.json`;
  return {
    messages: {
      tts: { enabled: true, provider: "mock", prefsPath },
    },
  };
}
describe("speech-core native voice-note routing", () => {
afterEach(() => {
// Clear the calls recorded on the synthesize mock so that call assertions
// in one test do not observe calls made by another.
synthesizeMock.mockClear();
});
it("keeps native voice-note channel support centralized", () => {
for (const channel of nativeVoiceNoteChannels) {
expect(_test.supportsNativeVoiceNoteTts(channel)).toBe(true);
expect(_test.supportsNativeVoiceNoteTts(channel.toUpperCase())).toBe(true);
}
expect(_test.supportsNativeVoiceNoteTts("slack")).toBe(false);
expect(_test.supportsNativeVoiceNoteTts(undefined)).toBe(false);
});
it("marks Discord auto TTS replies as native voice messages", async () => {
const cfg: OpenClawConfig = {
messages: {
tts: {
enabled: true,
provider: "mock",
prefsPath: "/tmp/openclaw-speech-core-tts-test.json",
},
},
};
const cfg = createTtsConfig("openclaw-speech-core-tts-test");
const payload: ReplyPayload = {
text: "This Discord reply should be delivered as a native voice note.",
};
@@ -96,4 +108,33 @@ describe("speech-core Discord voice-note routing", () => {
}
}
});
it("keeps non-native voice-note channels as regular audio files", async () => {
const cfg = createTtsConfig("openclaw-speech-core-tts-slack-test");
const payload: ReplyPayload = {
text: "Slack replies should be delivered as regular audio attachments.",
};
let mediaDir: string | undefined;
try {
const result = await maybeApplyTtsToPayload({
payload,
cfg,
channel: "slack",
kind: "final",
});
expect(synthesizeMock).toHaveBeenCalledWith(
expect.objectContaining({ target: "audio-file" }),
);
expect(result.audioAsVoice).toBeUndefined();
expect(result.mediaUrl).toMatch(/voice-\d+\.ogg$/);
mediaDir = result.mediaUrl ? path.dirname(result.mediaUrl) : undefined;
} finally {
if (mediaDir) {
rmSync(mediaDir, { recursive: true, force: true });
}
}
});
});

View File

@@ -599,6 +599,11 @@ function resolveChannelId(channel: string | undefined): ChannelId | null {
return channel ? normalizeChannelId(channel) : null;
}
/**
 * Reports whether a channel is a member of `OPUS_CHANNELS`.
 *
 * @param channel Raw channel name; may be undefined.
 * @returns `true` only when `resolveChannelId` yields a non-null id that is
 *   contained in `OPUS_CHANNELS`; `false` otherwise.
 */
function supportsNativeVoiceNoteTts(channel: string | undefined): boolean {
  const resolved = resolveChannelId(channel);
  if (resolved === null) {
    return false;
  }
  return OPUS_CHANNELS.has(resolved);
}
export function resolveTtsProviderOrder(primary: TtsProvider, cfg?: OpenClawConfig): TtsProvider[] {
const normalizedPrimary = canonicalizeSpeechProviderId(primary, cfg) ?? primary;
const ordered = new Set<TtsProvider>([normalizedPrimary]);
@@ -807,8 +812,7 @@ export async function synthesizeSpeech(params: {
}
const { config, providers } = setup;
const channelId = resolveChannelId(params.channel);
const target = channelId && OPUS_CHANNELS.has(channelId) ? "voice-note" : "audio-file";
const target = supportsNativeVoiceNoteTts(params.channel) ? "voice-note" : "audio-file";
const errors: string[] = [];
const attemptedProviders: string[] = [];
@@ -1161,9 +1165,8 @@ export async function maybeApplyTtsToPayload(params: {
latencyMs: result.latencyMs,
};
const channelId = resolveChannelId(params.channel);
const shouldVoice =
channelId !== null && OPUS_CHANNELS.has(channelId) && result.voiceCompatible === true;
supportsNativeVoiceNoteTts(params.channel) && result.voiceCompatible === true;
return {
...nextPayload,
mediaUrl: result.audioPath,
@@ -1189,6 +1192,7 @@ export async function maybeApplyTtsToPayload(params: {
export const _test = {
parseTtsDirectives,
resolveModelOverridePolicy,
supportsNativeVoiceNoteTts,
summarizeText,
getResolvedSpeechProviderConfig,
formatTtsProviderError,