mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-12 09:41:11 +00:00
refactor: centralize speech voice-note channel routing
This commit is contained in:
@@ -2,10 +2,7 @@ import { rmSync } from "node:fs";
|
||||
import path from "node:path";
|
||||
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
|
||||
import type { ReplyPayload } from "openclaw/plugin-sdk/reply-runtime";
|
||||
import type {
|
||||
SpeechProviderPlugin,
|
||||
SpeechSynthesisRequest,
|
||||
} from "openclaw/plugin-sdk/speech-core";
|
||||
import type { SpeechProviderPlugin, SpeechSynthesisRequest } from "openclaw/plugin-sdk/speech-core";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
type MockSpeechSynthesisResult = Awaited<ReturnType<SpeechProviderPlugin["synthesize"]>>;
|
||||
@@ -53,23 +50,38 @@ vi.mock("../api.js", async () => {
|
||||
};
|
||||
});
|
||||
|
||||
const { maybeApplyTtsToPayload } = await import("./tts.js");
|
||||
const { _test, maybeApplyTtsToPayload } = await import("./tts.js");
|
||||
|
||||
describe("speech-core Discord voice-note routing", () => {
|
||||
const nativeVoiceNoteChannels = ["discord", "feishu", "matrix", "telegram", "whatsapp"] as const;
|
||||
|
||||
/**
 * Builds the minimal config used by these tests: TTS enabled on the "mock"
 * provider, with a per-test prefs file.
 *
 * @param prefsName - Base name (without extension) of the prefs file; the
 *   resulting `prefsPath` is `/tmp/<prefsName>.json`.
 * @returns A config object containing only the `messages.tts` section.
 */
function createTtsConfig(prefsName: string): OpenClawConfig {
  const config: OpenClawConfig = {
    messages: {
      tts: { enabled: true, provider: "mock", prefsPath: `/tmp/${prefsName}.json` },
    },
  };
  return config;
}
|
||||
|
||||
describe("speech-core native voice-note routing", () => {
|
||||
afterEach(() => {
|
||||
synthesizeMock.mockClear();
|
||||
});
|
||||
|
||||
it("keeps native voice-note channel support centralized", () => {
|
||||
for (const channel of nativeVoiceNoteChannels) {
|
||||
expect(_test.supportsNativeVoiceNoteTts(channel)).toBe(true);
|
||||
expect(_test.supportsNativeVoiceNoteTts(channel.toUpperCase())).toBe(true);
|
||||
}
|
||||
expect(_test.supportsNativeVoiceNoteTts("slack")).toBe(false);
|
||||
expect(_test.supportsNativeVoiceNoteTts(undefined)).toBe(false);
|
||||
});
|
||||
|
||||
it("marks Discord auto TTS replies as native voice messages", async () => {
|
||||
const cfg: OpenClawConfig = {
|
||||
messages: {
|
||||
tts: {
|
||||
enabled: true,
|
||||
provider: "mock",
|
||||
prefsPath: "/tmp/openclaw-speech-core-tts-test.json",
|
||||
},
|
||||
},
|
||||
};
|
||||
const cfg = createTtsConfig("openclaw-speech-core-tts-test");
|
||||
const payload: ReplyPayload = {
|
||||
text: "This Discord reply should be delivered as a native voice note.",
|
||||
};
|
||||
@@ -96,4 +108,33 @@ describe("speech-core Discord voice-note routing", () => {
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it("keeps non-native voice-note channels as regular audio files", async () => {
|
||||
const cfg = createTtsConfig("openclaw-speech-core-tts-slack-test");
|
||||
const payload: ReplyPayload = {
|
||||
text: "Slack replies should be delivered as regular audio attachments.",
|
||||
};
|
||||
|
||||
let mediaDir: string | undefined;
|
||||
try {
|
||||
const result = await maybeApplyTtsToPayload({
|
||||
payload,
|
||||
cfg,
|
||||
channel: "slack",
|
||||
kind: "final",
|
||||
});
|
||||
|
||||
expect(synthesizeMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ target: "audio-file" }),
|
||||
);
|
||||
expect(result.audioAsVoice).toBeUndefined();
|
||||
expect(result.mediaUrl).toMatch(/voice-\d+\.ogg$/);
|
||||
|
||||
mediaDir = result.mediaUrl ? path.dirname(result.mediaUrl) : undefined;
|
||||
} finally {
|
||||
if (mediaDir) {
|
||||
rmSync(mediaDir, { recursive: true, force: true });
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
@@ -599,6 +599,11 @@ function resolveChannelId(channel: string | undefined): ChannelId | null {
|
||||
return channel ? normalizeChannelId(channel) : null;
|
||||
}
|
||||
|
||||
/**
 * Single source of truth for whether a channel gets native voice-note TTS.
 *
 * The raw channel name is resolved through `resolveChannelId`; the result is
 * `true` only when that yields a non-null id that is a member of
 * `OPUS_CHANNELS`.
 *
 * @param channel - Raw channel name, or `undefined` when no channel is known.
 * @returns `true` if the resolved channel id is in `OPUS_CHANNELS`, else `false`.
 */
function supportsNativeVoiceNoteTts(channel: string | undefined): boolean {
  const resolvedId = resolveChannelId(channel);
  if (resolvedId === null) {
    // Missing or unresolvable channel: never treated as voice-note capable.
    return false;
  }
  return OPUS_CHANNELS.has(resolvedId);
}
|
||||
|
||||
export function resolveTtsProviderOrder(primary: TtsProvider, cfg?: OpenClawConfig): TtsProvider[] {
|
||||
const normalizedPrimary = canonicalizeSpeechProviderId(primary, cfg) ?? primary;
|
||||
const ordered = new Set<TtsProvider>([normalizedPrimary]);
|
||||
@@ -807,8 +812,7 @@ export async function synthesizeSpeech(params: {
|
||||
}
|
||||
|
||||
const { config, providers } = setup;
|
||||
const channelId = resolveChannelId(params.channel);
|
||||
const target = channelId && OPUS_CHANNELS.has(channelId) ? "voice-note" : "audio-file";
|
||||
const target = supportsNativeVoiceNoteTts(params.channel) ? "voice-note" : "audio-file";
|
||||
|
||||
const errors: string[] = [];
|
||||
const attemptedProviders: string[] = [];
|
||||
@@ -1161,9 +1165,8 @@ export async function maybeApplyTtsToPayload(params: {
|
||||
latencyMs: result.latencyMs,
|
||||
};
|
||||
|
||||
const channelId = resolveChannelId(params.channel);
|
||||
const shouldVoice =
|
||||
channelId !== null && OPUS_CHANNELS.has(channelId) && result.voiceCompatible === true;
|
||||
supportsNativeVoiceNoteTts(params.channel) && result.voiceCompatible === true;
|
||||
return {
|
||||
...nextPayload,
|
||||
mediaUrl: result.audioPath,
|
||||
@@ -1189,6 +1192,7 @@ export async function maybeApplyTtsToPayload(params: {
|
||||
export const _test = {
|
||||
parseTtsDirectives,
|
||||
resolveModelOverridePolicy,
|
||||
supportsNativeVoiceNoteTts,
|
||||
summarizeText,
|
||||
getResolvedSpeechProviderConfig,
|
||||
formatTtsProviderError,
|
||||
|
||||
Reference in New Issue
Block a user