mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 06:30:42 +00:00
fix(tts): honor short tagged speech
This commit is contained in:
@@ -30,6 +30,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Fixes
|
||||
|
||||
- TTS: honor explicit short `[[tts:text]]...[[/tts:text]]` blocks while keeping untagged short auto-TTS suppressed, so tagged voice replies are synthesized instead of being dropped as empty voice-only payloads. Fixes #73758. Thanks @yfge.
|
||||
- Proxy/audio: convert standard `FormData` bodies before proxy-backed undici fetches, so audio transcription and multipart uploads no longer send `[object FormData]` when `HTTP_PROXY` or `HTTPS_PROXY` is configured. Fixes #48554. Thanks @dco5.
|
||||
- Gateway/diagnostics: include a bounded redacted startup error message in stability bundles, so crash-loop reports identify the failing plugin or contract without exposing secrets. Refs #75797. Thanks @ymebosma.
|
||||
- Gateway/pricing: abort in-flight model pricing catalog fetches when Gateway shutdown stops the refresh loop, and avoid post-stop cache writes or refresh timers. Fixes #72208. Thanks @rzcq.
|
||||
|
||||
@@ -388,6 +388,69 @@ describe("speech-core native voice-note routing", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// Regression test: a final telegram payload whose only text is a short
// explicitly tagged tts:text block (hello) must still be synthesized.
// Asserts that the synthesize mock receives the inner text, that the result
// carries a voice-<digits>.ogg media URL with audioAsVoice kept true, and that
// no visible text remains on the result.
it("synthesizes explicitly tagged short hidden TTS text", async () => {
|
||||
const cfg = createTtsConfig("openclaw-speech-core-short-hidden-tts-test");
|
||||
let mediaDir: string | undefined;
|
||||
try {
|
||||
const result = await maybeApplyTtsToPayload({
|
||||
payload: {
|
||||
text: "[[tts:text]]hello[[/tts:text]]",
|
||||
audioAsVoice: true,
|
||||
},
|
||||
cfg,
|
||||
channel: "telegram",
|
||||
kind: "final",
|
||||
});
|
||||
|
||||
expect(synthesizeMock).toHaveBeenCalledWith(expect.objectContaining({ text: "hello" }));
|
||||
expect(result.mediaUrl).toMatch(/voice-\d+\.ogg$/);
|
||||
expect(result.audioAsVoice).toBe(true);
|
||||
expect(result.text).toBeUndefined();
|
||||
// NOTE(review): mediaDir is only recorded after the assertions above, so if
// one of them throws, the finally block sees undefined and skips the cleanup.
mediaDir = result.mediaUrl ? path.dirname(result.mediaUrl) : undefined;
|
||||
} finally {
|
||||
// Remove the directory that holds the generated media file, if one was recorded.
if (mediaDir) {
|
||||
rmSync(mediaDir, { recursive: true, force: true });
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Guard test for the unchanged path: the same short text (hello) without any
// tts:text tag must not be synthesized. Asserts that the synthesize mock is
// never called and that the result equals the input text with audioAsVoice
// still true and no other fields.
it("keeps skipping untagged short TTS text", async () => {
|
||||
const cfg = createTtsConfig("openclaw-speech-core-short-plain-tts-test");
|
||||
const result = await maybeApplyTtsToPayload({
|
||||
payload: {
|
||||
text: "hello",
|
||||
audioAsVoice: true,
|
||||
},
|
||||
cfg,
|
||||
channel: "telegram",
|
||||
kind: "final",
|
||||
});
|
||||
|
||||
expect(synthesizeMock).not.toHaveBeenCalled();
|
||||
expect(result).toEqual({
|
||||
text: "hello",
|
||||
audioAsVoice: true,
|
||||
});
|
||||
});
|
||||
|
||||
// Edge case for explicit tags: the tagged block holds only three asterisks,
// which per the test title strips to empty markdown. Asserts that nothing is
// synthesized and that the result contains only audioAsVoice set to true,
// with no text and no media URL.
it("keeps skipping explicit tagged TTS text that strips to empty markdown", async () => {
|
||||
const cfg = createTtsConfig("openclaw-speech-core-empty-hidden-tts-test");
|
||||
const result = await maybeApplyTtsToPayload({
|
||||
payload: {
|
||||
text: "[[tts:text]]***[[/tts:text]]",
|
||||
audioAsVoice: true,
|
||||
},
|
||||
cfg,
|
||||
channel: "telegram",
|
||||
kind: "final",
|
||||
});
|
||||
|
||||
expect(synthesizeMock).not.toHaveBeenCalled();
|
||||
expect(result).toEqual({
|
||||
audioAsVoice: true,
|
||||
});
|
||||
});
|
||||
|
||||
it("selects persona preferred provider before config fallback", () => {
|
||||
const cfg: OpenClawConfig = {
|
||||
messages: {
|
||||
|
||||
@@ -1527,7 +1527,8 @@ export async function maybeApplyTtsToPayload(params: {
|
||||
const cleanedText = directives.cleanedText;
|
||||
const trimmedCleaned = cleanedText.trim();
|
||||
const visibleText = trimmedCleaned.length > 0 ? trimmedCleaned : "";
|
||||
const ttsText = directives.ttsText?.trim() || visibleText;
|
||||
const explicitTtsText = directives.ttsText?.trim() || "";
|
||||
const ttsText = explicitTtsText || visibleText;
|
||||
|
||||
const nextPayload =
|
||||
visibleText === text.trim()
|
||||
@@ -1558,7 +1559,7 @@ export async function maybeApplyTtsToPayload(params: {
|
||||
if (text.includes("MEDIA:")) {
|
||||
return nextPayload;
|
||||
}
|
||||
if (ttsText.trim().length < 10) {
|
||||
if (!explicitTtsText && ttsText.trim().length < 10) {
|
||||
return nextPayload;
|
||||
}
|
||||
|
||||
@@ -1598,7 +1599,10 @@ export async function maybeApplyTtsToPayload(params: {
|
||||
}
|
||||
|
||||
textForAudio = stripMarkdown(textForAudio).trim();
|
||||
if (textForAudio.length < 10) {
|
||||
if (!textForAudio) {
|
||||
return nextPayload;
|
||||
}
|
||||
if (!explicitTtsText && textForAudio.length < 10) {
|
||||
return nextPayload;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user