fix(tts): honor short tagged speech

This commit is contained in:
Peter Steinberger
2026-05-02 09:24:31 +01:00
parent d02448696c
commit 5f6adaf157
3 changed files with 71 additions and 3 deletions

View File

@@ -30,6 +30,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- TTS: honor explicit short `[[tts:text]]...[[/tts:text]]` blocks while keeping untagged short auto-TTS suppressed, so tagged voice replies are synthesized instead of being dropped as empty voice-only payloads. Fixes #73758. Thanks @yfge.
- Proxy/audio: convert standard `FormData` bodies before proxy-backed undici fetches, so audio transcription and multipart uploads no longer send `[object FormData]` when `HTTP_PROXY` or `HTTPS_PROXY` is configured. Fixes #48554. Thanks @dco5.
- Gateway/diagnostics: include a bounded redacted startup error message in stability bundles, so crash-loop reports identify the failing plugin or contract without exposing secrets. Refs #75797. Thanks @ymebosma.
- Gateway/pricing: abort in-flight model pricing catalog fetches when Gateway shutdown stops the refresh loop, and avoid post-stop cache writes or refresh timers. Fixes #72208. Thanks @rzcq.

View File

@@ -388,6 +388,69 @@ describe("speech-core native voice-note routing", () => {
});
});
// An explicit [[tts:text]] block is expected to be synthesized even when its
// content ("hello") is very short: the synthesizer must receive the tagged text,
// the payload must come back as a voice note with a generated .ogg file, and no
// visible text should remain.
it("synthesizes explicitly tagged short hidden TTS text", async () => {
  const ttsConfig = createTtsConfig("openclaw-speech-core-short-hidden-tts-test");
  // Directory of the generated audio file; only recorded once every assertion passed.
  let generatedDir: string | undefined;
  try {
    const applied = await maybeApplyTtsToPayload({
      payload: {
        text: "[[tts:text]]hello[[/tts:text]]",
        audioAsVoice: true,
      },
      cfg: ttsConfig,
      channel: "telegram",
      kind: "final",
    });

    const expectedSynthesisArgs = expect.objectContaining({ text: "hello" });
    expect(synthesizeMock).toHaveBeenCalledWith(expectedSynthesisArgs);
    expect(applied.mediaUrl).toMatch(/voice-\d+\.ogg$/);
    expect(applied.audioAsVoice).toBe(true);
    expect(applied.text).toBeUndefined();

    if (applied.mediaUrl) {
      generatedDir = path.dirname(applied.mediaUrl);
    }
  } finally {
    // Remove the directory holding the synthesized audio so the test leaves nothing behind.
    if (generatedDir) {
      rmSync(generatedDir, { recursive: true, force: true });
    }
  }
});
// Counterpart to the tagged case: plain short text without a [[tts:text]] block
// must not reach the synthesizer, and the payload must come back unchanged.
it("keeps skipping untagged short TTS text", async () => {
  const ttsConfig = createTtsConfig("openclaw-speech-core-short-plain-tts-test");

  const outcome = await maybeApplyTtsToPayload({
    payload: {
      text: "hello",
      audioAsVoice: true,
    },
    cfg: ttsConfig,
    channel: "telegram",
    kind: "final",
  });

  expect(synthesizeMock).not.toHaveBeenCalled();

  // No mediaUrl is expected: the result is exactly the input payload's fields.
  const untouchedPayload = {
    text: "hello",
    audioAsVoice: true,
  };
  expect(outcome).toEqual(untouchedPayload);
});
// A tagged block whose content is only markdown markers ("***") is expected to
// leave nothing to speak: the synthesizer must not be called and the result
// carries neither text nor a media URL, only the voice flag.
it("keeps skipping explicit tagged TTS text that strips to empty markdown", async () => {
  const ttsConfig = createTtsConfig("openclaw-speech-core-empty-hidden-tts-test");

  const outcome = await maybeApplyTtsToPayload({
    payload: {
      text: "[[tts:text]]***[[/tts:text]]",
      audioAsVoice: true,
    },
    cfg: ttsConfig,
    channel: "telegram",
    kind: "final",
  });

  expect(synthesizeMock).not.toHaveBeenCalled();

  const voiceOnlyPayload = {
    audioAsVoice: true,
  };
  expect(outcome).toEqual(voiceOnlyPayload);
});
it("selects persona preferred provider before config fallback", () => {
const cfg: OpenClawConfig = {
messages: {

View File

@@ -1527,7 +1527,8 @@ export async function maybeApplyTtsToPayload(params: {
const cleanedText = directives.cleanedText;
const trimmedCleaned = cleanedText.trim();
const visibleText = trimmedCleaned.length > 0 ? trimmedCleaned : "";
const ttsText = directives.ttsText?.trim() || visibleText;
const explicitTtsText = directives.ttsText?.trim() || "";
const ttsText = explicitTtsText || visibleText;
const nextPayload =
visibleText === text.trim()
@@ -1558,7 +1559,7 @@ export async function maybeApplyTtsToPayload(params: {
if (text.includes("MEDIA:")) {
return nextPayload;
}
if (ttsText.trim().length < 10) {
if (!explicitTtsText && ttsText.trim().length < 10) {
return nextPayload;
}
@@ -1598,7 +1599,10 @@ export async function maybeApplyTtsToPayload(params: {
}
textForAudio = stripMarkdown(textForAudio).trim();
if (textForAudio.length < 10) {
if (!textForAudio) {
return nextPayload;
}
if (!explicitTtsText && textForAudio.length < 10) {
return nextPayload;
}