mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-18 08:34:46 +00:00
fix(webchat): forward trustedLocalMedia on accumulated block TTS tail
Skip per-block TTS synthesis in final mode, because it duplicates the tail that dispatch synthesizes from the accumulated block text. Mark TTS output as trusted local media and pass the flag through the TTS-only final payload that WebChat consumes after block streaming. Fixes #82628. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
committed by
Peter Steinberger
parent
f8323f8636
commit
eec18fccb4
@@ -442,15 +442,19 @@ describe("speech-core native voice-note routing", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("applies TTS for block delivery kind in final mode (#82628)", async () => {
|
||||
await expectTtsPayloadResult({
|
||||
it("skips block delivery kind in final mode (accumulated final tail synthesizes instead)", async () => {
|
||||
synthesizeMock.mockClear();
|
||||
const cfg = createTtsConfig("openclaw-speech-core-block-kind-tts-test");
|
||||
const result = await maybeApplyTtsToPayload({
|
||||
payload: { text: "WebChat block stream chunks defer TTS to the final tail." },
|
||||
cfg,
|
||||
channel: "webchat",
|
||||
prefsName: "openclaw-speech-core-block-kind-tts-test",
|
||||
text: "WebChat block replies should synthesize audio for auto TTS.",
|
||||
target: "audio-file",
|
||||
audioAsVoice: undefined,
|
||||
kind: "block",
|
||||
});
|
||||
|
||||
expect(synthesizeMock).not.toHaveBeenCalled();
|
||||
expect(result.trustedLocalMedia).toBeUndefined();
|
||||
expect(result.text).toBe("WebChat block stream chunks defer TTS to the final tail.");
|
||||
});
|
||||
|
||||
it("skips tool delivery kind in final mode", async () => {
|
||||
|
||||
@@ -1759,7 +1759,7 @@ export async function maybeApplyTtsToPayload(params: {
|
||||
}
|
||||
|
||||
const mode = config.mode ?? "final";
|
||||
if (mode === "final" && params.kind && params.kind !== "final" && params.kind !== "block") {
|
||||
if (mode === "final" && params.kind && params.kind !== "final") {
|
||||
return nextPayload;
|
||||
}
|
||||
|
||||
|
||||
@@ -62,52 +62,62 @@ async function main() {
|
||||
},
|
||||
};
|
||||
|
||||
const blockText = "WebChat block replies should synthesize audio for auto TTS.";
|
||||
const accumulatedBlockText =
|
||||
"WebChat streams block text; dispatch synthesizes one TTS tail with kind final.";
|
||||
const blockResult = await maybeApplyTtsToPayload({
|
||||
payload: { text: blockText },
|
||||
payload: { text: accumulatedBlockText },
|
||||
cfg,
|
||||
channel: "webchat",
|
||||
kind: "block",
|
||||
});
|
||||
console.log("maybeApplyTtsToPayload(kind=block).mediaUrl =", blockResult.mediaUrl ?? "(none)");
|
||||
console.log(
|
||||
"maybeApplyTtsToPayload(kind=block).trustedLocalMedia =",
|
||||
blockResult.trustedLocalMedia ?? false,
|
||||
);
|
||||
|
||||
const toolResult = await maybeApplyTtsToPayload({
|
||||
payload: { text: "Intermediate tool output should not be spoken." },
|
||||
const tailResult = await maybeApplyTtsToPayload({
|
||||
payload: { text: accumulatedBlockText },
|
||||
cfg,
|
||||
channel: "webchat",
|
||||
kind: "tool",
|
||||
kind: "final",
|
||||
});
|
||||
console.log("maybeApplyTtsToPayload(kind=tool).mediaUrl =", toolResult.mediaUrl ?? "(none)");
|
||||
|
||||
const mediaPath = blockResult.mediaUrl;
|
||||
if (!mediaPath || !fs.existsSync(mediaPath)) {
|
||||
throw new Error("expected block TTS to write a local media file");
|
||||
}
|
||||
const localRoots = [path.dirname(mediaPath)];
|
||||
const trustedBlocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
|
||||
[{ mediaUrl: mediaPath, trustedLocalMedia: true }],
|
||||
{ localRoots },
|
||||
console.log("maybeApplyTtsToPayload(kind=final).mediaUrl =", tailResult.mediaUrl ?? "(none)");
|
||||
console.log(
|
||||
"maybeApplyTtsToPayload(kind=final).trustedLocalMedia =",
|
||||
tailResult.trustedLocalMedia ?? false,
|
||||
);
|
||||
|
||||
const mediaPath = tailResult.mediaUrl;
|
||||
if (!mediaPath || !fs.existsSync(mediaPath)) {
|
||||
throw new Error("expected final-mode tail TTS to write a local media file");
|
||||
}
|
||||
|
||||
const ttsOnlyPayload = {
|
||||
mediaUrl: tailResult.mediaUrl,
|
||||
audioAsVoice: tailResult.audioAsVoice,
|
||||
spokenText: accumulatedBlockText,
|
||||
trustedLocalMedia: tailResult.trustedLocalMedia,
|
||||
};
|
||||
console.log(
|
||||
"dispatch ttsOnlyPayload.trustedLocalMedia =",
|
||||
ttsOnlyPayload.trustedLocalMedia ?? false,
|
||||
);
|
||||
|
||||
const localRoots = [path.dirname(mediaPath)];
|
||||
const trustedBlocks = await buildWebchatAudioContentBlocksFromReplyPayloads([ttsOnlyPayload], {
|
||||
localRoots,
|
||||
});
|
||||
const untrustedBlocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
|
||||
[{ mediaUrl: mediaPath }],
|
||||
{ localRoots },
|
||||
);
|
||||
console.log(
|
||||
"buildWebchatAudioContentBlocksFromReplyPayloads(trustedLocalMedia=true).length =",
|
||||
"buildWebchatAudioContentBlocksFromReplyPayloads(ttsOnlyPayload).length =",
|
||||
trustedBlocks.length,
|
||||
);
|
||||
console.log(
|
||||
"buildWebchatAudioContentBlocksFromReplyPayloads(trustedLocalMedia missing).length =",
|
||||
"buildWebchatAudioContentBlocksFromReplyPayloads(untrusted).length =",
|
||||
untrustedBlocks.length,
|
||||
);
|
||||
|
||||
if (blockResult.mediaUrl) {
|
||||
fs.rmSync(path.dirname(blockResult.mediaUrl), { recursive: true, force: true });
|
||||
}
|
||||
fs.rmSync(path.dirname(mediaPath), { recursive: true, force: true });
|
||||
try {
|
||||
fs.unlinkSync(prefsPath);
|
||||
} catch {
|
||||
|
||||
@@ -153,6 +153,7 @@ const ttsMocks = vi.hoisted(() => {
|
||||
...params.payload,
|
||||
mediaUrl: "https://example.com/tts-synth.opus",
|
||||
audioAsVoice: true,
|
||||
trustedLocalMedia: true,
|
||||
};
|
||||
}
|
||||
return params.payload;
|
||||
@@ -2722,6 +2723,7 @@ describe("dispatchReplyFromConfig", () => {
|
||||
expect(finalPayload?.mediaUrls).toStrictEqual(["/tmp/openclaw-media/normalized-tts.ogg"]);
|
||||
expect(finalPayload?.audioAsVoice).toBe(true);
|
||||
expect(finalPayload?.spokenText).toBe("Hello from block streaming.");
|
||||
expect(finalPayload?.trustedLocalMedia).toBe(true);
|
||||
});
|
||||
|
||||
it("closes oneshot ACP sessions after the turn completes", async () => {
|
||||
|
||||
@@ -1700,6 +1700,7 @@ export async function dispatchReplyFromConfig(
|
||||
mediaUrl: ttsSyntheticReply.mediaUrl,
|
||||
audioAsVoice: ttsSyntheticReply.audioAsVoice,
|
||||
spokenText: accumulatedBlockTtsText,
|
||||
trustedLocalMedia: ttsSyntheticReply.trustedLocalMedia,
|
||||
};
|
||||
const normalizedTtsOnlyPayload = await normalizeReplyMediaPayload(ttsOnlyPayload);
|
||||
const result = await routeReplyToOriginating(normalizedTtsOnlyPayload);
|
||||
|
||||
Reference in New Issue
Block a user