fix(tts): keep media-only no-reply payloads

This commit is contained in:
Peter Steinberger
2026-04-25 18:04:44 +01:00
parent f3ba962fd0
commit 39343088ed
10 changed files with 94 additions and 9 deletions

View File

@@ -28,6 +28,9 @@ Docs: https://docs.openclaw.ai
- Agents/TTS: preserve legacy `[[audio_as_voice]]` hints on trusted tool-result
`MEDIA:` payloads so generated audio still delivers as a voice note. (#46535)
Thanks @azade-c.
- Agents/TTS: keep queued tool media when an assistant ends with `NO_REPLY` on
non-block delivery paths, so media-only generated audio replies still send.
(#60025) Thanks @bradlind1.
- Telegram/STT: frame inbound voice-note transcripts as machine-generated,
untrusted text in agent context while preserving raw transcript mention
detection. Closes #33360. Thanks @smartchainark.

View File

@@ -154,6 +154,8 @@ Details: [Configuration](/gateway/config-agents#messages) and channel docs.
## Silent replies
The exact silent token `NO_REPLY` / `no_reply` means “do not deliver a user-visible reply”.
When a turn also has pending tool media, such as generated TTS audio, OpenClaw
strips the silent text but still delivers the media attachment.
OpenClaw resolves that behavior by conversation type:
- Direct conversations disallow silence by default and rewrite a bare silent

View File

@@ -90,6 +90,7 @@ export function createSubscriptionMock(): SubscriptionMock {
getMessagingToolSentTexts: () => [] as string[], getMessagingToolSentTexts: () => [] as string[],
getMessagingToolSentMediaUrls: () => [] as string[], getMessagingToolSentMediaUrls: () => [] as string[],
getMessagingToolSentTargets: () => [] as MessagingToolSend[], getMessagingToolSentTargets: () => [] as MessagingToolSend[],
getPendingToolMediaReply: () => null,
getSuccessfulCronAdds: () => 0, getSuccessfulCronAdds: () => 0,
getReplayState: () => ({ getReplayState: () => ({
replayInvalid: false, replayInvalid: false,

View File

@@ -2044,6 +2044,7 @@ export async function runEmbeddedAttempt(
getMessagingToolSentTexts, getMessagingToolSentTexts,
getMessagingToolSentMediaUrls, getMessagingToolSentMediaUrls,
getMessagingToolSentTargets, getMessagingToolSentTargets,
getPendingToolMediaReply,
getSuccessfulCronAdds, getSuccessfulCronAdds,
getReplayState, getReplayState,
didSendViaMessagingTool, didSendViaMessagingTool,
@@ -2994,6 +2995,7 @@ export async function runEmbeddedAttempt(
messagingToolSentMediaUrls: getMessagingToolSentMediaUrls(), messagingToolSentMediaUrls: getMessagingToolSentMediaUrls(),
successfulCronAdds: getSuccessfulCronAdds(), successfulCronAdds: getSuccessfulCronAdds(),
}); });
const pendingToolMediaReply = getPendingToolMediaReply();
const replayMetadata = replayMetadataFromState( const replayMetadata = replayMetadataFromState(
observeReplayMetadata(getReplayState(), observedReplayMetadata), observeReplayMetadata(getReplayState(), observedReplayMetadata),
); );
@@ -3077,6 +3079,8 @@ export async function runEmbeddedAttempt(
messagingToolSentTexts: getMessagingToolSentTexts(), messagingToolSentTexts: getMessagingToolSentTexts(),
messagingToolSentMediaUrls: getMessagingToolSentMediaUrls(), messagingToolSentMediaUrls: getMessagingToolSentMediaUrls(),
messagingToolSentTargets: getMessagingToolSentTargets(), messagingToolSentTargets: getMessagingToolSentTargets(),
toolMediaUrls: pendingToolMediaReply?.mediaUrls,
toolAudioAsVoice: pendingToolMediaReply?.audioAsVoice,
successfulCronAdds: getSuccessfulCronAdds(), successfulCronAdds: getSuccessfulCronAdds(),
cloudCodeAssistFormatError: Boolean( cloudCodeAssistFormatError: Boolean(
lastAssistant?.errorMessage && isCloudCodeAssistFormatError(lastAssistant.errorMessage), lastAssistant?.errorMessage && isCloudCodeAssistFormatError(lastAssistant.errorMessage),

View File

@@ -12,10 +12,13 @@ function createContext(
overrides?: { overrides?: {
onAgentEvent?: (event: unknown) => void; onAgentEvent?: (event: unknown) => void;
onBeforeLifecycleTerminal?: () => void | Promise<void>; onBeforeLifecycleTerminal?: () => void | Promise<void>;
onBlockReply?: ((payload: unknown) => void) | undefined;
onBlockReplyFlush?: () => void | Promise<void>; onBlockReplyFlush?: () => void | Promise<void>;
}, },
): EmbeddedPiSubscribeContext { ): EmbeddedPiSubscribeContext {
const onBlockReply = vi.fn(); const hasOnBlockReplyOverride = Boolean(overrides && "onBlockReply" in overrides);
const onBlockReply = hasOnBlockReplyOverride ? overrides?.onBlockReply : vi.fn();
const emitBlockReply = vi.fn();
return { return {
params: { params: {
runId: "run-1", runId: "run-1",
@@ -23,7 +26,7 @@ function createContext(
sessionKey: "agent:main:main", sessionKey: "agent:main:main",
onAgentEvent: overrides?.onAgentEvent, onAgentEvent: overrides?.onAgentEvent,
onBeforeLifecycleTerminal: overrides?.onBeforeLifecycleTerminal, onBeforeLifecycleTerminal: overrides?.onBeforeLifecycleTerminal,
onBlockReply, ...(onBlockReply ? { onBlockReply } : {}),
onBlockReplyFlush: overrides?.onBlockReplyFlush, onBlockReplyFlush: overrides?.onBlockReplyFlush,
}, },
state: { state: {
@@ -43,7 +46,7 @@ function createContext(
warn: vi.fn(), warn: vi.fn(),
}, },
flushBlockReplyBuffer: vi.fn(), flushBlockReplyBuffer: vi.fn(),
emitBlockReply: onBlockReply, emitBlockReply,
resolveCompactionRetry: vi.fn(), resolveCompactionRetry: vi.fn(),
maybeResolveCompactionWait: vi.fn(), maybeResolveCompactionWait: vi.fn(),
} as unknown as EmbeddedPiSubscribeContext; } as unknown as EmbeddedPiSubscribeContext;
@@ -321,6 +324,18 @@ describe("handleAgentEnd", () => {
expect(ctx.state.pendingToolAudioAsVoice).toBe(false); expect(ctx.state.pendingToolAudioAsVoice).toBe(false);
}); });
// Regression test: when no block-reply callback is configured, handleAgentEnd
// must leave queued tool media in state instead of consuming it.
it("preserves orphaned tool media when no block reply callback is configured", async () => {
// Passing the `onBlockReply` key explicitly (even as undefined) makes
// createContext skip its default vi.fn() and omit the callback from params.
const ctx = createContext(undefined, { onBlockReply: undefined });
// Queue one generated audio file flagged for voice-note delivery.
ctx.state.pendingToolMediaUrls = ["/tmp/reply.opus"];
ctx.state.pendingToolAudioAsVoice = true;
await handleAgentEnd(ctx);
// Nothing may be emitted as a block reply on this path...
expect(ctx.emitBlockReply).not.toHaveBeenCalled();
// ...and the queued media and voice flag must still be present afterwards.
expect(ctx.state.pendingToolMediaUrls).toEqual(["/tmp/reply.opus"]);
expect(ctx.state.pendingToolAudioAsVoice).toBe(true);
});
it("emits orphaned tool media before the lifecycle end event", async () => { it("emits orphaned tool media before the lifecycle end event", async () => {
const onAgentEvent = vi.fn(); const onAgentEvent = vi.fn();
const ctx = createContext(undefined, { onAgentEvent }); const ctx = createContext(undefined, { onAgentEvent });

View File

@@ -167,9 +167,11 @@ export function handleAgentEnd(ctx: EmbeddedPiSubscribeContext): void | Promise<
}; };
const flushPendingMediaAndChannel = () => { const flushPendingMediaAndChannel = () => {
const pendingToolMediaReply = consumePendingToolMediaReply(ctx.state); if (ctx.params.onBlockReply) {
if (pendingToolMediaReply && hasAssistantVisibleReply(pendingToolMediaReply)) { const pendingToolMediaReply = consumePendingToolMediaReply(ctx.state);
ctx.emitBlockReply(pendingToolMediaReply); if (pendingToolMediaReply && hasAssistantVisibleReply(pendingToolMediaReply)) {
ctx.emitBlockReply(pendingToolMediaReply);
}
} }
const postMediaFlushResult = ctx.flushBlockReplyBuffer(); const postMediaFlushResult = ctx.flushBlockReplyBuffer();

View File

@@ -9,6 +9,7 @@ import {
handleMessageEnd, handleMessageEnd,
handleMessageUpdate, handleMessageUpdate,
hasAssistantVisibleReply, hasAssistantVisibleReply,
readPendingToolMediaReply,
recordPendingAssistantReplyDirectives, recordPendingAssistantReplyDirectives,
resolveSilentReplyFallbackText, resolveSilentReplyFallbackText,
} from "./pi-embedded-subscribe.handlers.messages.js"; } from "./pi-embedded-subscribe.handlers.messages.js";
@@ -394,6 +395,21 @@ describe("consumePendingToolMediaIntoReply", () => {
}); });
describe("consumePendingToolMediaReply", () => { describe("consumePendingToolMediaReply", () => {
// Verifies that readPendingToolMediaReply builds the media-only payload but
// leaves the pending-media state untouched.
it("reads a media-only reply without consuming queued tool media", () => {
// Minimal state: one queued audio URL, voice-note flag set, not trusted-local.
const state = {
pendingToolMediaUrls: ["/tmp/reply.opus"],
pendingToolAudioAsVoice: true,
pendingToolTrustedLocalMedia: false,
};
// The payload carries the queued URL and the voice flag.
expect(readPendingToolMediaReply(state)).toEqual({
mediaUrls: ["/tmp/reply.opus"],
audioAsVoice: true,
});
// State is unchanged after the read, so a later consumer can still use it.
expect(state.pendingToolMediaUrls).toEqual(["/tmp/reply.opus"]);
expect(state.pendingToolAudioAsVoice).toBe(true);
});
it("builds a media-only reply for orphaned tool media", () => { it("builds a media-only reply for orphaned tool media", () => {
const state = { const state = {
pendingToolMediaUrls: ["/tmp/reply.opus"], pendingToolMediaUrls: ["/tmp/reply.opus"],

View File

@@ -212,6 +212,20 @@ export function consumePendingToolMediaReply(
EmbeddedPiSubscribeState, EmbeddedPiSubscribeState,
"pendingToolMediaUrls" | "pendingToolAudioAsVoice" | "pendingToolTrustedLocalMedia" "pendingToolMediaUrls" | "pendingToolAudioAsVoice" | "pendingToolTrustedLocalMedia"
>, >,
): BlockReplyPayload | null {
const payload = readPendingToolMediaReply(state);
if (!payload) {
return null;
}
clearPendingToolMedia(state);
return payload;
}
export function readPendingToolMediaReply(
state: Pick<
EmbeddedPiSubscribeState,
"pendingToolMediaUrls" | "pendingToolAudioAsVoice" | "pendingToolTrustedLocalMedia"
>,
): BlockReplyPayload | null { ): BlockReplyPayload | null {
if ( if (
state.pendingToolMediaUrls.length === 0 && state.pendingToolMediaUrls.length === 0 &&
@@ -220,15 +234,13 @@ export function consumePendingToolMediaReply(
) { ) {
return null; return null;
} }
const payload: BlockReplyPayload = { return {
mediaUrls: state.pendingToolMediaUrls.length mediaUrls: state.pendingToolMediaUrls.length
? Array.from(new Set(state.pendingToolMediaUrls)) ? Array.from(new Set(state.pendingToolMediaUrls))
: undefined, : undefined,
audioAsVoice: state.pendingToolAudioAsVoice || undefined, audioAsVoice: state.pendingToolAudioAsVoice || undefined,
trustedLocalMedia: state.pendingToolTrustedLocalMedia || undefined, trustedLocalMedia: state.pendingToolTrustedLocalMedia || undefined,
}; };
clearPendingToolMedia(state);
return payload;
} }
function hasReplyDirectiveMetadata(parsed: ReplyDirectiveParseResult | null | undefined): boolean { function hasReplyDirectiveMetadata(parsed: ReplyDirectiveParseResult | null | undefined): boolean {

View File

@@ -414,6 +414,34 @@ describe("subscribeEmbeddedPiSession", () => {
); );
}); });
// End-to-end check on the subscription: media produced by a tool must still be
// readable via getPendingToolMediaReply() after the agent_end event.
it("keeps orphaned tool media available for non-block final payload assembly", () => {
// No onBlockReply is passed here; presumably the harness does not add one
// itself — confirm against createSubscribedSessionHarness.
const { emit, subscription } = createSubscribedSessionHarness({
runId: "run",
builtinToolNames: new Set(["tts"]),
});
// Simulate a successful `tts` tool call whose result carries an audio file
// flagged for voice-note delivery.
emit({
type: "tool_execution_end",
toolName: "tts",
toolCallId: "tc-1",
isError: false,
result: {
details: {
media: {
mediaUrl: "/tmp/reply.opus",
audioAsVoice: true,
},
},
},
});
// End the run without any assistant text being emitted.
emit({ type: "agent_end" });
// The queued media must still be exposed to the caller.
expect(subscription.getPendingToolMediaReply()).toEqual({
mediaUrls: ["/tmp/reply.opus"],
audioAsVoice: true,
});
});
it.each(THINKING_TAG_CASES)( it.each(THINKING_TAG_CASES)(
"suppresses <%s> blocks across chunk boundaries", "suppresses <%s> blocks across chunk boundaries",
async ({ open, close }) => { async ({ open, close }) => {

View File

@@ -24,6 +24,7 @@ import { createEmbeddedPiSessionEventHandler } from "./pi-embedded-subscribe.han
import { import {
consumePendingAssistantReplyDirectivesIntoReply, consumePendingAssistantReplyDirectivesIntoReply,
consumePendingToolMediaIntoReply, consumePendingToolMediaIntoReply,
readPendingToolMediaReply,
} from "./pi-embedded-subscribe.handlers.messages.js"; } from "./pi-embedded-subscribe.handlers.messages.js";
import type { import type {
EmbeddedPiSubscribeContext, EmbeddedPiSubscribeContext,
@@ -866,6 +867,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
getMessagingToolSentTexts: () => messagingToolSentTexts.slice(), getMessagingToolSentTexts: () => messagingToolSentTexts.slice(),
getMessagingToolSentMediaUrls: () => messagingToolSentMediaUrls.slice(), getMessagingToolSentMediaUrls: () => messagingToolSentMediaUrls.slice(),
getMessagingToolSentTargets: () => messagingToolSentTargets.slice(), getMessagingToolSentTargets: () => messagingToolSentTargets.slice(),
getPendingToolMediaReply: () => readPendingToolMediaReply(state),
getSuccessfulCronAdds: () => state.successfulCronAdds, getSuccessfulCronAdds: () => state.successfulCronAdds,
getReplayState: () => ({ ...state.replayState }), getReplayState: () => ({ ...state.replayState }),
// Returns true if any messaging tool successfully sent a message. // Returns true if any messaging tool successfully sent a message.