From 62edfdffbdd027c0c19ee0a3d01c1ae089b20ec2 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Wed, 18 Mar 2026 18:14:36 +0000 Subject: [PATCH] refactor: deduplicate reply payload handling --- .../src/monitor/message-handler.process.ts | 4 +- .../discord/src/monitor/native-command.ts | 29 ++-- .../discord/src/monitor/reply-delivery.ts | 26 +-- extensions/feishu/src/reply-dispatcher.ts | 148 +++++++++--------- extensions/googlechat/src/monitor.ts | 16 +- extensions/imessage/src/monitor/deliver.ts | 16 +- .../matrix/src/matrix/monitor/replies.ts | 20 ++- .../src/mattermost/reply-delivery.ts | 17 +- extensions/msteams/src/messenger.ts | 32 ++-- extensions/signal/src/monitor.ts | 8 +- .../src/monitor/message-handler/dispatch.ts | 23 ++- extensions/slack/src/monitor/replies.ts | 45 +++--- .../telegram/src/bot-message-dispatch.ts | 9 +- .../src/lane-delivery-text-deliverer.ts | 4 +- .../src/auto-reply/heartbeat-runner.ts | 14 +- .../src/auto-reply/monitor/process-message.ts | 6 +- extensions/whatsapp/src/outbound-adapter.ts | 3 +- extensions/zalo/src/monitor.ts | 7 +- extensions/zalouser/src/monitor.ts | 7 +- src/agents/pi-embedded-runner/run/payloads.ts | 3 +- ...bedded-subscribe.handlers.messages.test.ts | 34 +++- ...pi-embedded-subscribe.handlers.messages.ts | 76 +++++---- src/auto-reply/heartbeat-reply-payload.ts | 3 +- .../reply/agent-runner-execution.ts | 6 +- src/auto-reply/reply/agent-runner-helpers.ts | 22 +-- src/auto-reply/reply/agent-runner-payloads.ts | 15 +- src/auto-reply/reply/block-reply-coalescer.ts | 8 +- src/auto-reply/reply/block-reply-pipeline.ts | 23 +-- src/auto-reply/reply/dispatch-acp-delivery.ts | 3 +- src/auto-reply/reply/dispatch-from-config.ts | 3 +- src/auto-reply/reply/followup-runner.ts | 11 +- src/auto-reply/reply/normalize-reply.ts | 63 ++------ src/auto-reply/reply/reply-delivery.ts | 8 +- src/auto-reply/reply/reply-media-paths.ts | 3 +- src/auto-reply/reply/reply-payloads.ts | 11 +- src/auto-reply/reply/route-reply.ts | 18 ++- src/auto-reply/reply/streaming-directives.ts | 6 +- .../plugins/outbound/direct-text-media.ts | 3 +- src/commands/agent-via-gateway.ts | 17 +- src/cron/heartbeat-policy.ts | 3 +- src/cron/isolated-agent/helpers.ts | 5 +- src/cron/isolated-agent/run.ts | 10 +- src/gateway/server-methods/send.ts | 6 +- src/gateway/ws-log.ts | 9 +- src/infra/heartbeat-runner.ts | 14 +- src/infra/outbound/deliver.ts | 28 ++-- src/infra/outbound/message-action-runner.ts | 20 ++- src/infra/outbound/message.ts | 6 +- src/infra/outbound/payloads.ts | 23 ++- src/interactive/payload.test.ts | 36 +++++ src/interactive/payload.ts | 24 +++ src/line/auto-reply-delivery.ts | 4 +- src/plugin-sdk/msteams.ts | 2 +- src/plugin-sdk/reply-payload.test.ts | 121 ++++++++++++++ src/plugin-sdk/reply-payload.ts | 62 +++++++- src/plugin-sdk/subpaths.test.ts | 4 + src/plugin-sdk/zalouser.ts | 1 + src/tts/tts.ts | 6 +- 58 files changed, 704 insertions(+), 450 deletions(-) diff --git a/extensions/discord/src/monitor/message-handler.process.ts b/extensions/discord/src/monitor/message-handler.process.ts index 526ca4ecb71..f24a9e27774 100644 --- a/extensions/discord/src/monitor/message-handler.process.ts +++ b/extensions/discord/src/monitor/message-handler.process.ts @@ -16,6 +16,7 @@ import { resolveDiscordPreviewStreamMode } from "openclaw/plugin-sdk/config-runt import { resolveMarkdownTableMode } from "openclaw/plugin-sdk/config-runtime"; import { readSessionUpdatedAt, resolveStorePath } from "openclaw/plugin-sdk/config-runtime"; import { getAgentScopedMediaLocalRoots } from "openclaw/plugin-sdk/media-runtime"; +import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload"; import { resolveChunkMode } from "openclaw/plugin-sdk/reply-runtime"; import { dispatchInboundMessage } from "openclaw/plugin-sdk/reply-runtime"; import { @@ -610,7 +611,8 @@ export async function processDiscordMessage(ctx: DiscordMessagePreflightContext) } if (draftStream && isFinal) { await flushDraft(); - const hasMedia = Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0; + const reply = resolveSendableOutboundReplyParts(payload); + const hasMedia = reply.hasMedia; const finalText = payload.text; const previewFinalText = resolvePreviewFinalText(finalText); const previewMessageId = draftStream.messageId(); diff --git a/extensions/discord/src/monitor/native-command.ts b/extensions/discord/src/monitor/native-command.ts index 61e225d4f32..39bdad5b738 100644 --- a/extensions/discord/src/monitor/native-command.ts +++ b/extensions/discord/src/monitor/native-command.ts @@ -26,7 +26,7 @@ import { buildPairingReply } from "openclaw/plugin-sdk/conversation-runtime"; import { getAgentScopedMediaLocalRoots } from "openclaw/plugin-sdk/media-runtime"; import { executePluginCommand, matchPluginCommand } from "openclaw/plugin-sdk/plugin-runtime"; import { - resolveOutboundMediaUrls, + resolveSendableOutboundReplyParts, resolveTextChunksWithFallback, } from "openclaw/plugin-sdk/reply-payload"; import { resolveChunkMode, resolveTextChunkLimit } from "openclaw/plugin-sdk/reply-runtime"; @@ -236,13 +236,7 @@ function isDiscordUnknownInteraction(error: unknown): boolean { } function hasRenderableReplyPayload(payload: ReplyPayload): boolean { - if ((payload.text ?? "").trim()) { - return true; - } - if ((payload.mediaUrl ?? "").trim()) { - return true; - } - if (payload.mediaUrls?.some((entry) => entry.trim())) { + if (resolveSendableOutboundReplyParts(payload).hasContent) { return true; } const discordData = payload.channelData?.discord as @@ -891,8 +885,7 @@ async function deliverDiscordInteractionReply(params: { chunkMode: "length" | "newline"; }) { const { interaction, payload, textLimit, maxLinesPerMessage, preferFollowUp, chunkMode } = params; - const mediaList = resolveOutboundMediaUrls(payload); - const text = payload.text ?? ""; + const reply = resolveSendableOutboundReplyParts(payload); const discordData = payload.channelData?.discord as | { components?: TopLevelComponents[] } | undefined; @@ -937,9 +930,9 @@ async function deliverDiscordInteractionReply(params: { }); }; - if (mediaList.length > 0) { + if (reply.hasMedia) { const media = await Promise.all( - mediaList.map(async (url) => { + reply.mediaUrls.map(async (url) => { const loaded = await loadWebMedia(url, { localRoots: params.mediaLocalRoots, }); @@ -950,8 +943,8 @@ async function deliverDiscordInteractionReply(params: { }), ); const chunks = resolveTextChunksWithFallback( - text, - chunkDiscordTextWithMode(text, { + reply.text, + chunkDiscordTextWithMode(reply.text, { maxChars: textLimit, maxLines: maxLinesPerMessage, chunkMode, @@ -968,14 +961,14 @@ async function deliverDiscordInteractionReply(params: { return; } - if (!text.trim() && !firstMessageComponents) { + if (!reply.hasText && !firstMessageComponents) { return; } const chunks = - text || firstMessageComponents + reply.text || firstMessageComponents ? resolveTextChunksWithFallback( - text, - chunkDiscordTextWithMode(text, { + reply.text, + chunkDiscordTextWithMode(reply.text, { maxChars: textLimit, maxLines: maxLinesPerMessage, chunkMode, diff --git a/extensions/discord/src/monitor/reply-delivery.ts b/extensions/discord/src/monitor/reply-delivery.ts index 84efdb24237..a098c41d056 100644 --- a/extensions/discord/src/monitor/reply-delivery.ts +++ b/extensions/discord/src/monitor/reply-delivery.ts @@ -9,7 +9,7 @@ import { type RetryConfig, } from "openclaw/plugin-sdk/infra-runtime"; import { - resolveOutboundMediaUrls, + resolveSendableOutboundReplyParts, resolveTextChunksWithFallback, sendMediaWithLeadingCaption, } from "openclaw/plugin-sdk/reply-payload"; @@ -268,18 +268,18 @@ export async function deliverDiscordReply(params: { : undefined; let deliveredAny = false; for (const payload of params.replies) { - const mediaList = resolveOutboundMediaUrls(payload); - const rawText = payload.text ?? ""; const tableMode = params.tableMode ?? "code"; - const text = convertMarkdownTables(rawText, tableMode); - if (!text && mediaList.length === 0) { + const reply = resolveSendableOutboundReplyParts(payload, { + text: convertMarkdownTables(payload.text ?? "", tableMode), + }); + if (!reply.hasContent) { continue; } - if (mediaList.length === 0) { + if (!reply.hasMedia) { const mode = params.chunkMode ?? "length"; const chunks = resolveTextChunksWithFallback( - text, - chunkDiscordTextWithMode(text, { + reply.text, + chunkDiscordTextWithMode(reply.text, { maxChars: chunkLimit, maxLines: params.maxLinesPerMessage, chunkMode: mode, @@ -312,7 +312,7 @@ export async function deliverDiscordReply(params: { continue; } - const firstMedia = mediaList[0]; + const firstMedia = reply.mediaUrls[0]; if (!firstMedia) { continue; } @@ -331,7 +331,7 @@ export async function deliverDiscordReply(params: { await sendDiscordChunkWithFallback({ cfg: params.cfg, target: params.target, - text, + text: reply.text, token: params.token, rest: params.rest, accountId: params.accountId, @@ -347,7 +347,7 @@ export async function deliverDiscordReply(params: { }); // Additional media items are sent as regular attachments (voice is single-file only). await sendMediaWithLeadingCaption({ - mediaUrls: mediaList.slice(1), + mediaUrls: reply.mediaUrls.slice(1), caption: "", send: async ({ mediaUrl }) => { const replyTo = resolveReplyTo(); @@ -370,8 +370,8 @@ export async function deliverDiscordReply(params: { } await sendMediaWithLeadingCaption({ - mediaUrls: mediaList, - caption: text, + mediaUrls: reply.mediaUrls, + caption: reply.text, send: async ({ mediaUrl, caption }) => { const replyTo = resolveReplyTo(); await sendWithRetry( diff --git a/extensions/feishu/src/reply-dispatcher.ts b/extensions/feishu/src/reply-dispatcher.ts index 8c2d533fbfa..ff787bc7cb0 100644 --- a/extensions/feishu/src/reply-dispatcher.ts +++ b/extensions/feishu/src/reply-dispatcher.ts @@ -1,3 +1,8 @@ +import { + resolveSendableOutboundReplyParts, + resolveTextChunksWithFallback, + sendMediaWithLeadingCaption, +} from "openclaw/plugin-sdk/reply-payload"; import { createReplyPrefixContext, createTypingCallbacks, @@ -13,12 +18,7 @@ import { sendMediaFeishu } from "./media.js"; import type { MentionTarget } from "./mention.js"; import { buildMentionedCardContent } from "./mention.js"; import { getFeishuRuntime } from "./runtime.js"; -import { - sendMarkdownCardFeishu, - sendMessageFeishu, - sendStructuredCardFeishu, - type CardHeaderConfig, -} from "./send.js"; +import { sendMessageFeishu, sendStructuredCardFeishu, type CardHeaderConfig } from "./send.js"; import { FeishuStreamingSession, mergeStreamingText } from "./streaming-card.js"; import { resolveReceiveIdType } from "./targets.js"; import { addTypingIndicator, removeTypingIndicator, type TypingIndicatorState } from "./typing.js"; @@ -300,37 +300,43 @@ export function createFeishuReplyDispatcher(params: CreateFeishuReplyDispatcherP text: string; useCard: boolean; infoKind?: string; + sendChunk: (params: { chunk: string; isFirst: boolean }) => Promise; }) => { - let first = true; const chunkSource = params.useCard ? params.text : core.channel.text.convertMarkdownTables(params.text, tableMode); - for (const chunk of core.channel.text.chunkTextWithMode( + const chunks = resolveTextChunksWithFallback( chunkSource, - textChunkLimit, - chunkMode, - )) { - const message = { - cfg, - to: chatId, - text: chunk, - replyToMessageId: sendReplyToMessageId, - replyInThread: effectiveReplyInThread, - mentions: first ? mentionTargets : undefined, - accountId, - }; - if (params.useCard) { - await sendMarkdownCardFeishu(message); - } else { - await sendMessageFeishu(message); - } - first = false; + core.channel.text.chunkTextWithMode(chunkSource, textChunkLimit, chunkMode), + ); + for (const [index, chunk] of chunks.entries()) { + await params.sendChunk({ + chunk, + isFirst: index === 0, + }); } if (params.infoKind === "final") { deliveredFinalTexts.add(params.text); } }; + const sendMediaReplies = async (payload: ReplyPayload) => { + await sendMediaWithLeadingCaption({ + mediaUrls: resolveSendableOutboundReplyParts(payload).mediaUrls, + caption: "", + send: async ({ mediaUrl }) => { + await sendMediaFeishu({ + cfg, + to: chatId, + mediaUrl, + replyToMessageId: sendReplyToMessageId, + replyInThread: effectiveReplyInThread, + accountId, + }); + }, + }); + }; + const { dispatcher, replyOptions, markDispatchIdle } = core.channel.reply.createReplyDispatcherWithTyping({ responsePrefix: prefixContext.responsePrefix, @@ -344,15 +350,10 @@ export function createFeishuReplyDispatcher(params: CreateFeishuReplyDispatcherP void typingCallbacks.onReplyStart?.(); }, deliver: async (payload: ReplyPayload, info) => { - const text = payload.text ?? ""; - const mediaList = - payload.mediaUrls && payload.mediaUrls.length > 0 - ? payload.mediaUrls - : payload.mediaUrl - ? [payload.mediaUrl] - : []; - const hasText = Boolean(text.trim()); - const hasMedia = mediaList.length > 0; + const reply = resolveSendableOutboundReplyParts(payload); + const text = reply.text; + const hasText = reply.hasText; + const hasMedia = reply.hasMedia; const skipTextForDuplicateFinal = info?.kind === "final" && hasText && deliveredFinalTexts.has(text); const shouldDeliverText = hasText && !skipTextForDuplicateFinal; @@ -363,7 +364,6 @@ export function createFeishuReplyDispatcher(params: CreateFeishuReplyDispatcherP if (shouldDeliverText) { const useCard = renderMode === "card" || (renderMode === "auto" && shouldUseCard(text)); - let first = true; if (info?.kind === "block") { // Drop internal block chunks unless we can safely consume them as @@ -397,16 +397,7 @@ export function createFeishuReplyDispatcher(params: CreateFeishuReplyDispatcherP } // Send media even when streaming handled the text if (hasMedia) { - for (const mediaUrl of mediaList) { - await sendMediaFeishu({ - cfg, - to: chatId, - mediaUrl, - replyToMessageId: sendReplyToMessageId, - replyInThread: effectiveReplyInThread, - accountId, - }); - } + await sendMediaReplies(payload); } return; } @@ -414,43 +405,46 @@ export function createFeishuReplyDispatcher(params: CreateFeishuReplyDispatcherP if (useCard) { const cardHeader = resolveCardHeader(agentId, identity); const cardNote = resolveCardNote(agentId, identity, prefixContext.prefixContext); - for (const chunk of core.channel.text.chunkTextWithMode( + await sendChunkedTextReply({ text, - textChunkLimit, - chunkMode, - )) { - await sendStructuredCardFeishu({ - cfg, - to: chatId, - text: chunk, - replyToMessageId: sendReplyToMessageId, - replyInThread: effectiveReplyInThread, - mentions: first ? mentionTargets : undefined, - accountId, - header: cardHeader, - note: cardNote, - }); - first = false; - } - if (info?.kind === "final") { - deliveredFinalTexts.add(text); - } + useCard: true, + infoKind: info?.kind, + sendChunk: async ({ chunk, isFirst }) => { + await sendStructuredCardFeishu({ + cfg, + to: chatId, + text: chunk, + replyToMessageId: sendReplyToMessageId, + replyInThread: effectiveReplyInThread, + mentions: isFirst ? mentionTargets : undefined, + accountId, + header: cardHeader, + note: cardNote, + }); + }, + }); } else { - await sendChunkedTextReply({ text, useCard: false, infoKind: info?.kind }); + await sendChunkedTextReply({ + text, + useCard: false, + infoKind: info?.kind, + sendChunk: async ({ chunk, isFirst }) => { + await sendMessageFeishu({ + cfg, + to: chatId, + text: chunk, + replyToMessageId: sendReplyToMessageId, + replyInThread: effectiveReplyInThread, + mentions: isFirst ? mentionTargets : undefined, + accountId, + }); + }, + }); } } if (hasMedia) { - for (const mediaUrl of mediaList) { - await sendMediaFeishu({ - cfg, - to: chatId, - mediaUrl, - replyToMessageId: sendReplyToMessageId, - replyInThread: effectiveReplyInThread, - accountId, - }); - } + await sendMediaReplies(payload); } }, onError: async (error, info) => { diff --git a/extensions/googlechat/src/monitor.ts b/extensions/googlechat/src/monitor.ts index e6eeecb5138..b0612842919 100644 --- a/extensions/googlechat/src/monitor.ts +++ b/extensions/googlechat/src/monitor.ts @@ -1,5 +1,8 @@ import type { IncomingMessage, ServerResponse } from "node:http"; -import { deliverTextOrMediaReply } from "openclaw/plugin-sdk/reply-payload"; +import { + deliverTextOrMediaReply, + resolveSendableOutboundReplyParts, +} from "openclaw/plugin-sdk/reply-payload"; import type { OpenClawConfig } from "../runtime-api.js"; import { createWebhookInFlightLimiter, @@ -376,8 +379,10 @@ async function deliverGoogleChatReply(params: { }): Promise { const { payload, account, spaceId, runtime, core, config, statusSink, typingMessageName } = params; - const hasMedia = Boolean(payload.mediaUrls?.length) || Boolean(payload.mediaUrl); - const text = payload.text ?? ""; + const reply = resolveSendableOutboundReplyParts(payload); + const mediaCount = reply.mediaCount; + const hasMedia = reply.hasMedia; + const text = reply.text; let firstTextChunk = true; let suppressCaption = false; @@ -390,8 +395,7 @@ async function deliverGoogleChatReply(params: { }); } catch (err) { runtime.error?.(`Google Chat typing cleanup failed: ${String(err)}`); - const mediaCount = payload.mediaUrls?.length ?? (payload.mediaUrl ? 1 : 0); - const fallbackText = text.trim() + const fallbackText = reply.hasText ? text : mediaCount > 1 ? "Sent attachments." @@ -414,7 +418,7 @@ async function deliverGoogleChatReply(params: { const chunkMode = core.channel.text.resolveChunkMode(config, "googlechat", account.accountId); await deliverTextOrMediaReply({ payload, - text: suppressCaption ? "" : text, + text: suppressCaption ? "" : reply.text, chunkText: (value) => core.channel.text.chunkMarkdownTextWithMode(value, chunkLimit, chunkMode), sendText: async (chunk) => { try { diff --git a/extensions/imessage/src/monitor/deliver.ts b/extensions/imessage/src/monitor/deliver.ts index d7b434a4e2d..708d319b640 100644 --- a/extensions/imessage/src/monitor/deliver.ts +++ b/extensions/imessage/src/monitor/deliver.ts @@ -1,6 +1,9 @@ import { loadConfig } from "openclaw/plugin-sdk/config-runtime"; import { resolveMarkdownTableMode } from "openclaw/plugin-sdk/config-runtime"; -import { deliverTextOrMediaReply } from "openclaw/plugin-sdk/reply-payload"; +import { + deliverTextOrMediaReply, + resolveSendableOutboundReplyParts, +} from "openclaw/plugin-sdk/reply-payload"; import { chunkTextWithMode, resolveChunkMode } from "openclaw/plugin-sdk/reply-runtime"; import type { ReplyPayload } from "openclaw/plugin-sdk/reply-runtime"; import type { RuntimeEnv } from "openclaw/plugin-sdk/runtime-env"; @@ -32,14 +35,15 @@ export async function deliverReplies(params: { const chunkMode = resolveChunkMode(cfg, "imessage", accountId); for (const payload of replies) { const rawText = sanitizeOutboundText(payload.text ?? ""); - const text = convertMarkdownTables(rawText, tableMode); - const hasMedia = Boolean(payload.mediaUrls?.length ?? payload.mediaUrl); - if (!hasMedia && text) { - sentMessageCache?.remember(scope, { text }); + const reply = resolveSendableOutboundReplyParts(payload, { + text: convertMarkdownTables(rawText, tableMode), + }); + if (!reply.hasMedia && reply.hasText) { + sentMessageCache?.remember(scope, { text: reply.text }); } const delivered = await deliverTextOrMediaReply({ payload, - text, + text: reply.text, chunkText: (value) => chunkTextWithMode(value, textLimit, chunkMode), sendText: async (chunk) => { const sent = await sendMessageIMessage(target, chunk, { diff --git a/extensions/matrix/src/matrix/monitor/replies.ts b/extensions/matrix/src/matrix/monitor/replies.ts index b1ab30b20ef..dac58c680ed 100644 --- a/extensions/matrix/src/matrix/monitor/replies.ts +++ b/extensions/matrix/src/matrix/monitor/replies.ts @@ -1,5 +1,8 @@ import type { MatrixClient } from "@vector-im/matrix-bot-sdk"; -import { deliverTextOrMediaReply } from "openclaw/plugin-sdk/reply-payload"; +import { + deliverTextOrMediaReply, + resolveSendableOutboundReplyParts, +} from "openclaw/plugin-sdk/reply-payload"; import type { MarkdownTableMode, ReplyPayload, RuntimeEnv } from "../../../runtime-api.js"; import { getMatrixRuntime } from "../../runtime.js"; import { sendMessageMatrix } from "../send.js"; @@ -33,8 +36,10 @@ export async function deliverMatrixReplies(params: { const chunkMode = core.channel.text.resolveChunkMode(cfg, "matrix", params.accountId); let hasReplied = false; for (const reply of params.replies) { - const hasMedia = Boolean(reply?.mediaUrl) || (reply?.mediaUrls?.length ?? 0) > 0; - if (!reply?.text && !hasMedia) { + const rawText = reply.text ?? ""; + const text = core.channel.text.convertMarkdownTables(rawText, tableMode); + const replyContent = resolveSendableOutboundReplyParts(reply, { text }); + if (!replyContent.hasContent) { if (reply?.audioAsVoice) { logVerbose("matrix reply has audioAsVoice without media/text; skipping"); continue; @@ -49,13 +54,6 @@ export async function deliverMatrixReplies(params: { } const replyToIdRaw = reply.replyToId?.trim(); const replyToId = params.threadId || params.replyToMode === "off" ? undefined : replyToIdRaw; - const rawText = reply.text ?? ""; - const text = core.channel.text.convertMarkdownTables(rawText, tableMode); - const mediaList = reply.mediaUrls?.length - ? reply.mediaUrls - : reply.mediaUrl - ? [reply.mediaUrl] - : []; const shouldIncludeReply = (id?: string) => Boolean(id) && (params.replyToMode === "all" || !hasReplied); @@ -63,7 +61,7 @@ export async function deliverMatrixReplies(params: { const delivered = await deliverTextOrMediaReply({ payload: reply, - text, + text: replyContent.text, chunkText: (value) => core.channel.text .chunkMarkdownTextWithMode(value, chunkLimit, chunkMode) diff --git a/extensions/mattermost/src/mattermost/reply-delivery.ts b/extensions/mattermost/src/mattermost/reply-delivery.ts index 492d31ba0fc..5f2c2e7191d 100644 --- a/extensions/mattermost/src/mattermost/reply-delivery.ts +++ b/extensions/mattermost/src/mattermost/reply-delivery.ts @@ -1,4 +1,7 @@ -import { deliverTextOrMediaReply } from "openclaw/plugin-sdk/reply-payload"; +import { + deliverTextOrMediaReply, + resolveSendableOutboundReplyParts, +} from "openclaw/plugin-sdk/reply-payload"; import type { OpenClawConfig, PluginRuntime, ReplyPayload } from "../runtime-api.js"; import { getAgentScopedMediaLocalRoots } from "../runtime-api.js"; @@ -27,10 +30,12 @@ export async function deliverMattermostReplyPayload(params: { tableMode: MarkdownTableMode; sendMessage: SendMattermostMessage; }): Promise { - const text = params.core.channel.text.convertMarkdownTables( - params.payload.text ?? "", - params.tableMode, - ); + const reply = resolveSendableOutboundReplyParts(params.payload, { + text: params.core.channel.text.convertMarkdownTables( + params.payload.text ?? "", + params.tableMode, + ), + }); const mediaLocalRoots = getAgentScopedMediaLocalRoots(params.cfg, params.agentId); const chunkMode = params.core.channel.text.resolveChunkMode( params.cfg, @@ -39,7 +44,7 @@ export async function deliverMattermostReplyPayload(params: { ); await deliverTextOrMediaReply({ payload: params.payload, - text, + text: reply.text, chunkText: (value) => params.core.channel.text.chunkMarkdownTextWithMode(value, params.textLimit, chunkMode), sendText: async (chunk) => { diff --git a/extensions/msteams/src/messenger.ts b/extensions/msteams/src/messenger.ts index b024b53c1f5..c2263a4975f 100644 --- a/extensions/msteams/src/messenger.ts +++ b/extensions/msteams/src/messenger.ts @@ -5,7 +5,7 @@ import { type MarkdownTableMode, type MSTeamsReplyStyle, type ReplyPayload, - resolveOutboundMediaUrls, + resolveSendableOutboundReplyParts, SILENT_REPLY_TOKEN, sleep, } from "../runtime-api.js"; @@ -217,41 +217,39 @@ export function renderReplyPayloadsToMessages( }); for (const payload of replies) { - const mediaList = resolveOutboundMediaUrls(payload); - const text = getMSTeamsRuntime().channel.text.convertMarkdownTables( - payload.text ?? "", - tableMode, - ); + const reply = resolveSendableOutboundReplyParts(payload, { + text: getMSTeamsRuntime().channel.text.convertMarkdownTables(payload.text ?? "", tableMode), + }); - if (!text && mediaList.length === 0) { + if (!reply.hasContent) { continue; } - if (mediaList.length === 0) { - pushTextMessages(out, text, { chunkText, chunkLimit, chunkMode }); + if (!reply.hasMedia) { + pushTextMessages(out, reply.text, { chunkText, chunkLimit, chunkMode }); continue; } if (mediaMode === "inline") { // For inline mode, combine text with first media as attachment - const firstMedia = mediaList[0]; + const firstMedia = reply.mediaUrls[0]; if (firstMedia) { - out.push({ text: text || undefined, mediaUrl: firstMedia }); + out.push({ text: reply.text || undefined, mediaUrl: firstMedia }); // Additional media URLs as separate messages - for (let i = 1; i < mediaList.length; i++) { - if (mediaList[i]) { - out.push({ mediaUrl: mediaList[i] }); + for (let i = 1; i < reply.mediaUrls.length; i++) { + if (reply.mediaUrls[i]) { + out.push({ mediaUrl: reply.mediaUrls[i] }); } } } else { - pushTextMessages(out, text, { chunkText, chunkLimit, chunkMode }); + pushTextMessages(out, reply.text, { chunkText, chunkLimit, chunkMode }); } continue; } // mediaMode === "split" - pushTextMessages(out, text, { chunkText, chunkLimit, chunkMode }); - for (const mediaUrl of mediaList) { + pushTextMessages(out, reply.text, { chunkText, chunkLimit, chunkMode }); + for (const mediaUrl of reply.mediaUrls) { if (!mediaUrl) { continue; } diff --git a/extensions/signal/src/monitor.ts b/extensions/signal/src/monitor.ts index 5a4882b1068..20f0c943823 100644 --- a/extensions/signal/src/monitor.ts +++ b/extensions/signal/src/monitor.ts @@ -9,7 +9,10 @@ import type { SignalReactionNotificationMode } from "openclaw/plugin-sdk/config- import type { BackoffPolicy } from "openclaw/plugin-sdk/infra-runtime"; import { waitForTransportReady } from "openclaw/plugin-sdk/infra-runtime"; import { saveMediaBuffer } from "openclaw/plugin-sdk/media-runtime"; -import { deliverTextOrMediaReply } from "openclaw/plugin-sdk/reply-payload"; +import { + deliverTextOrMediaReply, + resolveSendableOutboundReplyParts, +} from "openclaw/plugin-sdk/reply-payload"; import { chunkTextWithMode, resolveChunkMode, @@ -297,9 +300,10 @@ async function deliverReplies(params: { const { replies, target, baseUrl, account, accountId, runtime, maxBytes, textLimit, chunkMode } = params; for (const payload of replies) { + const reply = resolveSendableOutboundReplyParts(payload); const delivered = await deliverTextOrMediaReply({ payload, - text: payload.text ?? "", + text: reply.text, chunkText: (value) => chunkTextWithMode(value, textLimit, chunkMode), sendText: async (chunk) => { await sendMessageSignal(target, chunk, { diff --git a/extensions/slack/src/monitor/message-handler/dispatch.ts b/extensions/slack/src/monitor/message-handler/dispatch.ts index 569ca8f60a7..5fac27f002b 100644 --- a/extensions/slack/src/monitor/message-handler/dispatch.ts +++ b/extensions/slack/src/monitor/message-handler/dispatch.ts @@ -5,6 +5,7 @@ import { createReplyPrefixOptions } from "openclaw/plugin-sdk/channel-runtime"; import { createTypingCallbacks } from "openclaw/plugin-sdk/channel-runtime"; import { resolveStorePath, updateLastRoute } from "openclaw/plugin-sdk/config-runtime"; import { resolveAgentOutboundIdentity } from "openclaw/plugin-sdk/infra-runtime"; +import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload"; import { dispatchInboundMessage } from "openclaw/plugin-sdk/reply-runtime"; import { clearHistoryEntriesIfEnabled } from "openclaw/plugin-sdk/reply-runtime"; import { createReplyDispatcherWithTyping } from "openclaw/plugin-sdk/reply-runtime"; @@ -33,7 +34,7 @@ import { import type { PreparedSlackMessage } from "./types.js"; function hasMedia(payload: ReplyPayload): boolean { - return Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0; + return resolveSendableOutboundReplyParts(payload).hasMedia; } export function isSlackStreamingEnabled(params: { @@ -250,17 +251,13 @@ export async function dispatchPreparedSlackMessage(prepared: PreparedSlackMessag }; const deliverWithStreaming = async (payload: ReplyPayload): Promise => { - if ( - streamFailed || - hasMedia(payload) || - readSlackReplyBlocks(payload)?.length || - !payload.text?.trim() - ) { + const reply = resolveSendableOutboundReplyParts(payload); + if (streamFailed || reply.hasMedia || readSlackReplyBlocks(payload)?.length || !reply.hasText) { await deliverNormally(payload, streamSession?.threadTs); return; } - const text = payload.text.trim(); + const text = reply.trimmedText; let plannedThreadTs: string | undefined; try { if (!streamSession) { @@ -311,16 +308,16 @@ export async function dispatchPreparedSlackMessage(prepared: PreparedSlackMessag return; } - const mediaCount = payload.mediaUrls?.length ?? (payload.mediaUrl ? 1 : 0); + const reply = resolveSendableOutboundReplyParts(payload); const slackBlocks = readSlackReplyBlocks(payload); const draftMessageId = draftStream?.messageId(); const draftChannelId = draftStream?.channelId(); - const finalText = payload.text ?? ""; - const trimmedFinalText = finalText.trim(); + const finalText = reply.text; + const trimmedFinalText = reply.trimmedText; const canFinalizeViaPreviewEdit = previewStreamingEnabled && streamMode !== "status_final" && - mediaCount === 0 && + !reply.hasMedia && !payload.isError && (trimmedFinalText.length > 0 || Boolean(slackBlocks?.length)) && typeof draftMessageId === "string" && @@ -361,7 +358,7 @@ export async function dispatchPreparedSlackMessage(prepared: PreparedSlackMessag } catch (err) { logVerbose(`slack: status_final completion update failed (${String(err)})`); } - } else if (mediaCount > 0) { + } else if (reply.hasMedia) { await draftStream?.clear(); hasStreamedMessage = false; } diff --git a/extensions/slack/src/monitor/replies.ts b/extensions/slack/src/monitor/replies.ts index 935adaab3bc..f25e58673ca 100644 --- a/extensions/slack/src/monitor/replies.ts +++ b/extensions/slack/src/monitor/replies.ts @@ -1,5 +1,8 @@ import type { MarkdownTableMode } from "openclaw/plugin-sdk/config-runtime"; -import { deliverTextOrMediaReply } from "openclaw/plugin-sdk/reply-payload"; +import { + deliverTextOrMediaReply, + resolveSendableOutboundReplyParts, +} from "openclaw/plugin-sdk/reply-payload"; import type { ChunkMode } from "openclaw/plugin-sdk/reply-runtime"; import { chunkMarkdownTextWithMode } from "openclaw/plugin-sdk/reply-runtime"; import { createReplyReferencePlanner } from "openclaw/plugin-sdk/reply-runtime"; @@ -38,15 +41,14 @@ export async function deliverReplies(params: { // must not force threading. const inlineReplyToId = params.replyToMode === "off" ? undefined : payload.replyToId; const threadTs = inlineReplyToId ?? params.replyThreadTs; - const mediaList = payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []); - const text = payload.text ?? ""; + const reply = resolveSendableOutboundReplyParts(payload); const slackBlocks = readSlackReplyBlocks(payload); - if (!text && mediaList.length === 0 && !slackBlocks?.length) { + if (!reply.hasContent && !slackBlocks?.length) { continue; } - if (mediaList.length === 0 && slackBlocks?.length) { - const trimmed = text.trim(); + if (!reply.hasMedia && slackBlocks?.length) { + const trimmed = reply.trimmedText; if (!trimmed && !slackBlocks?.length) { continue; } @@ -66,17 +68,16 @@ export async function deliverReplies(params: { const delivered = await deliverTextOrMediaReply({ payload, - text, - chunkText: - mediaList.length === 0 - ? (value) => { - const trimmed = value.trim(); - if (!trimmed || isSilentReplyText(trimmed, SILENT_REPLY_TOKEN)) { - return []; - } - return [trimmed]; + text: reply.text, + chunkText: !reply.hasMedia + ? (value) => { + const trimmed = value.trim(); + if (!trimmed || isSilentReplyText(trimmed, SILENT_REPLY_TOKEN)) { + return []; } - : undefined, + return [trimmed]; + } + : undefined, sendText: async (trimmed) => { await sendMessageSlack(params.target, trimmed, { token: params.token, @@ -189,12 +190,12 @@ export async function deliverSlackSlashReplies(params: { const messages: string[] = []; const chunkLimit = Math.min(params.textLimit, 4000); for (const payload of params.replies) { - const textRaw = payload.text?.trim() ?? ""; - const text = textRaw && !isSilentReplyText(textRaw, SILENT_REPLY_TOKEN) ? textRaw : undefined; - const mediaList = payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []); - const combined = [text ?? "", ...mediaList.map((url) => url.trim()).filter(Boolean)] - .filter(Boolean) - .join("\n"); + const reply = resolveSendableOutboundReplyParts(payload); + const text = + reply.hasText && !isSilentReplyText(reply.trimmedText, SILENT_REPLY_TOKEN) + ? reply.trimmedText + : undefined; + const combined = [text ?? "", ...reply.mediaUrls].filter(Boolean).join("\n"); if (!combined) { continue; } diff --git a/extensions/telegram/src/bot-message-dispatch.ts b/extensions/telegram/src/bot-message-dispatch.ts index 75df3bd5f2c..b6c3c01763c 100644 --- a/extensions/telegram/src/bot-message-dispatch.ts +++ b/extensions/telegram/src/bot-message-dispatch.ts @@ -22,6 +22,7 @@ import type { TelegramAccountConfig, } from "openclaw/plugin-sdk/config-runtime"; import { getAgentScopedMediaLocalRoots } from "openclaw/plugin-sdk/media-runtime"; +import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload"; import { resolveChunkMode } from "openclaw/plugin-sdk/reply-runtime"; import { clearHistoryEntriesIfEnabled } from "openclaw/plugin-sdk/reply-runtime"; import type { ReplyPayload } from "openclaw/plugin-sdk/reply-runtime"; @@ -567,7 +568,8 @@ export const dispatchTelegramMessage = async ({ )?.buttons; const split = splitTextIntoLaneSegments(payload.text); const segments = split.segments; - const hasMedia = Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0; + const reply = resolveSendableOutboundReplyParts(payload); + const hasMedia = reply.hasMedia; const flushBufferedFinalAnswer = async () => { const buffered = reasoningStepState.takeBufferedFinalAnswer(); @@ -631,7 +633,7 @@ export const dispatchTelegramMessage = async ({ return; } if (split.suppressedReasoningOnly) { - if (hasMedia) { + if (reply.hasMedia) { const payloadWithoutSuppressedReasoning = typeof payload.text === "string" ? { ...payload, text: "" } : payload; await sendPayload(payloadWithoutSuppressedReasoning); @@ -647,8 +649,7 @@ export const dispatchTelegramMessage = async ({ await reasoningLane.stream?.stop(); reasoningStepState.resetForNextStep(); } - const canSendAsIs = - hasMedia || (typeof payload.text === "string" && payload.text.length > 0); + const canSendAsIs = reply.hasMedia || reply.text.length > 0; if (!canSendAsIs) { if (info.kind === "final") { await flushBufferedFinalAnswer(); diff --git a/extensions/telegram/src/lane-delivery-text-deliverer.ts b/extensions/telegram/src/lane-delivery-text-deliverer.ts index c99dc52661a..c67a091995e 100644 --- a/extensions/telegram/src/lane-delivery-text-deliverer.ts +++ b/extensions/telegram/src/lane-delivery-text-deliverer.ts @@ -1,3 +1,4 @@ +import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload"; import type { ReplyPayload } from "openclaw/plugin-sdk/reply-runtime"; import type { TelegramInlineButtons } from "./button-types.js"; import type { TelegramDraftStream } from "./draft-stream.js"; @@ -459,7 +460,8 @@ export function createLaneTextDeliverer(params: CreateLaneTextDelivererParams) { allowPreviewUpdateForNonFinal = false, }: DeliverLaneTextParams): Promise => { const lane = params.lanes[laneName]; - const hasMedia = Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0; + const reply = resolveSendableOutboundReplyParts(payload, { text }); + const hasMedia = reply.hasMedia; const canEditViaPreview = !hasMedia && text.length > 0 && text.length <= params.draftMaxChars && !payload.isError; diff --git a/extensions/whatsapp/src/auto-reply/heartbeat-runner.ts b/extensions/whatsapp/src/auto-reply/heartbeat-runner.ts index 7aa35705f43..8fb27a39fe4 100644 --- a/extensions/whatsapp/src/auto-reply/heartbeat-runner.ts +++ b/extensions/whatsapp/src/auto-reply/heartbeat-runner.ts @@ -9,6 +9,10 @@ import { } from "openclaw/plugin-sdk/config-runtime"; import { emitHeartbeatEvent, resolveIndicatorType } from "openclaw/plugin-sdk/infra-runtime"; import { resolveHeartbeatVisibility } from "openclaw/plugin-sdk/infra-runtime"; +import { + hasOutboundReplyContent, + resolveSendableOutboundReplyParts, +} from "openclaw/plugin-sdk/reply-payload"; import { resolveHeartbeatReplyPayload } from "openclaw/plugin-sdk/reply-runtime"; import { DEFAULT_HEARTBEAT_ACK_MAX_CHARS, @@ -178,10 +182,7 @@ export async function runWebHeartbeatOnce(opts: { ); const replyPayload = resolveHeartbeatReplyPayload(replyResult); - if ( - !replyPayload || - (!replyPayload.text && !replyPayload.mediaUrl && !replyPayload.mediaUrls?.length) - ) { + if (!replyPayload || !hasOutboundReplyContent(replyPayload)) { heartbeatLogger.info( { to: redactedTo, @@ -201,7 +202,8 @@ export async function runWebHeartbeatOnce(opts: { return; } - const hasMedia = Boolean(replyPayload.mediaUrl || (replyPayload.mediaUrls?.length ?? 0) > 0); + const reply = resolveSendableOutboundReplyParts(replyPayload); + const hasMedia = reply.hasMedia; const ackMaxChars = Math.max( 0, cfg.agents?.defaults?.heartbeat?.ackMaxChars ?? DEFAULT_HEARTBEAT_ACK_MAX_CHARS, @@ -250,7 +252,7 @@ export async function runWebHeartbeatOnce(opts: { ); } - const finalText = stripped.text || replyPayload.text || ""; + const finalText = stripped.text || reply.text; // Check if alerts are disabled for WhatsApp if (!visibility.showAlerts) { diff --git a/extensions/whatsapp/src/auto-reply/monitor/process-message.ts b/extensions/whatsapp/src/auto-reply/monitor/process-message.ts index beaa564fe28..5db9cb31d0a 100644 --- a/extensions/whatsapp/src/auto-reply/monitor/process-message.ts +++ b/extensions/whatsapp/src/auto-reply/monitor/process-message.ts @@ -6,6 +6,7 @@ import type { loadConfig } from "openclaw/plugin-sdk/config-runtime"; import { resolveMarkdownTableMode } from "openclaw/plugin-sdk/config-runtime"; import { recordSessionMetaFromInbound } from "openclaw/plugin-sdk/config-runtime"; import { getAgentScopedMediaLocalRoots } from "openclaw/plugin-sdk/media-runtime"; +import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload"; import { resolveChunkMode, resolveTextChunkLimit } from "openclaw/plugin-sdk/reply-runtime"; import { shouldComputeCommandAuthorized } from "openclaw/plugin-sdk/reply-runtime"; import { formatInboundEnvelope } from "openclaw/plugin-sdk/reply-runtime"; @@ -429,10 +430,11 @@ export async function processMessage(params: { }); const fromDisplay = params.msg.chatType === "group" ? conversationId : (params.msg.from ?? "unknown"); - const hasMedia = Boolean(payload.mediaUrl || payload.mediaUrls?.length); + const reply = resolveSendableOutboundReplyParts(payload); + const hasMedia = reply.hasMedia; whatsappOutboundLog.info(`Auto-replied to ${fromDisplay}${hasMedia ? " (media)" : ""}`); if (shouldLogVerbose()) { - const preview = payload.text != null ? elide(payload.text, 400) : ""; + const preview = payload.text != null ? elide(reply.text, 400) : ""; whatsappOutboundLog.debug(`Reply body: ${preview}${hasMedia ? " (media)" : ""}`); } }, diff --git a/extensions/whatsapp/src/outbound-adapter.ts b/extensions/whatsapp/src/outbound-adapter.ts index d9710afb557..4800e2ded43 100644 --- a/extensions/whatsapp/src/outbound-adapter.ts +++ b/extensions/whatsapp/src/outbound-adapter.ts @@ -5,6 +5,7 @@ import { createAttachedChannelResultAdapter, createEmptyChannelResult, } from "openclaw/plugin-sdk/channel-send-result"; +import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload"; import { chunkText } from "openclaw/plugin-sdk/reply-runtime"; import { shouldLogVerbose } from "openclaw/plugin-sdk/runtime-env"; import { resolveWhatsAppOutboundTarget } from "./runtime-api.js"; @@ -24,7 +25,7 @@ export const whatsappOutbound: ChannelOutboundAdapter = { resolveWhatsAppOutboundTarget({ to, allowFrom, mode }), sendPayload: async (ctx) => { const text = trimLeadingWhitespace(ctx.payload.text); - const hasMedia = Boolean(ctx.payload.mediaUrl) || (ctx.payload.mediaUrls?.length ?? 0) > 0; + const hasMedia = resolveSendableOutboundReplyParts(ctx.payload).hasMedia; if (!text && !hasMedia) { return createEmptyChannelResult("whatsapp"); } diff --git a/extensions/zalo/src/monitor.ts b/extensions/zalo/src/monitor.ts index 768c556fd7b..b21476fbf8f 100644 --- a/extensions/zalo/src/monitor.ts +++ b/extensions/zalo/src/monitor.ts @@ -1,4 +1,5 @@ import type { IncomingMessage, ServerResponse } from "node:http"; +import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload"; import type { ResolvedZaloAccount } from "./accounts.js"; import { ZaloApiError, @@ -579,11 +580,13 @@ async function deliverZaloReply(params: { }): Promise { const { payload, token, chatId, runtime, core, config, accountId, statusSink, fetcher } = params; const tableMode = params.tableMode ?? "code"; - const text = core.channel.text.convertMarkdownTables(payload.text ?? "", tableMode); + const reply = resolveSendableOutboundReplyParts(payload, { + text: core.channel.text.convertMarkdownTables(payload.text ?? "", tableMode), + }); const chunkMode = core.channel.text.resolveChunkMode(config, "zalo", accountId); await deliverTextOrMediaReply({ payload, - text, + text: reply.text, chunkText: (value) => core.channel.text.chunkMarkdownTextWithMode(value, ZALO_TEXT_LIMIT, chunkMode), sendText: async (chunk) => { diff --git a/extensions/zalouser/src/monitor.ts b/extensions/zalouser/src/monitor.ts index d269345572c..7f455d93166 100644 --- a/extensions/zalouser/src/monitor.ts +++ b/extensions/zalouser/src/monitor.ts @@ -28,6 +28,7 @@ import { mergeAllowlist, resolveMentionGatingWithBypass, resolveOpenProviderRuntimeGroupPolicy, + resolveSendableOutboundReplyParts, resolveDefaultGroupPolicy, resolveSenderCommandAuthorization, resolveSenderScopedGroupPolicy, @@ -706,14 +707,16 @@ async function deliverZalouserReply(params: { const { payload, profile, chatId, isGroup, runtime, core, config, accountId, statusSink } = params; const tableMode = params.tableMode ?? "code"; - const text = core.channel.text.convertMarkdownTables(payload.text ?? "", tableMode); + const reply = resolveSendableOutboundReplyParts(payload, { + text: core.channel.text.convertMarkdownTables(payload.text ?? "", tableMode), + }); const chunkMode = core.channel.text.resolveChunkMode(config, "zalouser", accountId); const textChunkLimit = core.channel.text.resolveTextChunkLimit(config, "zalouser", accountId, { fallbackLimit: ZALOUSER_TEXT_LIMIT, }); await deliverTextOrMediaReply({ payload, - text, + text: reply.text, sendText: async (chunk) => { try { await sendMessageZalouser(chatId, chunk, { diff --git a/src/agents/pi-embedded-runner/run/payloads.ts b/src/agents/pi-embedded-runner/run/payloads.ts index c0e0ded136e..6b0cf33e980 100644 --- a/src/agents/pi-embedded-runner/run/payloads.ts +++ b/src/agents/pi-embedded-runner/run/payloads.ts @@ -4,6 +4,7 @@ import type { ReasoningLevel, VerboseLevel } from "../../../auto-reply/thinking. import { isSilentReplyText, SILENT_REPLY_TOKEN } from "../../../auto-reply/tokens.js"; import { formatToolAggregate } from "../../../auto-reply/tool-meta.js"; import type { OpenClawConfig } from "../../../config/config.js"; +import { hasOutboundReplyContent } from "../../../plugin-sdk/reply-payload.js"; import { BILLING_ERROR_USER_MESSAGE, formatAssistantErrorText, @@ -336,7 +337,7 @@ export function buildEmbeddedRunPayloads(params: { audioAsVoice: item.audioAsVoice || Boolean(hasAudioAsVoiceTag && item.media?.length), })) .filter((p) => { - if (!p.text && !p.mediaUrl && (!p.mediaUrls || p.mediaUrls.length === 0)) { + if (!hasOutboundReplyContent(p)) { return false; } if (p.text && isSilentReplyText(p.text, SILENT_REPLY_TOKEN)) { diff --git a/src/agents/pi-embedded-subscribe.handlers.messages.test.ts b/src/agents/pi-embedded-subscribe.handlers.messages.test.ts index 6c508bdbdb6..1ecdd45f9af 100644 --- a/src/agents/pi-embedded-subscribe.handlers.messages.test.ts +++ b/src/agents/pi-embedded-subscribe.handlers.messages.test.ts @@ -1,5 +1,9 @@ import { describe, expect, it } from "vitest"; -import { resolveSilentReplyFallbackText } from "./pi-embedded-subscribe.handlers.messages.js"; +import { + buildAssistantStreamData, + hasAssistantVisibleReply, + resolveSilentReplyFallbackText, +} from "./pi-embedded-subscribe.handlers.messages.js"; describe("resolveSilentReplyFallbackText", () => { it("replaces NO_REPLY with latest messaging tool text when available", () => { @@ -29,3 +33,31 @@ describe("resolveSilentReplyFallbackText", () => { ).toBe("NO_REPLY"); }); }); + +describe("hasAssistantVisibleReply", () => { + it("treats audio-only payloads as visible", () => { + expect(hasAssistantVisibleReply({ audioAsVoice: true })).toBe(true); + }); + + it("detects text or media visibility", () => { + expect(hasAssistantVisibleReply({ text: "hello" })).toBe(true); + expect(hasAssistantVisibleReply({ mediaUrls: ["https://example.com/a.png"] })).toBe(true); + expect(hasAssistantVisibleReply({})).toBe(false); + }); +}); + +describe("buildAssistantStreamData", () => { + it("normalizes media payloads for assistant stream events", () => { + expect( + buildAssistantStreamData({ + text: "hello", + delta: "he", + mediaUrl: "https://example.com/a.png", + }), + ).toEqual({ + text: "hello", + delta: "he", + mediaUrls: ["https://example.com/a.png"], + }); + }); +}); diff --git a/src/agents/pi-embedded-subscribe.handlers.messages.ts b/src/agents/pi-embedded-subscribe.handlers.messages.ts index 04f47e67cde..d790eb912ca 100644 --- a/src/agents/pi-embedded-subscribe.handlers.messages.ts +++ b/src/agents/pi-embedded-subscribe.handlers.messages.ts @@ -3,6 +3,7 @@ import { parseReplyDirectives } from "../auto-reply/reply/reply-directives.js"; import { SILENT_REPLY_TOKEN } from "../auto-reply/tokens.js"; import { emitAgentEvent } from "../infra/agent-events.js"; import { createInlineCodeState } from "../markdown/code-spans.js"; +import { resolveSendableOutboundReplyParts } from "../plugin-sdk/reply-payload.js"; import { isMessagingToolDuplicateNormalized, normalizeTextForComparison, @@ -56,6 +57,29 @@ export function resolveSilentReplyFallbackText(params: { return fallback; } +export function hasAssistantVisibleReply(params: { + text?: string; + mediaUrls?: string[]; + mediaUrl?: string; + audioAsVoice?: boolean; +}): boolean { + return resolveSendableOutboundReplyParts(params).hasContent || Boolean(params.audioAsVoice); +} + +export function buildAssistantStreamData(params: { + text?: string; + delta?: string; + mediaUrls?: string[]; + mediaUrl?: string; +}): { text: string; delta: string; mediaUrls?: string[] } { + const mediaUrls = resolveSendableOutboundReplyParts(params).mediaUrls; + return { + text: params.text ?? "", + delta: params.delta ?? "", + mediaUrls: mediaUrls.length ? mediaUrls : undefined, + }; +} + export function handleMessageStart( ctx: EmbeddedPiSubscribeContext, evt: AgentEvent & { message: AgentMessage }, @@ -196,14 +220,13 @@ export function handleMessageUpdate( const parsedDelta = visibleDelta ? ctx.consumePartialReplyDirectives(visibleDelta) : null; const parsedFull = parseReplyDirectives(stripTrailingDirective(next)); const cleanedText = parsedFull.text; - const mediaUrls = parsedDelta?.mediaUrls; - const hasMedia = Boolean(mediaUrls && mediaUrls.length > 0); + const { mediaUrls, hasMedia } = resolveSendableOutboundReplyParts(parsedDelta ?? {}); const hasAudio = Boolean(parsedDelta?.audioAsVoice); const previousCleaned = ctx.state.lastStreamedAssistantCleaned ?? ""; let shouldEmit = false; let deltaText = ""; - if (!cleanedText && !hasMedia && !hasAudio) { + if (!hasAssistantVisibleReply({ text: cleanedText, mediaUrls, audioAsVoice: hasAudio })) { shouldEmit = false; } else if (previousCleaned && !cleanedText.startsWith(previousCleaned)) { shouldEmit = false; @@ -216,29 +239,23 @@ export function handleMessageUpdate( ctx.state.lastStreamedAssistantCleaned = cleanedText; if (shouldEmit) { + const data = buildAssistantStreamData({ + text: cleanedText, + delta: deltaText, + mediaUrls, + }); emitAgentEvent({ runId: ctx.params.runId, stream: "assistant", - data: { - text: cleanedText, - delta: deltaText, - mediaUrls: hasMedia ? mediaUrls : undefined, - }, + data, }); void ctx.params.onAgentEvent?.({ stream: "assistant", - data: { - text: cleanedText, - delta: deltaText, - mediaUrls: hasMedia ? mediaUrls : undefined, - }, + data, }); ctx.state.emittedAssistantUpdate = true; if (ctx.params.onPartialReply && ctx.state.shouldEmitPartialReplies) { - void ctx.params.onPartialReply({ - text: cleanedText, - mediaUrls: hasMedia ? mediaUrls : undefined, - }); + void ctx.params.onPartialReply(data); } } } @@ -291,8 +308,7 @@ export function handleMessageEnd( const trimmedText = text.trim(); const parsedText = trimmedText ? parseReplyDirectives(stripTrailingDirective(trimmedText)) : null; let cleanedText = parsedText?.text ?? ""; - let mediaUrls = parsedText?.mediaUrls; - let hasMedia = Boolean(mediaUrls && mediaUrls.length > 0); + let { mediaUrls, hasMedia } = resolveSendableOutboundReplyParts(parsedText ?? {}); if (!cleanedText && !hasMedia && !ctx.params.enforceFinalTag) { const rawTrimmed = rawText.trim(); @@ -301,28 +317,24 @@ export function handleMessageEnd( if (rawCandidate) { const parsedFallback = parseReplyDirectives(stripTrailingDirective(rawCandidate)); cleanedText = parsedFallback.text ?? rawCandidate; - mediaUrls = parsedFallback.mediaUrls; - hasMedia = Boolean(mediaUrls && mediaUrls.length > 0); + ({ mediaUrls, hasMedia } = resolveSendableOutboundReplyParts(parsedFallback)); } } if (!ctx.state.emittedAssistantUpdate && (cleanedText || hasMedia)) { + const data = buildAssistantStreamData({ + text: cleanedText, + delta: cleanedText, + mediaUrls, + }); emitAgentEvent({ runId: ctx.params.runId, stream: "assistant", - data: { - text: cleanedText, - delta: cleanedText, - mediaUrls: hasMedia ? mediaUrls : undefined, - }, + data, }); void ctx.params.onAgentEvent?.({ stream: "assistant", - data: { - text: cleanedText, - delta: cleanedText, - mediaUrls: hasMedia ? mediaUrls : undefined, - }, + data, }); ctx.state.emittedAssistantUpdate = true; } @@ -377,7 +389,7 @@ export function handleMessageEnd( replyToCurrent, } = splitResult; // Emit if there's content OR audioAsVoice flag (to propagate the flag). - if (cleanedText || (mediaUrls && mediaUrls.length > 0) || audioAsVoice) { + if (hasAssistantVisibleReply({ text: cleanedText, mediaUrls, audioAsVoice })) { emitBlockReplySafely({ text: cleanedText, mediaUrls: mediaUrls?.length ? mediaUrls : undefined, diff --git a/src/auto-reply/heartbeat-reply-payload.ts b/src/auto-reply/heartbeat-reply-payload.ts index 4bdf9e3a57b..3a235bc4273 100644 --- a/src/auto-reply/heartbeat-reply-payload.ts +++ b/src/auto-reply/heartbeat-reply-payload.ts @@ -1,3 +1,4 @@ +import { hasOutboundReplyContent } from "../plugin-sdk/reply-payload.js"; import type { ReplyPayload } from "./types.js"; export function resolveHeartbeatReplyPayload( @@ -14,7 +15,7 @@ export function resolveHeartbeatReplyPayload( if (!payload) { continue; } - if (payload.text || payload.mediaUrl || (payload.mediaUrls && payload.mediaUrls.length > 0)) { + if (hasOutboundReplyContent(payload)) { return payload; } } diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index 5c9b78c208f..7b22a5bdba1 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -23,6 +23,7 @@ import { } from "../../config/sessions.js"; import { logVerbose } from "../../globals.js"; import { emitAgentEvent, registerAgentRunContext } from "../../infra/agent-events.js"; +import { resolveSendableOutboundReplyParts } from "../../plugin-sdk/reply-payload.js"; import { defaultRuntime } from "../../runtime.js"; import { isMarkdownCapableMessageChannel, @@ -148,6 +149,7 @@ export async function runAgentTurnWithFallback(params: { try { const normalizeStreamingText = (payload: ReplyPayload): { text?: string; skip: boolean } => { let text = payload.text; + const reply = resolveSendableOutboundReplyParts(payload); if (!params.isHeartbeat && text?.includes("HEARTBEAT_OK")) { const stripped = stripHeartbeatToken(text, { mode: "message", @@ -156,7 +158,7 @@ export async function runAgentTurnWithFallback(params: { didLogHeartbeatStrip = true; logVerbose("Stripped stray HEARTBEAT_OK token from reply"); } - if (stripped.shouldSkip && (payload.mediaUrls?.length ?? 0) === 0) { + if (stripped.shouldSkip && !reply.hasMedia) { return { skip: true }; } text = stripped.text; @@ -172,7 +174,7 @@ export async function runAgentTurnWithFallback(params: { } if (!text) { // Allow media-only payloads (e.g. tool result screenshots) through. - if ((payload.mediaUrls?.length ?? 0) > 0) { + if (reply.hasMedia) { return { text: undefined, skip: false }; } return { skip: true }; diff --git a/src/auto-reply/reply/agent-runner-helpers.ts b/src/auto-reply/reply/agent-runner-helpers.ts index 11ea0fe9f53..b62e4683308 100644 --- a/src/auto-reply/reply/agent-runner-helpers.ts +++ b/src/auto-reply/reply/agent-runner-helpers.ts @@ -1,5 +1,9 @@ import { loadSessionStore } from "../../config/sessions.js"; import { isAudioFileName } from "../../media/mime.js"; +import { + hasOutboundReplyContent, + resolveSendableOutboundReplyParts, +} from "../../plugin-sdk/reply-payload.js"; import { normalizeVerboseLevel, type VerboseLevel } from "../thinking.js"; import type { ReplyPayload } from "../types.js"; import { scheduleFollowupDrain } from "./queue.js"; @@ -9,7 +13,7 @@ const hasAudioMedia = (urls?: string[]): boolean => Boolean(urls?.some((url) => isAudioFileName(url))); export const isAudioPayload = (payload: ReplyPayload): boolean => - hasAudioMedia(payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : undefined)); + hasAudioMedia(resolveSendableOutboundReplyParts(payload).mediaUrls); type VerboseGateParams = { sessionKey?: string; @@ -63,19 +67,9 @@ export const signalTypingIfNeeded = async ( payloads: ReplyPayload[], typingSignals: TypingSignaler, ): Promise => { - const shouldSignalTyping = payloads.some((payload) => { - const trimmed = payload.text?.trim(); - if (trimmed) { - return true; - } - if (payload.mediaUrl) { - return true; - } - if (payload.mediaUrls && payload.mediaUrls.length > 0) { - return true; - } - return false; - }); + const shouldSignalTyping = payloads.some((payload) => + hasOutboundReplyContent(payload, { trimText: true }), + ); if (shouldSignalTyping) { await typingSignals.signalRunStart(); } diff --git a/src/auto-reply/reply/agent-runner-payloads.ts b/src/auto-reply/reply/agent-runner-payloads.ts index 9e89c921407..5f052b8f4f9 100644 --- a/src/auto-reply/reply/agent-runner-payloads.ts +++ b/src/auto-reply/reply/agent-runner-payloads.ts @@ -1,5 +1,6 @@ import type { ReplyToMode } from "../../config/types.js"; import { logVerbose } from "../../globals.js"; +import { resolveSendableOutboundReplyParts } from "../../plugin-sdk/reply-payload.js"; import { stripHeartbeatToken } from "../heartbeat.js"; import type { OriginatingChannelType } from "../templating.js"; import { SILENT_REPLY_TOKEN } from "../tokens.js"; @@ -20,15 +21,11 @@ import { shouldSuppressMessagingToolReplies, } from "./reply-payloads.js"; -function hasPayloadMedia(payload: ReplyPayload): boolean { - return Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0; -} - async function normalizeReplyPayloadMedia(params: { payload: ReplyPayload; normalizeMediaPaths?: (payload: ReplyPayload) => Promise; }): Promise { - if (!params.normalizeMediaPaths || !hasPayloadMedia(params.payload)) { + if (!params.normalizeMediaPaths || !resolveSendableOutboundReplyParts(params.payload).hasMedia) { return params.payload; } @@ -69,11 +66,7 @@ async function normalizeSentMediaUrlsForDedupe(params: { mediaUrl: trimmed, mediaUrls: [trimmed], }); - const normalizedMediaUrls = normalized.mediaUrls?.length - ? normalized.mediaUrls - : normalized.mediaUrl - ? [normalized.mediaUrl] - : []; + const normalizedMediaUrls = resolveSendableOutboundReplyParts(normalized).mediaUrls; for (const mediaUrl of normalizedMediaUrls) { const candidate = mediaUrl.trim(); if (!candidate || seen.has(candidate)) { @@ -130,7 +123,7 @@ export async function buildReplyPayloads(params: { didLogHeartbeatStrip = true; logVerbose("Stripped stray HEARTBEAT_OK token from reply"); } - const hasMedia = Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0; + const hasMedia = resolveSendableOutboundReplyParts(payload).hasMedia; if (stripped.shouldSkip && !hasMedia) { return []; } diff --git a/src/auto-reply/reply/block-reply-coalescer.ts b/src/auto-reply/reply/block-reply-coalescer.ts index 130f57b3d07..ea1022a469c 100644 --- a/src/auto-reply/reply/block-reply-coalescer.ts +++ b/src/auto-reply/reply/block-reply-coalescer.ts @@ -1,3 +1,4 @@ +import { resolveSendableOutboundReplyParts } from "../../plugin-sdk/reply-payload.js"; import type { ReplyPayload } from "../types.js"; import type { BlockStreamingCoalescing } from "./block-streaming.js"; @@ -75,9 +76,10 @@ export function createBlockReplyCoalescer(params: { if (shouldAbort()) { return; } - const hasMedia = Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0; - const text = payload.text ?? ""; - const hasText = text.trim().length > 0; + const reply = resolveSendableOutboundReplyParts(payload); + const hasMedia = reply.hasMedia; + const text = reply.text; + const hasText = reply.hasText; if (hasMedia) { void flush({ force: true }); void onFlush(payload); diff --git a/src/auto-reply/reply/block-reply-pipeline.ts b/src/auto-reply/reply/block-reply-pipeline.ts index 9ce85334238..53a9e46c313 100644 --- a/src/auto-reply/reply/block-reply-pipeline.ts +++ b/src/auto-reply/reply/block-reply-pipeline.ts @@ -1,4 +1,5 @@ import { logVerbose } from "../../globals.js"; +import { resolveSendableOutboundReplyParts } from "../../plugin-sdk/reply-payload.js"; import type { ReplyPayload } from "../types.js"; import { createBlockReplyCoalescer } from "./block-reply-coalescer.js"; import type { BlockStreamingCoalescing } from "./block-streaming.js"; @@ -35,30 +36,20 @@ export function createAudioAsVoiceBuffer(params: { } export function createBlockReplyPayloadKey(payload: ReplyPayload): string { - const text = payload.text?.trim() ?? ""; - const mediaList = payload.mediaUrls?.length - ? payload.mediaUrls - : payload.mediaUrl - ? [payload.mediaUrl] - : []; + const reply = resolveSendableOutboundReplyParts(payload); return JSON.stringify({ - text, - mediaList, + text: reply.trimmedText, + mediaList: reply.mediaUrls, replyToId: payload.replyToId ?? null, }); } export function createBlockReplyContentKey(payload: ReplyPayload): string { - const text = payload.text?.trim() ?? ""; - const mediaList = payload.mediaUrls?.length - ? payload.mediaUrls - : payload.mediaUrl - ? [payload.mediaUrl] - : []; + const reply = resolveSendableOutboundReplyParts(payload); // Content-only key used for final-payload suppression after block streaming. // This intentionally ignores replyToId so a streamed threaded payload and the // later final payload still collapse when they carry the same content. - return JSON.stringify({ text, mediaList }); + return JSON.stringify({ text: reply.trimmedText, mediaList: reply.mediaUrls }); } const withTimeout = async ( @@ -217,7 +208,7 @@ export function createBlockReplyPipeline(params: { if (bufferPayload(payload)) { return; } - const hasMedia = Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0; + const hasMedia = resolveSendableOutboundReplyParts(payload).hasMedia; if (hasMedia) { void coalescer?.flush({ force: true }); sendPayload(payload, /* bypassSeenCheck */ false); diff --git a/src/auto-reply/reply/dispatch-acp-delivery.ts b/src/auto-reply/reply/dispatch-acp-delivery.ts index 6624f9868a2..a9d50521be2 100644 --- a/src/auto-reply/reply/dispatch-acp-delivery.ts +++ b/src/auto-reply/reply/dispatch-acp-delivery.ts @@ -2,6 +2,7 @@ import type { OpenClawConfig } from "../../config/config.js"; import type { TtsAutoMode } from "../../config/types.tts.js"; import { logVerbose } from "../../globals.js"; import { runMessageAction } from "../../infra/outbound/message-action-runner.js"; +import { hasOutboundReplyContent } from "../../plugin-sdk/reply-payload.js"; import { maybeApplyTtsToPayload } from "../../tts/tts.js"; import type { FinalizedMsgContext } from "../templating.js"; import type { ReplyPayload } from "../types.js"; @@ -127,7 +128,7 @@ export function createAcpDispatchDeliveryCoordinator(params: { state.blockCount += 1; } - if ((payload.text?.trim() ?? "").length > 0 || payload.mediaUrl || payload.mediaUrls?.length) { + if (hasOutboundReplyContent(payload, { trimText: true })) { await startReplyLifecycleOnce(); } diff --git a/src/auto-reply/reply/dispatch-from-config.ts b/src/auto-reply/reply/dispatch-from-config.ts index 34950c20950..3893d1d8138 100644 --- a/src/auto-reply/reply/dispatch-from-config.ts +++ b/src/auto-reply/reply/dispatch-from-config.ts @@ -29,6 +29,7 @@ import { logMessageQueued, logSessionStateChange, } from "../../logging/diagnostic.js"; +import { resolveSendableOutboundReplyParts } from "../../plugin-sdk/reply-payload.js"; import { buildPluginBindingDeclinedText, buildPluginBindingErrorText, @@ -532,7 +533,7 @@ export async function dispatchReplyFromConfig(params: { } // Group/native flows intentionally suppress tool summary text, but media-only // tool results (for example TTS audio) must still be delivered. - const hasMedia = Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0; + const hasMedia = resolveSendableOutboundReplyParts(payload).hasMedia; if (!hasMedia) { return null; } diff --git a/src/auto-reply/reply/followup-runner.ts b/src/auto-reply/reply/followup-runner.ts index 339883e730b..3e21490b990 100644 --- a/src/auto-reply/reply/followup-runner.ts +++ b/src/auto-reply/reply/followup-runner.ts @@ -9,6 +9,10 @@ import type { SessionEntry } from "../../config/sessions.js"; import type { TypingMode } from "../../config/types.js"; import { logVerbose } from "../../globals.js"; import { registerAgentRunContext } from "../../infra/agent-events.js"; +import { + hasOutboundReplyContent, + resolveSendableOutboundReplyParts, +} from "../../plugin-sdk/reply-payload.js"; import { defaultRuntime } from "../../runtime.js"; import { isInternalMessageChannel } from "../../utils/message-channel.js"; import { stripHeartbeatToken } from "../heartbeat.js"; @@ -81,13 +85,12 @@ export function createFollowupRunner(params: { } for (const payload of payloads) { - if (!payload?.text && !payload?.mediaUrl && !payload?.mediaUrls?.length) { + if (!payload || !hasOutboundReplyContent(payload)) { continue; } if ( isSilentReplyText(payload.text, SILENT_REPLY_TOKEN) && - !payload.mediaUrl && - !payload.mediaUrls?.length + !resolveSendableOutboundReplyParts(payload).hasMedia ) { continue; } @@ -289,7 +292,7 @@ export function createFollowupRunner(params: { return [payload]; } const stripped = stripHeartbeatToken(text, { mode: "message" }); - const hasMedia = Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0; + const hasMedia = resolveSendableOutboundReplyParts(payload).hasMedia; if (stripped.shouldSkip && !hasMedia) { return []; } diff --git a/src/auto-reply/reply/normalize-reply.ts b/src/auto-reply/reply/normalize-reply.ts index 52faa463bdb..a3ae3417d7d 100644 --- a/src/auto-reply/reply/normalize-reply.ts +++ b/src/auto-reply/reply/normalize-reply.ts @@ -1,5 +1,5 @@ import { sanitizeUserFacingText } from "../../agents/pi-embedded-helpers.js"; -import { hasReplyChannelData, hasReplyContent } from "../../interactive/payload.js"; +import { hasReplyPayloadContent } from "../../interactive/payload.js"; import { stripHeartbeatToken } from "../heartbeat.js"; import { HEARTBEAT_TOKEN, @@ -32,17 +32,18 @@ export function normalizeReplyPayload( payload: ReplyPayload, opts: NormalizeReplyOptions = {}, ): ReplyPayload | null { - const hasChannelData = hasReplyChannelData(payload.channelData); + const hasContent = (text: string | undefined) => + hasReplyPayloadContent( + { + ...payload, + text, + }, + { + trimText: true, + }, + ); const trimmed = payload.text?.trim() ?? ""; - if ( - !hasReplyContent({ - text: trimmed, - mediaUrl: payload.mediaUrl, - mediaUrls: payload.mediaUrls, - interactive: payload.interactive, - hasChannelData, - }) - ) { + if (!hasContent(trimmed)) { opts.onSkip?.("empty"); return null; } @@ -50,14 +51,7 @@ export function normalizeReplyPayload( const silentToken = opts.silentToken ?? SILENT_REPLY_TOKEN; let text = payload.text ?? undefined; if (text && isSilentReplyText(text, silentToken)) { - if ( - !hasReplyContent({ - mediaUrl: payload.mediaUrl, - mediaUrls: payload.mediaUrls, - interactive: payload.interactive, - hasChannelData, - }) - ) { + if (!hasContent("")) { opts.onSkip?.("silent"); return null; } @@ -68,15 +62,7 @@ export function normalizeReplyPayload( // silent just like the exact-match path above. (#30916, #30955) if (text && text.includes(silentToken) && !isSilentReplyText(text, silentToken)) { text = stripSilentToken(text, silentToken); - if ( - !hasReplyContent({ - text, - mediaUrl: payload.mediaUrl, - mediaUrls: payload.mediaUrls, - interactive: payload.interactive, - hasChannelData, - }) - ) { + if (!hasContent(text)) { opts.onSkip?.("silent"); return null; } @@ -92,16 +78,7 @@ export function normalizeReplyPayload( if (stripped.didStrip) { opts.onHeartbeatStrip?.(); } - if ( - stripped.shouldSkip && - !hasReplyContent({ - text: stripped.text, - mediaUrl: payload.mediaUrl, - mediaUrls: payload.mediaUrls, - interactive: payload.interactive, - hasChannelData, - }) - ) { + if (stripped.shouldSkip && !hasContent(stripped.text)) { opts.onSkip?.("heartbeat"); return null; } @@ -111,15 +88,7 @@ export function normalizeReplyPayload( if (text) { text = sanitizeUserFacingText(text, { errorContext: Boolean(payload.isError) }); } - if ( - !hasReplyContent({ - text, - mediaUrl: payload.mediaUrl, - mediaUrls: payload.mediaUrls, - interactive: payload.interactive, - hasChannelData, - }) - ) { + if (!hasContent(text)) { opts.onSkip?.("empty"); return null; } diff --git a/src/auto-reply/reply/reply-delivery.ts b/src/auto-reply/reply/reply-delivery.ts index cacd6b083cb..0a410319959 100644 --- a/src/auto-reply/reply/reply-delivery.ts +++ b/src/auto-reply/reply/reply-delivery.ts @@ -1,4 +1,5 @@ import { logVerbose } from "../../globals.js"; +import { resolveSendableOutboundReplyParts } from "../../plugin-sdk/reply-payload.js"; import { SILENT_REPLY_TOKEN } from "../tokens.js"; import type { BlockReplyContext, ReplyPayload } from "../types.js"; import type { BlockReplyPipeline } from "./block-reply-pipeline.js"; @@ -57,9 +58,6 @@ export function normalizeReplyPayloadDirectives(params: { }; } -const hasRenderableMedia = (payload: ReplyPayload): boolean => - Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0; - export function createBlockReplyDeliveryHandler(params: { onBlockReply: (payload: ReplyPayload, context?: BlockReplyContext) => Promise | void; currentMessageId?: string; @@ -73,7 +71,7 @@ export function createBlockReplyDeliveryHandler(params: { }): (payload: ReplyPayload) => Promise { return async (payload) => { const { text, skip } = params.normalizeStreamingText(payload); - if (skip && !hasRenderableMedia(payload)) { + if (skip && !resolveSendableOutboundReplyParts(payload).hasMedia) { return; } @@ -106,7 +104,7 @@ export function createBlockReplyDeliveryHandler(params: { ? await params.normalizeMediaPaths(normalized.payload) : normalized.payload; const blockPayload = params.applyReplyToMode(mediaNormalizedPayload); - const blockHasMedia = hasRenderableMedia(blockPayload); + const blockHasMedia = resolveSendableOutboundReplyParts(blockPayload).hasMedia; // Skip empty payloads unless they have audioAsVoice flag (need to track it). if (!blockPayload.text && !blockHasMedia && !blockPayload.audioAsVoice) { diff --git a/src/auto-reply/reply/reply-media-paths.ts b/src/auto-reply/reply/reply-media-paths.ts index 1c09316afad..45447e7b82d 100644 --- a/src/auto-reply/reply/reply-media-paths.ts +++ b/src/auto-reply/reply/reply-media-paths.ts @@ -2,6 +2,7 @@ import { resolvePathFromInput } from "../../agents/path-policy.js"; import { assertMediaNotDataUrl, resolveSandboxedMediaSource } from "../../agents/sandbox-paths.js"; import { ensureSandboxWorkspaceForSession } from "../../agents/sandbox.js"; import type { OpenClawConfig } from "../../config/config.js"; +import { resolveSendableOutboundReplyParts } from "../../plugin-sdk/reply-payload.js"; import type { ReplyPayload } from "../types.js"; const HTTP_URL_RE = /^https?:\/\//i; @@ -25,7 +26,7 @@ function isLikelyLocalMediaSource(media: string): boolean { } function getPayloadMediaList(payload: ReplyPayload): string[] { - return payload.mediaUrls?.length ? payload.mediaUrls : payload.mediaUrl ? [payload.mediaUrl] : []; + return resolveSendableOutboundReplyParts(payload).mediaUrls; } export function createReplyMediaPathNormalizer(params: { diff --git a/src/auto-reply/reply/reply-payloads.ts b/src/auto-reply/reply/reply-payloads.ts index 7d7ae82975c..1826d1872af 100644 --- a/src/auto-reply/reply/reply-payloads.ts +++ b/src/auto-reply/reply/reply-payloads.ts @@ -4,7 +4,7 @@ import { normalizeChannelId } from "../../channels/plugins/index.js"; import { parseExplicitTargetForChannel } from "../../channels/plugins/target-parsing.js"; import type { ReplyToMode } from "../../config/types.js"; import { normalizeTargetForProvider } from "../../infra/outbound/target-normalization.js"; -import { hasReplyChannelData, hasReplyContent } from "../../interactive/payload.js"; +import { hasReplyPayloadContent } from "../../interactive/payload.js"; import { normalizeOptionalAccountId } from "../../routing/account-id.js"; import type { OriginatingChannelType } from "../templating.js"; import type { ReplyPayload } from "../types.js"; @@ -75,14 +75,7 @@ export function applyReplyTagsToPayload( } export function isRenderablePayload(payload: ReplyPayload): boolean { - return hasReplyContent({ - text: payload.text, - mediaUrl: payload.mediaUrl, - mediaUrls: payload.mediaUrls, - interactive: payload.interactive, - hasChannelData: hasReplyChannelData(payload.channelData), - extraContent: payload.audioAsVoice, - }); + return hasReplyPayloadContent(payload, { extraContent: payload.audioAsVoice }); } export function shouldSuppressReasoningPayload(payload: ReplyPayload): boolean { diff --git a/src/auto-reply/reply/route-reply.ts b/src/auto-reply/reply/route-reply.ts index 3836ceb5ab6..3fed4655d99 100644 --- a/src/auto-reply/reply/route-reply.ts +++ b/src/auto-reply/reply/route-reply.ts @@ -12,7 +12,7 @@ import { resolveEffectiveMessagesConfig } from "../../agents/identity.js"; import { getChannelPlugin, normalizeChannelId } from "../../channels/plugins/index.js"; import type { OpenClawConfig } from "../../config/config.js"; import { buildOutboundSessionContext } from "../../infra/outbound/session-context.js"; -import { hasReplyContent } from "../../interactive/payload.js"; +import { hasReplyPayloadContent } from "../../interactive/payload.js"; import { INTERNAL_MESSAGE_CHANNEL, normalizeMessageChannel } from "../../utils/message-channel.js"; import type { OriginatingChannelType } from "../templating.js"; import type { ReplyPayload } from "../types.js"; @@ -126,12 +126,16 @@ export async function routeReply(params: RouteReplyParams): Promise - Boolean(parsed.text) || - Boolean(parsed.mediaUrl) || - (parsed.mediaUrls?.length ?? 0) > 0 || - Boolean(parsed.audioAsVoice); + hasOutboundReplyContent(parsed) || Boolean(parsed.audioAsVoice); export function createStreamingDirectiveAccumulator() { let pendingTail = ""; diff --git a/src/channels/plugins/outbound/direct-text-media.ts b/src/channels/plugins/outbound/direct-text-media.ts index d6e13a4fce7..0209027342d 100644 --- a/src/channels/plugins/outbound/direct-text-media.ts +++ b/src/channels/plugins/outbound/direct-text-media.ts @@ -1,6 +1,7 @@ import { chunkText } from "../../../auto-reply/chunk.js"; import type { OpenClawConfig } from "../../../config/config.js"; import type { OutboundSendDeps } from "../../../infra/outbound/deliver.js"; +import { resolveOutboundMediaUrls } from "../../../plugin-sdk/reply-payload.js"; import { resolveChannelMediaMaxBytes } from "../media-limits.js"; import type { ChannelOutboundAdapter } from "../types.js"; @@ -29,7 +30,7 @@ type SendPayloadAdapter = Pick< >; export function resolvePayloadMediaUrls(payload: SendPayloadContext["payload"]): string[] { - return payload.mediaUrls?.length ? payload.mediaUrls : payload.mediaUrl ? [payload.mediaUrl] : []; + return resolveOutboundMediaUrls(payload); } export async function sendPayloadMediaSequence(params: { diff --git a/src/commands/agent-via-gateway.ts b/src/commands/agent-via-gateway.ts index a44caa3f3bf..c37166218d1 100644 --- a/src/commands/agent-via-gateway.ts +++ b/src/commands/agent-via-gateway.ts @@ -4,6 +4,7 @@ import type { CliDeps } from "../cli/deps.js"; import { withProgress } from "../cli/progress.js"; import { loadConfig } from "../config/config.js"; import { callGateway, randomIdempotencyKey } from "../gateway/call.js"; +import { resolveSendableOutboundReplyParts } from "../plugin-sdk/reply-payload.js"; import { normalizeAgentId } from "../routing/session-key.js"; import type { RuntimeEnv } from "../runtime.js"; import { @@ -69,16 +70,16 @@ function formatPayloadForLog(payload: { mediaUrls?: string[]; mediaUrl?: string | null; }) { + const parts = resolveSendableOutboundReplyParts({ + text: payload.text, + mediaUrls: payload.mediaUrls, + mediaUrl: typeof payload.mediaUrl === "string" ? payload.mediaUrl : undefined, + }); const lines: string[] = []; - if (payload.text) { - lines.push(payload.text.trimEnd()); + if (parts.text) { + lines.push(parts.text.trimEnd()); } - const mediaUrl = - typeof payload.mediaUrl === "string" && payload.mediaUrl.trim() - ? payload.mediaUrl.trim() - : undefined; - const media = payload.mediaUrls ?? (mediaUrl ? [mediaUrl] : []); - for (const url of media) { + for (const url of parts.mediaUrls) { lines.push(`MEDIA:${url}`); } return lines.join("\n").trimEnd(); diff --git a/src/cron/heartbeat-policy.ts b/src/cron/heartbeat-policy.ts index 61edfa0701f..d356bcdbda5 100644 --- a/src/cron/heartbeat-policy.ts +++ b/src/cron/heartbeat-policy.ts @@ -1,4 +1,5 @@ import { stripHeartbeatToken } from "../auto-reply/heartbeat.js"; +import { resolveSendableOutboundReplyParts } from "../plugin-sdk/reply-payload.js"; export type HeartbeatDeliveryPayload = { text?: string; @@ -14,7 +15,7 @@ export function shouldSkipHeartbeatOnlyDelivery( return true; } const hasAnyMedia = payloads.some( - (payload) => (payload.mediaUrls?.length ?? 0) > 0 || Boolean(payload.mediaUrl), + (payload) => resolveSendableOutboundReplyParts(payload).hasMedia, ); if (hasAnyMedia) { return false; diff --git a/src/cron/isolated-agent/helpers.ts b/src/cron/isolated-agent/helpers.ts index 448ef1c59ae..66a07a58844 100644 --- a/src/cron/isolated-agent/helpers.ts +++ b/src/cron/isolated-agent/helpers.ts @@ -1,5 +1,6 @@ import { DEFAULT_HEARTBEAT_ACK_MAX_CHARS } from "../../auto-reply/heartbeat.js"; import type { ReplyPayload } from "../../auto-reply/types.js"; +import { hasOutboundReplyContent } from "../../plugin-sdk/reply-payload.js"; import { truncateUtf16Safe } from "../../utils.js"; import { shouldSkipHeartbeatOnlyDelivery } from "../heartbeat-policy.js"; @@ -61,11 +62,9 @@ export function pickLastNonEmptyTextFromPayloads( export function pickLastDeliverablePayload(payloads: DeliveryPayload[]) { const isDeliverable = (p: DeliveryPayload) => { - const text = (p?.text ?? "").trim(); - const hasMedia = Boolean(p?.mediaUrl) || (p?.mediaUrls?.length ?? 0) > 0; const hasInteractive = (p?.interactive?.blocks?.length ?? 0) > 0; const hasChannelData = Object.keys(p?.channelData ?? {}).length > 0; - return text || hasMedia || hasInteractive || hasChannelData; + return hasOutboundReplyContent(p, { trimText: true }) || hasInteractive || hasChannelData; }; for (let i = payloads.length - 1; i >= 0; i--) { if (payloads[i]?.isError) { diff --git a/src/cron/isolated-agent/run.ts b/src/cron/isolated-agent/run.ts index 78f045d03cf..2ca8cf2b824 100644 --- a/src/cron/isolated-agent/run.ts +++ b/src/cron/isolated-agent/run.ts @@ -48,6 +48,7 @@ import { import type { AgentDefaultsConfig } from "../../config/types.js"; import { registerAgentRunContext } from "../../infra/agent-events.js"; import { logWarn } from "../../logger.js"; +import { resolveSendableOutboundReplyParts } from "../../plugin-sdk/reply-payload.js"; import { normalizeAgentId } from "../../routing/session-key.js"; import { buildSafeExternalPrompt, @@ -687,9 +688,9 @@ export async function runCronIsolatedAgentTurn(params: { const interimPayloads = interimRunResult.payloads ?? []; const interimDeliveryPayload = pickLastDeliverablePayload(interimPayloads); const interimPayloadHasStructuredContent = - Boolean(interimDeliveryPayload?.mediaUrl) || - (interimDeliveryPayload?.mediaUrls?.length ?? 0) > 0 || - Object.keys(interimDeliveryPayload?.channelData ?? {}).length > 0; + (interimDeliveryPayload + ? resolveSendableOutboundReplyParts(interimDeliveryPayload).hasMedia + : false) || Object.keys(interimDeliveryPayload?.channelData ?? {}).length > 0; const interimText = pickLastNonEmptyTextFromPayloads(interimPayloads)?.trim() ?? ""; const hasDescendantsSinceRunStart = listDescendantRunsForRequester(agentSessionKey).some( (entry) => { @@ -809,8 +810,7 @@ export async function runCronIsolatedAgentTurn(params: { ? [{ text: synthesizedText }] : []; const deliveryPayloadHasStructuredContent = - Boolean(deliveryPayload?.mediaUrl) || - (deliveryPayload?.mediaUrls?.length ?? 0) > 0 || + (deliveryPayload ? resolveSendableOutboundReplyParts(deliveryPayload).hasMedia : false) || Object.keys(deliveryPayload?.channelData ?? {}).length > 0; const deliveryBestEffort = resolveCronDeliveryBestEffort(params.job); const hasErrorPayload = payloads.some((payload) => payload?.isError === true); diff --git a/src/gateway/server-methods/send.ts b/src/gateway/server-methods/send.ts index 5cf36e39af2..b980d9e890d 100644 --- a/src/gateway/server-methods/send.ts +++ b/src/gateway/server-methods/send.ts @@ -13,7 +13,7 @@ import { normalizeReplyPayloadsForDelivery } from "../../infra/outbound/payloads import { buildOutboundSessionContext } from "../../infra/outbound/session-context.js"; import { maybeResolveIdLikeTarget } from "../../infra/outbound/target-resolver.js"; import { resolveOutboundTarget } from "../../infra/outbound/targets.js"; -import { resolveOutboundMediaUrls } from "../../plugin-sdk/reply-payload.js"; +import { resolveSendableOutboundReplyParts } from "../../plugin-sdk/reply-payload.js"; import { normalizePollInput } from "../../polls.js"; import { ErrorCodes, @@ -211,8 +211,8 @@ export const sendHandlers: GatewayRequestHandlers = { .map((payload) => payload.text) .filter(Boolean) .join("\n"); - const mirrorMediaUrls = mirrorPayloads.flatMap((payload) => - resolveOutboundMediaUrls(payload), + const mirrorMediaUrls = mirrorPayloads.flatMap( + (payload) => resolveSendableOutboundReplyParts(payload).mediaUrls, ); const providedSessionKey = typeof request.sessionKey === "string" && request.sessionKey.trim() diff --git a/src/gateway/ws-log.ts b/src/gateway/ws-log.ts index f987ccf8d37..52e07806dd1 100644 --- a/src/gateway/ws-log.ts +++ b/src/gateway/ws-log.ts @@ -3,6 +3,7 @@ import { isVerbose } from "../globals.js"; import { shouldLogSubsystemToConsole } from "../logging/console.js"; import { getDefaultRedactPatterns, redactSensitiveText } from "../logging/redact.js"; import { createSubsystemLogger } from "../logging/subsystem.js"; +import { resolveSendableOutboundReplyParts } from "../plugin-sdk/reply-payload.js"; import { parseAgentSessionKey } from "../routing/session-key.js"; import { DEFAULT_WS_SLOW_MS, getGatewayWsLogStyle } from "./ws-logging.js"; @@ -204,9 +205,11 @@ export function summarizeAgentEventForWsLog(payload: unknown): Record 0) { - extra.media = mediaUrls.length; + const mediaCount = resolveSendableOutboundReplyParts({ + mediaUrls: Array.isArray(data.mediaUrls) ? data.mediaUrls : undefined, + }).mediaCount; + if (mediaCount > 0) { + extra.media = mediaCount; } return extra; } diff --git a/src/infra/heartbeat-runner.ts b/src/infra/heartbeat-runner.ts index 34b3a7b5f86..cf5b45f8993 100644 --- a/src/infra/heartbeat-runner.ts +++ b/src/infra/heartbeat-runner.ts @@ -35,6 +35,10 @@ import { import type { AgentDefaultsConfig } from "../config/types.agent-defaults.js"; import { resolveCronSession } from "../cron/isolated-agent/session.js"; import { createSubsystemLogger } from "../logging/subsystem.js"; +import { + hasOutboundReplyContent, + resolveSendableOutboundReplyParts, +} from "../plugin-sdk/reply-payload.js"; import { getQueueSize } from "../process/command-queue.js"; import { CommandLane } from "../process/lanes.js"; import { @@ -368,7 +372,7 @@ function normalizeHeartbeatReply( mode: "heartbeat", maxAckChars: ackMaxChars, }); - const hasMedia = Boolean(payload.mediaUrl || (payload.mediaUrls?.length ?? 0) > 0); + const hasMedia = resolveSendableOutboundReplyParts(payload).hasMedia; if (stripped.shouldSkip && !hasMedia) { return { shouldSkip: true, @@ -720,10 +724,7 @@ export async function runHeartbeatOnce(opts: { ? resolveHeartbeatReasoningPayloads(replyResult).filter((payload) => payload !== replyPayload) : []; - if ( - !replyPayload || - (!replyPayload.text && !replyPayload.mediaUrl && !replyPayload.mediaUrls?.length) - ) { + if (!replyPayload || !hasOutboundReplyContent(replyPayload)) { await restoreHeartbeatUpdatedAt({ storePath, sessionKey, @@ -780,8 +781,7 @@ export async function runHeartbeatOnce(opts: { return { status: "ran", durationMs: Date.now() - startedAt }; } - const mediaUrls = - replyPayload.mediaUrls ?? (replyPayload.mediaUrl ? [replyPayload.mediaUrl] : []); + const mediaUrls = resolveSendableOutboundReplyParts(replyPayload).mediaUrls; // Suppress duplicate heartbeats (same payload) within a short window. // This prevents "nagging" when nothing changed but the model repeats the same items. diff --git a/src/infra/outbound/deliver.ts b/src/infra/outbound/deliver.ts index b8bbc115988..84e1808e4f0 100644 --- a/src/infra/outbound/deliver.ts +++ b/src/infra/outbound/deliver.ts @@ -23,11 +23,11 @@ import { toPluginMessageContext, toPluginMessageSentEvent, } from "../../hooks/message-hook-mappers.js"; -import { hasReplyChannelData, hasReplyContent } from "../../interactive/payload.js"; +import { hasReplyPayloadContent } from "../../interactive/payload.js"; import { createSubsystemLogger } from "../../logging/subsystem.js"; import { getAgentScopedMediaLocalRoots } from "../../media/local-roots.js"; import { - resolveOutboundMediaUrls, + resolveSendableOutboundReplyParts, sendMediaWithLeadingCaption, } from "../../plugin-sdk/reply-payload.js"; import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js"; @@ -284,17 +284,8 @@ type MessageSentEvent = { function normalizeEmptyPayloadForDelivery(payload: ReplyPayload): ReplyPayload | null { const text = typeof payload.text === "string" ? payload.text : ""; - const hasChannelData = hasReplyChannelData(payload.channelData); if (!text.trim()) { - if ( - !hasReplyContent({ - text, - mediaUrl: payload.mediaUrl, - mediaUrls: payload.mediaUrls, - interactive: payload.interactive, - hasChannelData, - }) - ) { + if (!hasReplyPayloadContent({ ...payload, text })) { return null; } if (text) { @@ -340,9 +331,10 @@ function normalizePayloadsForChannelDelivery( } function buildPayloadSummary(payload: ReplyPayload): NormalizedOutboundPayload { + const parts = resolveSendableOutboundReplyParts(payload); return { - text: payload.text ?? "", - mediaUrls: resolveOutboundMediaUrls(payload), + text: parts.text, + mediaUrls: parts.mediaUrls, interactive: payload.interactive, channelData: payload.channelData, }; @@ -669,10 +661,10 @@ async function deliverOutboundPayloadsCore( }; if ( handler.sendPayload && - (effectivePayload.channelData || - hasReplyContent({ - interactive: effectivePayload.interactive, - })) + hasReplyPayloadContent({ + interactive: effectivePayload.interactive, + channelData: effectivePayload.channelData, + }) ) { const delivery = await handler.sendPayload(effectivePayload, sendOverrides); results.push(delivery); diff --git a/src/infra/outbound/message-action-runner.ts b/src/infra/outbound/message-action-runner.ts index 1777fbb32e3..635c9df1005 100644 --- a/src/infra/outbound/message-action-runner.ts +++ b/src/infra/outbound/message-action-runner.ts @@ -14,7 +14,7 @@ import type { ChannelThreadingToolContext, } from "../../channels/plugins/types.js"; import type { OpenClawConfig } from "../../config/config.js"; -import { hasInteractiveReplyBlocks, hasReplyContent } from "../../interactive/payload.js"; +import { hasInteractiveReplyBlocks, hasReplyPayloadContent } from "../../interactive/payload.js"; import { getAgentScopedMediaLocalRoots } from "../../media/local-roots.js"; import { hasPollCreationParams, resolveTelegramPollVisibility } from "../../poll-params.js"; import { resolvePollMaxSelections } from "../../polls.js"; @@ -484,13 +484,17 @@ async function handleSendAction(ctx: ResolvedActionContext): Promise payload.text) .filter(Boolean) .join("\n"); - const mirrorMediaUrls = normalizedPayloads.flatMap((payload) => - resolveOutboundMediaUrls(payload), + const mirrorMediaUrls = normalizedPayloads.flatMap( + (payload) => resolveSendableOutboundReplyParts(payload).mediaUrls, ); const primaryMediaUrl = mirrorMediaUrls[0] ?? params.mediaUrl ?? null; diff --git a/src/infra/outbound/payloads.ts b/src/infra/outbound/payloads.ts index fa9790888a4..2d90bb85a09 100644 --- a/src/infra/outbound/payloads.ts +++ b/src/infra/outbound/payloads.ts @@ -8,10 +8,10 @@ import type { ReplyPayload } from "../../auto-reply/types.js"; import { hasInteractiveReplyBlocks, hasReplyChannelData, - hasReplyContent, + hasReplyPayloadContent, type InteractiveReply, } from "../../interactive/payload.js"; -import { resolveOutboundMediaUrls } from "../../plugin-sdk/reply-payload.js"; +import { resolveSendableOutboundReplyParts } from "../../plugin-sdk/reply-payload.js"; export type NormalizedOutboundPayload = { text: string; @@ -97,25 +97,20 @@ export function normalizeOutboundPayloads( ): NormalizedOutboundPayload[] { const normalizedPayloads: NormalizedOutboundPayload[] = []; for (const payload of normalizeReplyPayloadsForDelivery(payloads)) { - const mediaUrls = resolveOutboundMediaUrls(payload); + const parts = resolveSendableOutboundReplyParts(payload); const interactive = payload.interactive; const channelData = payload.channelData; const hasChannelData = hasReplyChannelData(channelData); const hasInteractive = hasInteractiveReplyBlocks(interactive); - const text = payload.text ?? ""; + const text = parts.text; if ( - !hasReplyContent({ - text, - mediaUrls, - interactive, - hasChannelData, - }) + !hasReplyPayloadContent({ ...payload, text, mediaUrls: parts.mediaUrls }, { hasChannelData }) ) { continue; } normalizedPayloads.push({ text, - mediaUrls, + mediaUrls: parts.mediaUrls, ...(hasInteractive ? { interactive } : {}), ...(hasChannelData ? { channelData } : {}), }); @@ -128,11 +123,11 @@ export function normalizeOutboundPayloadsForJson( ): OutboundPayloadJson[] { const normalized: OutboundPayloadJson[] = []; for (const payload of normalizeReplyPayloadsForDelivery(payloads)) { - const mediaUrls = resolveOutboundMediaUrls(payload); + const parts = resolveSendableOutboundReplyParts(payload); normalized.push({ - text: payload.text ?? "", + text: parts.text, mediaUrl: payload.mediaUrl ?? null, - mediaUrls: mediaUrls.length ? mediaUrls : undefined, + mediaUrls: parts.mediaUrls.length ? parts.mediaUrls : undefined, interactive: payload.interactive, channelData: payload.channelData, }); diff --git a/src/interactive/payload.test.ts b/src/interactive/payload.test.ts index 3000716cd2e..12c071d5652 100644 --- a/src/interactive/payload.test.ts +++ b/src/interactive/payload.test.ts @@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest"; import { hasReplyChannelData, hasReplyContent, + hasReplyPayloadContent, normalizeInteractiveReply, resolveInteractiveTextFallback, } from "./payload.js"; @@ -44,6 +45,41 @@ describe("hasReplyContent", () => { }); }); +describe("hasReplyPayloadContent", () => { + it("trims text and falls back to channel data by default", () => { + expect( + hasReplyPayloadContent({ + text: " ", + channelData: { slack: { blocks: [] } }, + }), + ).toBe(true); + }); + + it("accepts explicit channel-data overrides and extra content", () => { + expect( + hasReplyPayloadContent( + { + text: " ", + channelData: {}, + }, + { + hasChannelData: true, + }, + ), + ).toBe(true); + expect( + hasReplyPayloadContent( + { + text: " ", + }, + { + extraContent: true, + }, + ), + ).toBe(true); + }); +}); + describe("interactive payload helpers", () => { it("normalizes interactive replies and resolves text fallbacks", () => { const interactive = normalizeInteractiveReply({ diff --git a/src/interactive/payload.ts b/src/interactive/payload.ts index 5ccd55d0eff..8ab80131a8e 100644 --- a/src/interactive/payload.ts +++ b/src/interactive/payload.ts @@ -160,6 +160,30 @@ export function hasReplyContent(params: { ); } +export function hasReplyPayloadContent( + payload: { + text?: string | null; + mediaUrl?: string | null; + mediaUrls?: ReadonlyArray; + interactive?: unknown; + channelData?: unknown; + }, + options?: { + trimText?: boolean; + hasChannelData?: boolean; + extraContent?: boolean; + }, +): boolean { + return hasReplyContent({ + text: options?.trimText ? payload.text?.trim() : payload.text, + mediaUrl: payload.mediaUrl, + mediaUrls: payload.mediaUrls, + interactive: payload.interactive, + hasChannelData: options?.hasChannelData ?? hasReplyChannelData(payload.channelData), + extraContent: options?.extraContent, + }); +} + export function resolveInteractiveTextFallback(params: { text?: string; interactive?: InteractiveReply; diff --git a/src/line/auto-reply-delivery.ts b/src/line/auto-reply-delivery.ts index aea6210dda4..91b2633f47c 100644 --- a/src/line/auto-reply-delivery.ts +++ b/src/line/auto-reply-delivery.ts @@ -1,6 +1,6 @@ import type { messagingApi } from "@line/bot-sdk"; import type { ReplyPayload } from "../auto-reply/types.js"; -import { resolveOutboundMediaUrls } from "../plugin-sdk/reply-payload.js"; +import { resolveSendableOutboundReplyParts } from "../plugin-sdk/reply-payload.js"; import type { FlexContainer } from "./flex-templates.js"; import type { ProcessedLineMessage } from "./markdown-to-line.js"; import type { SendLineReplyChunksParams } from "./reply-chunks.js"; @@ -124,7 +124,7 @@ export async function deliverLineAutoReply(params: { const chunks = processed.text ? deps.chunkMarkdownText(processed.text, textLimit) : []; - const mediaUrls = resolveOutboundMediaUrls(payload); + const mediaUrls = resolveSendableOutboundReplyParts(payload).mediaUrls; const mediaMessages = mediaUrls .map((url) => url?.trim()) .filter((url): url is string => Boolean(url)) diff --git a/src/plugin-sdk/msteams.ts b/src/plugin-sdk/msteams.ts index 02650a4a009..51f8ef257b2 100644 --- a/src/plugin-sdk/msteams.ts +++ b/src/plugin-sdk/msteams.ts @@ -46,7 +46,7 @@ export { splitSetupEntries, } from "../channels/plugins/setup-wizard-helpers.js"; export { PAIRING_APPROVED_MESSAGE } from "../channels/plugins/pairing-message.js"; -export { resolveOutboundMediaUrls } from "./reply-payload.js"; +export { resolveOutboundMediaUrls, resolveSendableOutboundReplyParts } from "./reply-payload.js"; export type { BaseProbeResult, ChannelDirectoryEntry, diff --git a/src/plugin-sdk/reply-payload.test.ts b/src/plugin-sdk/reply-payload.test.ts index 171b17f0e7e..ce393a9ecd3 100644 --- a/src/plugin-sdk/reply-payload.test.ts +++ b/src/plugin-sdk/reply-payload.test.ts @@ -1,9 +1,14 @@ import { describe, expect, it, vi } from "vitest"; import { + countOutboundMedia, deliverFormattedTextWithAttachments, deliverTextOrMediaReply, + hasOutboundMedia, + hasOutboundReplyContent, + hasOutboundText, isNumericTargetId, resolveOutboundMediaUrls, + resolveSendableOutboundReplyParts, resolveTextChunksWithFallback, sendMediaWithLeadingCaption, sendPayloadWithChunkedTextAndMedia, @@ -84,6 +89,102 @@ describe("resolveOutboundMediaUrls", () => { }); }); +describe("countOutboundMedia", () => { + it("counts normalized media entries", () => { + expect( + countOutboundMedia({ + mediaUrls: ["https://example.com/a.png", "https://example.com/b.png"], + }), + ).toBe(2); + }); + + it("counts legacy single-media payloads", () => { + expect( + countOutboundMedia({ + mediaUrl: "https://example.com/legacy.png", + }), + ).toBe(1); + }); +}); + +describe("hasOutboundMedia", () => { + it("reports whether normalized payloads include media", () => { + expect(hasOutboundMedia({ mediaUrls: ["https://example.com/a.png"] })).toBe(true); + expect(hasOutboundMedia({ mediaUrl: "https://example.com/legacy.png" })).toBe(true); + expect(hasOutboundMedia({})).toBe(false); + }); +}); + +describe("hasOutboundText", () => { + it("checks raw text presence by default", () => { + expect(hasOutboundText({ text: "hello" })).toBe(true); + expect(hasOutboundText({ text: " " })).toBe(true); + expect(hasOutboundText({})).toBe(false); + }); + + it("can trim whitespace-only text", () => { + expect(hasOutboundText({ text: " " }, { trim: true })).toBe(false); + expect(hasOutboundText({ text: " hi " }, { trim: true })).toBe(true); + }); +}); + +describe("hasOutboundReplyContent", () => { + it("detects text or media content", () => { + expect(hasOutboundReplyContent({ text: "hello" })).toBe(true); + expect(hasOutboundReplyContent({ mediaUrl: "https://example.com/a.png" })).toBe(true); + expect(hasOutboundReplyContent({})).toBe(false); + }); + + it("can ignore whitespace-only text unless media exists", () => { + expect(hasOutboundReplyContent({ text: " " }, { trimText: true })).toBe(false); + expect( + hasOutboundReplyContent( + { text: " ", mediaUrls: ["https://example.com/a.png"] }, + { trimText: true }, + ), + ).toBe(true); + }); +}); + +describe("resolveSendableOutboundReplyParts", () => { + it("normalizes missing text and trims media urls", () => { + expect( + resolveSendableOutboundReplyParts({ + mediaUrls: [" https://example.com/a.png ", " "], + }), + ).toEqual({ + text: "", + trimmedText: "", + mediaUrls: ["https://example.com/a.png"], + mediaCount: 1, + hasText: false, + hasMedia: true, + hasContent: true, + }); + }); + + it("accepts transformed text overrides", () => { + expect( + resolveSendableOutboundReplyParts( + { + text: "ignored", + }, + { + text: " hello ", + }, + ), + ).toEqual({ + text: " hello ", + trimmedText: "hello", + mediaUrls: [], + mediaCount: 0, + hasText: true, + hasMedia: false, + hasContent: true, + }); + }); +}); + describe("resolveTextChunksWithFallback", () => { it("returns existing chunks unchanged", () => { expect(resolveTextChunksWithFallback("hello", ["a", "b"])).toEqual(["a", "b"]); @@ -161,6 +262,26 @@ describe("deliverTextOrMediaReply", () => { expect(sendText).not.toHaveBeenCalled(); expect(sendMedia).not.toHaveBeenCalled(); }); + + it("ignores blank media urls before sending", async () => { + const sendMedia = vi.fn(async () => undefined); + const sendText = vi.fn(async () => undefined); + + await expect( + deliverTextOrMediaReply({ + payload: { text: "hello", mediaUrls: [" ", " https://a "] }, + text: "hello", + sendText, + sendMedia, + }), + ).resolves.toBe("media"); + + expect(sendMedia).toHaveBeenCalledTimes(1); + expect(sendMedia).toHaveBeenCalledWith({ + mediaUrl: "https://a", + caption: "hello", + }); + }); }); describe("sendMediaWithLeadingCaption", () => { diff --git a/src/plugin-sdk/reply-payload.ts b/src/plugin-sdk/reply-payload.ts index 3bee0c9e81b..52cc878c83d 100644 --- a/src/plugin-sdk/reply-payload.ts +++ b/src/plugin-sdk/reply-payload.ts @@ -5,6 +5,16 @@ export type OutboundReplyPayload = { replyToId?: string; }; +export type SendableOutboundReplyParts = { + text: string; + trimmedText: string; + mediaUrls: string[]; + mediaCount: number; + hasText: boolean; + hasMedia: boolean; + hasContent: boolean; +}; + /** Extract the supported outbound reply fields from loose tool or agent payload objects. */ export function normalizeOutboundReplyPayload( payload: Record, @@ -52,6 +62,54 @@ export function resolveOutboundMediaUrls(payload: { return []; } +/** Count outbound media items after legacy single-media fallback normalization. */ +export function countOutboundMedia(payload: { mediaUrls?: string[]; mediaUrl?: string }): number { + return resolveOutboundMediaUrls(payload).length; +} + +/** Check whether an outbound payload includes any media after normalization. */ +export function hasOutboundMedia(payload: { mediaUrls?: string[]; mediaUrl?: string }): boolean { + return countOutboundMedia(payload) > 0; +} + +/** Check whether an outbound payload includes text, optionally trimming whitespace first. */ +export function hasOutboundText(payload: { text?: string }, options?: { trim?: boolean }): boolean { + const text = options?.trim ? payload.text?.trim() : payload.text; + return Boolean(text); +} + +/** Check whether an outbound payload includes any sendable text or media. */ +export function hasOutboundReplyContent( + payload: { text?: string; mediaUrls?: string[]; mediaUrl?: string }, + options?: { trimText?: boolean }, +): boolean { + return hasOutboundText(payload, { trim: options?.trimText }) || hasOutboundMedia(payload); +} + +/** Normalize reply payload text/media into a trimmed, sendable shape for delivery paths. */ +export function resolveSendableOutboundReplyParts( + payload: { text?: string; mediaUrls?: string[]; mediaUrl?: string }, + options?: { text?: string }, +): SendableOutboundReplyParts { + const text = options?.text ?? payload.text ?? ""; + const trimmedText = text.trim(); + const mediaUrls = resolveOutboundMediaUrls(payload) + .map((entry) => entry.trim()) + .filter(Boolean); + const mediaCount = mediaUrls.length; + const hasText = Boolean(trimmedText); + const hasMedia = mediaCount > 0; + return { + text, + trimmedText, + mediaUrls, + mediaCount, + hasText, + hasMedia, + hasContent: hasText || hasMedia, + }; +} + /** Preserve caller-provided chunking, but fall back to the full text when chunkers return nothing. */ export function resolveTextChunksWithFallback(text: string, chunks: readonly string[]): string[] { if (chunks.length > 0) { @@ -188,7 +246,9 @@ export async function deliverTextOrMediaReply(params: { isFirst: boolean; }) => Promise | void; }): Promise<"empty" | "text" | "media"> { - const mediaUrls = resolveOutboundMediaUrls(params.payload); + const { mediaUrls } = resolveSendableOutboundReplyParts(params.payload, { + text: params.text, + }); const sentMedia = await sendMediaWithLeadingCaption({ mediaUrls, caption: params.text, diff --git a/src/plugin-sdk/subpaths.test.ts b/src/plugin-sdk/subpaths.test.ts index 2f4a30ae5ce..6a63b0f57ba 100644 --- a/src/plugin-sdk/subpaths.test.ts +++ b/src/plugin-sdk/subpaths.test.ts @@ -98,9 +98,13 @@ describe("plugin-sdk subpath exports", () => { }); it("exports reply payload helpers from the dedicated subpath", () => { + expect(typeof replyPayloadSdk.countOutboundMedia).toBe("function"); expect(typeof replyPayloadSdk.deliverFormattedTextWithAttachments).toBe("function"); expect(typeof replyPayloadSdk.deliverTextOrMediaReply).toBe("function"); expect(typeof replyPayloadSdk.formatTextWithAttachmentLinks).toBe("function"); + expect(typeof replyPayloadSdk.hasOutboundMedia).toBe("function"); + expect(typeof replyPayloadSdk.hasOutboundReplyContent).toBe("function"); + expect(typeof replyPayloadSdk.hasOutboundText).toBe("function"); expect(typeof replyPayloadSdk.resolveOutboundMediaUrls).toBe("function"); expect(typeof replyPayloadSdk.resolveTextChunksWithFallback).toBe("function"); expect(typeof replyPayloadSdk.sendMediaWithLeadingCaption).toBe("function"); diff --git a/src/plugin-sdk/zalouser.ts b/src/plugin-sdk/zalouser.ts index b02800880ec..e7fb506f227 100644 --- a/src/plugin-sdk/zalouser.ts +++ b/src/plugin-sdk/zalouser.ts @@ -71,6 +71,7 @@ export { deliverTextOrMediaReply, isNumericTargetId, resolveOutboundMediaUrls, + resolveSendableOutboundReplyParts, sendMediaWithLeadingCaption, sendPayloadWithChunkedTextAndMedia, } from "./reply-payload.js"; diff --git a/src/tts/tts.ts b/src/tts/tts.ts index 7d48dfb8e07..019cffdb2e4 100644 --- a/src/tts/tts.ts +++ b/src/tts/tts.ts @@ -24,6 +24,7 @@ import type { import { logVerbose } from "../globals.js"; import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js"; import { stripMarkdown } from "../line/markdown-to-line.js"; +import { resolveSendableOutboundReplyParts } from "../plugin-sdk/reply-payload.js"; import { CONFIG_DIR, resolveUserPath } from "../utils.js"; import { getSpeechProvider, @@ -793,7 +794,8 @@ export async function maybeApplyTtsToPayload(params: { return params.payload; } - const text = params.payload.text ?? ""; + const reply = resolveSendableOutboundReplyParts(params.payload); + const text = reply.text; const directives = parseTtsDirectives(text, config.modelOverrides, config.openai.baseUrl); if (directives.warnings.length > 0) { logVerbose(`TTS: ignored directive overrides (${directives.warnings.join("; ")})`); @@ -827,7 +829,7 @@ export async function maybeApplyTtsToPayload(params: { if (!ttsText.trim()) { return nextPayload; } - if (params.payload.mediaUrl || (params.payload.mediaUrls?.length ?? 0) > 0) { + if (reply.hasMedia) { return nextPayload; } if (text.includes("MEDIA:")) {