From 8dd6a2d323e1fb0f1134899f761a5049c18bf68a Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 3 May 2026 12:17:01 +0100 Subject: [PATCH] fix(channels): preserve degraded voice text and mention boundaries --- CHANGELOG.md | 2 ++ extensions/feishu/src/media.test.ts | 3 +- extensions/feishu/src/media.ts | 21 +++++++++++-- extensions/feishu/src/outbound.test.ts | 29 +++++++++++++++++ extensions/feishu/src/outbound.ts | 12 ++++++- .../feishu/src/reply-dispatcher.test.ts | 31 +++++++++++++++++++ extensions/feishu/src/reply-dispatcher.ts | 21 ++++++++++++- .../src/inbound/outbound-mentions.test.ts | 13 ++++++++ .../whatsapp/src/inbound/outbound-mentions.ts | 8 +++-- 9 files changed, 132 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c904fa5902..99f2eb42d3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,12 +43,14 @@ Docs: https://docs.openclaw.ai - CLI/sessions: keep intentional empty agent replies silent after tool-delivered channel output, instead of surfacing a misleading "No reply from agent." fallback. Thanks @vincentkoc. - Config/doctor: cap `.clobbered.*` forensic snapshots per config path and serialize snapshot writes so repeated `doctor --fix` recovery loops cannot flood the config directory. Fixes #76454; carries forward #65649. Thanks @JUSTICEESSIELP, @rsnow, and @vincentkoc. - Feishu: suppress duplicate text when replies send native voice media while preserving captions for ordinary audio files and falling back to text plus attachment links when voice uploads fail. +- Feishu: send the skipped reply text when `audioAsVoice` falls back to a generic file attachment after transcode failure, so voice-intent replies do not lose their caption. - Feishu: keep packaged Feishu startup from bundling the Lark SDK's ESM `__dirname` path by loading the SDK as a plugin-local runtime dependency. Fixes #76291 and #76494. (#76392) Thanks @zqchris. - Plugins/npm: build package-local runtime dist files for publishable plugins and stop listing root-package-excluded plugin sidecars in the core package metadata, so npm plugin installs such as `@openclaw/diffs` and `@openclaw/discord` no longer publish source-only runtime payloads. Fixes #76426. Thanks @PrinceOfEgypt. - Channels/secrets: resolve SecretRef-backed channel credentials through external plugin secret contracts after the plugin split, covering runtime startup, target discovery, webhook auth, disabled-account enumeration, and late-bound web_search config. Fixes #76371. (#76449) Thanks @joshavant and @neeravmakwana. - Docker/Gateway: pass Docker setup `.env` values into gateway and CLI containers and preserve exec SecretRef `passEnv` keys in managed service plans, so 1Password Connect-backed Discord tokens keep resolving after doctor or plugin repair. Thanks @vincentkoc. - Control UI/WebChat: explain compaction boundaries in chat history and link directly to session checkpoint controls so pre-compaction turns no longer look silently lost after refresh. Fixes #76415. Thanks @BunsDev. - Channels/WhatsApp: attach native outbound mention metadata for group text and media captions by resolving `@+` and `@` tokens against WhatsApp participant data, including LID groups. Fixes #39879; carries forward #56863. Thanks @kengi1437, @joe2643, and @fridayck. +- Channels/WhatsApp: require outbound mention tokens to end at a word boundary so phone-number prefixes inside longer strings no longer trigger hidden native mentions. - Plugins/uninstall: remove empty managed git install parent directories after deleting cloned plugin repos and cover npm/git uninstall residue in Docker plugin lifecycle tests. Thanks @vincentkoc. - Plugins/install: resolve bare official external plugin IDs such as `brave` through the official catalog when no bundled source is available, so packaged installs fetch the intended scoped npm package instead of an unrelated unscoped package. Fixes #76373. Thanks @bek91 and @vincentkoc. - Plugins/install: require OpenClaw-owned install provenance before granting official npm plugin scanner trust, so direct npm package names no longer bypass launch-code scanning while catalog, onboarding, and doctor installs stay trusted. Thanks @fede-kamel and @vincentkoc. diff --git a/extensions/feishu/src/media.test.ts b/extensions/feishu/src/media.test.ts index a4958759bd9..5a3b3bd9994 100644 --- a/extensions/feishu/src/media.test.ts +++ b/extensions/feishu/src/media.test.ts @@ -361,7 +361,7 @@ describe("sendMediaFeishu msg_type routing", () => { contentType: "audio/mpeg", }); - await sendMediaFeishu({ + const result = await sendMediaFeishu({ cfg: emptyConfig, to: "user:ou_target", mediaUrl: "https://example.com/reply.mp3", @@ -382,6 +382,7 @@ describe("sendMediaFeishu msg_type routing", () => { data: expect.objectContaining({ msg_type: "file" }), }), ); + expect(result).toEqual(expect.objectContaining({ voiceIntentDegradedToFile: true })); expect(warnSpy).toHaveBeenCalledWith( expect.stringContaining("audioAsVoice transcode failed"), expect.any(Error), diff --git a/extensions/feishu/src/media.ts b/extensions/feishu/src/media.ts index 5b328735613..a7b44220f51 100644 --- a/extensions/feishu/src/media.ts +++ b/extensions/feishu/src/media.ts @@ -399,6 +399,7 @@ export type UploadFileResult = { export type SendMediaResult = { messageId: string; chatId: string; + voiceIntentDegradedToFile?: boolean; }; /** @@ -872,10 +873,22 @@ export async function sendMediaFeishu(params: { contentType = prepared.contentType; const routing = resolveFeishuOutboundMediaKind({ fileName: name, contentType }); + const voiceIntentDegradedToFile = audioAsVoice === true && routing.msgType !== "audio"; if (routing.msgType === "image") { const { imageKey } = await uploadImageFeishu({ cfg, image: buffer, accountId }); - return sendImageFeishu({ cfg, to, imageKey, replyToMessageId, replyInThread, accountId }); + const result = await sendImageFeishu({ + cfg, + to, + imageKey, + replyToMessageId, + replyInThread, + accountId, + }); + return { + ...result, + ...(voiceIntentDegradedToFile ? { voiceIntentDegradedToFile: true } : {}), + }; } const { fileKey } = await uploadFileFeishu({ cfg, @@ -884,7 +897,7 @@ export async function sendMediaFeishu(params: { fileType: routing.fileType ?? "stream", accountId, }); - return sendFileFeishu({ + const result = await sendFileFeishu({ cfg, to, fileKey, @@ -893,4 +906,8 @@ export async function sendMediaFeishu(params: { replyInThread, accountId, }); + return { + ...result, + ...(voiceIntentDegradedToFile ? { voiceIntentDegradedToFile: true } : {}), + }; } diff --git a/extensions/feishu/src/outbound.test.ts b/extensions/feishu/src/outbound.test.ts index 313e11828c8..0999acf6cb1 100644 --- a/extensions/feishu/src/outbound.test.ts +++ b/extensions/feishu/src/outbound.test.ts @@ -880,6 +880,35 @@ describe("feishuOutbound.sendMedia replyToId forwarding", () => { ); }); + it("sends skipped voice text when voice media degrades to a file attachment", async () => { + sendMediaFeishuMock.mockResolvedValueOnce({ + messageId: "file_msg", + voiceIntentDegradedToFile: true, + }); + + await feishuOutbound.sendMedia?.({ + cfg: emptyConfig, + to: "chat_1", + text: "spoken reply", + mediaUrl: "https://example.com/reply.mp3", + audioAsVoice: true, + accountId: "main", + }); + + expect(sendMediaFeishuMock).toHaveBeenCalledWith( + expect.objectContaining({ + mediaUrl: "https://example.com/reply.mp3", + audioAsVoice: true, + }), + ); + expect(sendMessageFeishuMock).toHaveBeenCalledTimes(1); + expect(sendMessageFeishuMock).toHaveBeenCalledWith( + expect.objectContaining({ + text: "spoken reply", + }), + ); + }); + it("suppresses duplicate text for native voice media without audioAsVoice", async () => { await feishuOutbound.sendMedia?.({ cfg: emptyConfig, diff --git a/extensions/feishu/src/outbound.ts b/extensions/feishu/src/outbound.ts index 947b5797836..3624a49ff1e 100644 --- a/extensions/feishu/src/outbound.ts +++ b/extensions/feishu/src/outbound.ts @@ -693,7 +693,7 @@ export const feishuOutbound: ChannelOutboundAdapter = { // Upload and send media if URL or local path provided if (mediaUrl) { try { - return await sendMediaFeishu({ + const result = await sendMediaFeishu({ cfg, to, mediaUrl, @@ -702,6 +702,16 @@ export const feishuOutbound: ChannelOutboundAdapter = { replyToMessageId, ...(audioAsVoice === true ? { audioAsVoice: true } : {}), }); + if (result.voiceIntentDegradedToFile && text?.trim()) { + await sendOutboundText({ + cfg, + to, + text, + accountId: accountId ?? undefined, + replyToMessageId, + }); + } + return result; } catch (err) { // Log the error for debugging console.error(`[feishu] sendMediaFeishu failed:`, err); diff --git a/extensions/feishu/src/reply-dispatcher.test.ts b/extensions/feishu/src/reply-dispatcher.test.ts index ecb7d5bca49..40b8330f8c5 100644 --- a/extensions/feishu/src/reply-dispatcher.test.ts +++ b/extensions/feishu/src/reply-dispatcher.test.ts @@ -648,6 +648,37 @@ describe("createFeishuReplyDispatcher streaming behavior", () => { ); }); + it("sends skipped voice text when final voice media degrades to a file attachment", async () => { + sendMediaFeishuMock.mockResolvedValueOnce({ + messageId: "file_msg", + voiceIntentDegradedToFile: true, + }); + + const { options } = createDispatcherHarness(); + await options.deliver( + { + text: "spoken reply", + mediaUrl: "https://example.com/reply.mp3", + audioAsVoice: true, + }, + { kind: "final" }, + ); + + expect(sendMediaFeishuMock).toHaveBeenCalledTimes(1); + expect(sendMediaFeishuMock).toHaveBeenCalledWith( + expect.objectContaining({ + mediaUrl: "https://example.com/reply.mp3", + audioAsVoice: true, + }), + ); + expect(sendMessageFeishuMock).toHaveBeenCalledTimes(1); + expect(sendMessageFeishuMock).toHaveBeenCalledWith( + expect.objectContaining({ + text: "spoken reply", + }), + ); + }); + it("suppresses duplicate text for native voice media without audioAsVoice", async () => { const { options } = createDispatcherHarness(); await options.deliver( diff --git a/extensions/feishu/src/reply-dispatcher.ts b/extensions/feishu/src/reply-dispatcher.ts index c02ad204a03..3b3e47247b2 100644 --- a/extensions/feishu/src/reply-dispatcher.ts +++ b/extensions/feishu/src/reply-dispatcher.ts @@ -444,7 +444,7 @@ export function createFeishuReplyDispatcher(params: CreateFeishuReplyDispatcherP mediaUrls, caption: "", send: async ({ mediaUrl }) => { - await sendMediaFeishu({ + const result = await sendMediaFeishu({ cfg, to: chatId, mediaUrl, @@ -453,6 +453,25 @@ export function createFeishuReplyDispatcher(params: CreateFeishuReplyDispatcherP accountId, ...(payload.audioAsVoice === true ? { audioAsVoice: true } : {}), }); + if (result?.voiceIntentDegradedToFile && options?.fallbackText && !sentFallbackText) { + sentFallbackText = true; + await sendChunkedTextReply({ + text: options.fallbackText, + useCard: false, + infoKind: "final", + sendChunk: async ({ chunk, isFirst }) => { + await sendMessageFeishu({ + cfg, + to: chatId, + text: chunk, + replyToMessageId: sendReplyToMessageId, + replyInThread: effectiveReplyInThread, + mentions: isFirst ? mentionTargets : undefined, + accountId, + }); + }, + }); + } }, onError: options?.fallbackText === undefined diff --git a/extensions/whatsapp/src/inbound/outbound-mentions.test.ts b/extensions/whatsapp/src/inbound/outbound-mentions.test.ts index 4457e0e9450..8f605210519 100644 --- a/extensions/whatsapp/src/inbound/outbound-mentions.test.ts +++ b/extensions/whatsapp/src/inbound/outbound-mentions.test.ts @@ -114,6 +114,19 @@ describe("resolveWhatsAppOutboundMentions", () => { }); }); + it("does not mention numeric prefixes inside longer tokens", () => { + expect( + resolveWhatsAppOutboundMentions({ + chatJid: "120363000000000000@g.us", + text: "literal @15551234567abc and x@15551234567", + participants: [{ id: "15551234567@s.whatsapp.net" }], + }), + ).toEqual({ + text: "literal @15551234567abc and x@15551234567", + mentionedJids: [], + }); + }); + it("does not add mention metadata for direct chats or unmatched group participants", () => { expect( resolveWhatsAppOutboundMentions({ diff --git a/extensions/whatsapp/src/inbound/outbound-mentions.ts b/extensions/whatsapp/src/inbound/outbound-mentions.ts index ed165edbd48..1996e5d06ca 100644 --- a/extensions/whatsapp/src/inbound/outbound-mentions.ts +++ b/extensions/whatsapp/src/inbound/outbound-mentions.ts @@ -175,13 +175,15 @@ function buildMentionTargetMaps(participants: readonly WhatsAppOutboundMentionPa function shouldSkipMentionAt( text: string, index: number, + end: number, codeRanges: readonly TextRange[], ): boolean { if (isInRange(index, codeRanges)) { return true; } const previous = index > 0 ? text[index - 1] : ""; - return Boolean(previous && /[\w@]/.test(previous)); + const next = text[end] ?? ""; + return Boolean((previous && /[\w@]/.test(previous)) || (next && /[\w@]/.test(next))); } export function resolveWhatsAppOutboundMentions(params: { @@ -209,10 +211,10 @@ export function resolveWhatsAppOutboundMentions(params: { for (const match of params.text.matchAll(OUTBOUND_MENTION_RE)) { const start = match.index; - if (shouldSkipMentionAt(params.text, start, codeRanges)) { + const token = match[0]; + if (shouldSkipMentionAt(params.text, start, start + token.length, codeRanges)) { continue; } - const token = match[0]; const digits = match[1].replace(/\D/g, ""); const target = token.startsWith("@+") ? (byPhone.get(digits) ?? byLid.get(digits))