From 14312ff5704f4c2e71e7a404845edc1c96be9dd4 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Sat, 2 May 2026 23:18:15 -0700 Subject: [PATCH] fix(feishu): avoid duplicate voice reply text --- CHANGELOG.md | 1 + extensions/feishu/src/media.test.ts | 21 ++++ extensions/feishu/src/media.ts | 35 +++++++ extensions/feishu/src/outbound.test.ts | 97 +++++++++++++++++-- extensions/feishu/src/outbound.ts | 34 ++++--- .../feishu/src/reply-dispatcher.test.ts | 95 +++++++++++++++++- extensions/feishu/src/reply-dispatcher.ts | 58 ++++++++++- 7 files changed, 317 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44d6db3c2bb..c139b430927 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai - Agents/sessions: keep delayed `sessions_send` A2A replies alive after soft wait-window timeouts, while preserving terminal run timeouts and avoiding stale target replies in requester sessions. Fixes #76443. Thanks @ryswork1993 and @vincentkoc. - Config/doctor: cap `.clobbered.*` forensic snapshots per config path and serialize snapshot writes so repeated `doctor --fix` recovery loops cannot flood the config directory. Fixes #76454; carries forward #65649. Thanks @JUSTICEESSIELP, @rsnow, and @vincentkoc. +- Feishu: suppress duplicate text when replies send native voice media while preserving captions for ordinary audio files and falling back to text plus attachment links when voice uploads fail. - Channels/secrets: resolve SecretRef-backed channel credentials through external plugin secret contracts after the plugin split, covering runtime startup, target discovery, webhook auth, disabled-account enumeration, and late-bound web_search config. Fixes #76371. (#76449) Thanks @joshavant and @neeravmakwana. - Docker/Gateway: pass Docker setup `.env` values into gateway and CLI containers and preserve exec SecretRef `passEnv` keys in managed service plans, so 1Password Connect-backed Discord tokens keep resolving after doctor or plugin repair. Thanks @vincentkoc. - Control UI/WebChat: explain compaction boundaries in chat history and link directly to session checkpoint controls so pre-compaction turns no longer look silently lost after refresh. Fixes #76415. Thanks @BunsDev. diff --git a/extensions/feishu/src/media.test.ts b/extensions/feishu/src/media.test.ts index efc6f67b991..a4958759bd9 100644 --- a/extensions/feishu/src/media.test.ts +++ b/extensions/feishu/src/media.test.ts @@ -55,6 +55,7 @@ let downloadImageFeishu: typeof import("./media.js").downloadImageFeishu; let downloadMessageResourceFeishu: typeof import("./media.js").downloadMessageResourceFeishu; let sanitizeFileNameForUpload: typeof import("./media.js").sanitizeFileNameForUpload; let sendMediaFeishu: typeof import("./media.js").sendMediaFeishu; +let shouldSuppressFeishuTextForVoiceMedia: typeof import("./media.js").shouldSuppressFeishuTextForVoiceMedia; function expectPathIsolatedToTmpRoot(pathValue: string, key: string): void { expect(pathValue).not.toContain(key); @@ -92,6 +93,7 @@ describe("sendMediaFeishu msg_type routing", () => { downloadMessageResourceFeishu, sanitizeFileNameForUpload, sendMediaFeishu, + shouldSuppressFeishuTextForVoiceMedia, } = await import("./media.js")); }); @@ -155,6 +157,25 @@ describe("sendMediaFeishu msg_type routing", () => { }); }); + it("suppresses reply text only for voice-intent or native voice media", () => { + expect( + shouldSuppressFeishuTextForVoiceMedia({ + mediaUrl: "https://example.com/reply.mp3", + audioAsVoice: true, + }), + ).toBe(true); + expect( + shouldSuppressFeishuTextForVoiceMedia({ + mediaUrl: "https://example.com/reply.ogg?download=1", + }), + ).toBe(true); + expect( + shouldSuppressFeishuTextForVoiceMedia({ + mediaUrl: "https://example.com/song.mp3", + }), + ).toBe(false); + }); + it("uses msg_type=media for mp4 video", async () => { await sendMediaFeishu({ cfg: emptyConfig, diff --git a/extensions/feishu/src/media.ts b/extensions/feishu/src/media.ts index b9241827eff..5b328735613 100644 --- a/extensions/feishu/src/media.ts +++ b/extensions/feishu/src/media.ts @@ -694,6 +694,41 @@ function isFeishuNativeVoiceAudio(params: { fileName: string; contentType?: stri ); } +function normalizeMediaNameForExtension(raw: string): string { + try { + return new URL(raw).pathname; + } catch { + return raw.split(/[?#]/, 1)[0] ?? raw; + } +} + +export function shouldSuppressFeishuTextForVoiceMedia(params: { + mediaUrl?: string; + fileName?: string; + contentType?: string; + audioAsVoice?: boolean; +}): boolean { + if (params.audioAsVoice === true) { + return true; + } + if ( + params.fileName && + isFeishuNativeVoiceAudio({ + fileName: params.fileName, + contentType: params.contentType, + }) + ) { + return true; + } + if (!params.mediaUrl) { + return false; + } + return isFeishuNativeVoiceAudio({ + fileName: normalizeMediaNameForExtension(params.mediaUrl), + contentType: params.contentType, + }); +} + function isLikelyTranscodableAudio(params: { fileName: string; contentType?: string }): boolean { const ext = normalizeLowercaseStringOrEmpty(path.extname(params.fileName)); const contentType = normalizeLowercaseStringOrEmpty(params.contentType); diff --git a/extensions/feishu/src/outbound.test.ts b/extensions/feishu/src/outbound.test.ts index 5ec2bb40ab6..313e11828c8 100644 --- a/extensions/feishu/src/outbound.test.ts +++ b/extensions/feishu/src/outbound.test.ts @@ -12,9 +12,14 @@ const sendMarkdownCardFeishuMock = vi.hoisted(() => vi.fn()); const sendStructuredCardFeishuMock = vi.hoisted(() => vi.fn()); const deliverCommentThreadTextMock = vi.hoisted(() => vi.fn()); const cleanupAmbientCommentTypingReactionMock = vi.hoisted(() => vi.fn(async () => false)); +const shouldSuppressFeishuTextForVoiceMediaMock = vi.hoisted( + () => (params: { mediaUrl?: string; audioAsVoice?: boolean }) => + params.audioAsVoice === true || /\.(?:ogg|opus)(?:[?#]|$)/i.test(params.mediaUrl ?? ""), +); vi.mock("./media.js", () => ({ sendMediaFeishu: sendMediaFeishuMock, + shouldSuppressFeishuTextForVoiceMedia: shouldSuppressFeishuTextForVoiceMediaMock, })); vi.mock("./send.js", () => ({ @@ -406,13 +411,13 @@ describe("feishuOutbound.sendPayload native cards", () => { await feishuOutbound.sendPayload?.({ cfg: emptyConfig, to: "chat_1", - text: "Choose ", + text: 'Choose ', accountId: "main", payload: { - text: "Choose ", + text: 'Choose ', presentation: { blocks: [ - { type: "context", text: "Injected" }, + { type: "context", text: 'Injected' }, { type: "buttons", buttons: [ @@ -428,10 +433,11 @@ describe("feishuOutbound.sendPayload native cards", () => { const card = sendCardFeishuMock.mock.calls[0][0].card; expect(card.body.elements).toEqual( expect.arrayContaining([ - { tag: "markdown", content: "Choose <at id=\"ou_1\">" }, + { tag: "markdown", content: 'Choose <at id="ou_1">' }, { tag: "markdown", - content: "</font><at id=\"ou_2\">Injected</at>", + content: + "</font><at id=\"ou_2\">Injected</at>", }, { tag: "action", @@ -466,7 +472,7 @@ describe("feishuOutbound.sendPayload native cards", () => { body: { elements: [ { tag: "img", img_key: "image-secret" }, - { tag: "markdown", content: "ping" }, + { tag: "markdown", content: 'ping' }, { tag: "action", actions: [ @@ -493,7 +499,7 @@ describe("feishuOutbound.sendPayload native cards", () => { const card = sendCardFeishuMock.mock.calls[0][0].card; expect(card.header.template).toBe("blue"); expect(card.body.elements).toEqual([ - { tag: "markdown", content: "<at id=\"ou_1\">ping</at>" }, + { tag: "markdown", content: '<at id="ou_1">ping</at>' }, { tag: "action", actions: [ @@ -855,6 +861,83 @@ describe("feishuOutbound.sendMedia replyToId forwarding", () => { ); }); + it("suppresses duplicate text when sending voice media", async () => { + await feishuOutbound.sendMedia?.({ + cfg: emptyConfig, + to: "chat_1", + text: "spoken reply", + mediaUrl: "https://example.com/reply.mp3", + audioAsVoice: true, + accountId: "main", + }); + + expect(sendMessageFeishuMock).not.toHaveBeenCalled(); + expect(sendMediaFeishuMock).toHaveBeenCalledWith( + expect.objectContaining({ + mediaUrl: "https://example.com/reply.mp3", + audioAsVoice: true, + }), + ); + }); + + it("suppresses duplicate text for native voice media without audioAsVoice", async () => { + await feishuOutbound.sendMedia?.({ + cfg: emptyConfig, + to: "chat_1", + text: "spoken reply", + mediaUrl: "https://example.com/reply.ogg?download=1", + accountId: "main", + }); + + expect(sendMessageFeishuMock).not.toHaveBeenCalled(); + expect(sendMediaFeishuMock).toHaveBeenCalledWith( + expect.objectContaining({ + mediaUrl: "https://example.com/reply.ogg?download=1", + }), + ); + }); + + it("keeps captions for regular audio file attachments", async () => { + await feishuOutbound.sendMedia?.({ + cfg: emptyConfig, + to: "chat_1", + text: "caption text", + mediaUrl: "https://example.com/song.mp3", + accountId: "main", + }); + + expect(sendMessageFeishuMock).toHaveBeenCalledWith( + expect.objectContaining({ + text: "caption text", + }), + ); + expect(sendMediaFeishuMock).toHaveBeenCalledWith( + expect.objectContaining({ + mediaUrl: "https://example.com/song.mp3", + }), + ); + }); + + it("keeps skipped voice text in the upload failure fallback", async () => { + sendMediaFeishuMock.mockRejectedValueOnce(new Error("upload failed")); + + await feishuOutbound.sendMedia?.({ + cfg: emptyConfig, + to: "chat_1", + text: "spoken reply", + mediaUrl: "https://example.com/reply.mp3", + audioAsVoice: true, + accountId: "main", + }); + + expect(sendMessageFeishuMock).toHaveBeenCalledTimes(1); + expect(sendMessageFeishuMock).toHaveBeenCalledWith( + expect.objectContaining({ + text: "spoken reply\n\nšŸ“Ž https://example.com/reply.mp3", + }), + ); + }); + it("forwards replyToId to text caption send", async () => { await feishuOutbound.sendMedia?.({ cfg: emptyConfig, diff --git a/extensions/feishu/src/outbound.ts b/extensions/feishu/src/outbound.ts index 7e6a9d0d47f..947b5797836 100644 --- a/extensions/feishu/src/outbound.ts +++ b/extensions/feishu/src/outbound.ts @@ -25,7 +25,7 @@ import { createFeishuClient } from "./client.js"; import { cleanupAmbientCommentTypingReaction } from "./comment-reaction.js"; import { parseFeishuCommentTarget } from "./comment-target.js"; import { deliverCommentThreadText } from "./drive.js"; -import { sendMediaFeishu } from "./media.js"; +import { sendMediaFeishu, shouldSuppressFeishuTextForVoiceMedia } from "./media.js"; import { chunkTextForOutbound, type ChannelOutboundAdapter } from "./outbound-runtime-api.js"; import { resolveFeishuCardTemplate, @@ -132,9 +132,10 @@ function sanitizeNativeFeishuCardButton(button: unknown): Record): Record | undefined { +function sanitizeNativeFeishuCard( + card: Record, +): Record | undefined { const body = isRecord(card.body) ? card.body : undefined; const rawElements = Array.isArray(body?.elements) ? body.elements : []; const elements = rawElements @@ -187,9 +190,10 @@ function sanitizeNativeFeishuCard(card: Record): Record vi.fn()); const addTypingIndicatorMock = vi.hoisted(() => vi.fn(async () => ({ messageId: "om_msg" }))); const removeTypingIndicatorMock = vi.hoisted(() => vi.fn(async () => {})); const streamingInstances = vi.hoisted((): StreamingSessionStub[] => []); +const shouldSuppressFeishuTextForVoiceMediaMock = vi.hoisted( + () => (params: { mediaUrl?: string; audioAsVoice?: boolean }) => + params.audioAsVoice === true || /\.(?:ogg|opus)(?:[?#]|$)/i.test(params.mediaUrl ?? ""), +); function mergeStreamingText( previousText: string | undefined, @@ -58,7 +62,10 @@ vi.mock("./send.js", () => ({ sendMarkdownCardFeishu: sendMarkdownCardFeishuMock, sendStructuredCardFeishu: sendStructuredCardFeishuMock, })); -vi.mock("./media.js", () => ({ sendMediaFeishu: sendMediaFeishuMock })); +vi.mock("./media.js", () => ({ + sendMediaFeishu: sendMediaFeishuMock, + shouldSuppressFeishuTextForVoiceMedia: shouldSuppressFeishuTextForVoiceMediaMock, +})); vi.mock("./client.js", () => ({ createFeishuClient: createFeishuClientMock })); vi.mock("./targets.js", () => ({ resolveReceiveIdType: resolveReceiveIdTypeMock })); vi.mock("./typing.js", () => ({ @@ -619,6 +626,92 @@ describe("createFeishuReplyDispatcher streaming behavior", () => { ); }); + it("suppresses duplicate text when final replies send voice media", async () => { + const { options } = createDispatcherHarness(); + await options.deliver( + { + text: "spoken reply", + mediaUrl: "https://example.com/reply.mp3", + audioAsVoice: true, + }, + { kind: "final" }, + ); + + expect(sendMessageFeishuMock).not.toHaveBeenCalled(); + expect(sendStructuredCardFeishuMock).not.toHaveBeenCalled(); + expect(sendMediaFeishuMock).toHaveBeenCalledTimes(1); + expect(sendMediaFeishuMock).toHaveBeenCalledWith( + expect.objectContaining({ + mediaUrl: "https://example.com/reply.mp3", + audioAsVoice: true, + }), + ); + }); + + it("suppresses duplicate text for native voice media without audioAsVoice", async () => { + const { options } = createDispatcherHarness(); + await options.deliver( + { + text: "spoken reply", + mediaUrl: "https://example.com/reply.opus?download=1", + }, + { kind: "final" }, + ); + + expect(sendMessageFeishuMock).not.toHaveBeenCalled(); + expect(sendMediaFeishuMock).toHaveBeenCalledTimes(1); + expect(sendMediaFeishuMock).toHaveBeenCalledWith( + expect.objectContaining({ + mediaUrl: "https://example.com/reply.opus?download=1", + }), + ); + }); + + it("preserves captions for regular audio attachments", async () => { + const { options } = createDispatcherHarness(); + await options.deliver( + { + text: "caption text", + mediaUrl: "https://example.com/song.mp3", + }, + { kind: "final" }, + ); + + expect(sendMessageFeishuMock).toHaveBeenCalledTimes(1); + expect(sendMessageFeishuMock).toHaveBeenCalledWith( + expect.objectContaining({ + text: "caption text", + }), + ); + expect(sendMediaFeishuMock).toHaveBeenCalledTimes(1); + expect(sendMediaFeishuMock).toHaveBeenCalledWith( + expect.objectContaining({ + mediaUrl: "https://example.com/song.mp3", + }), + ); + }); + + it("keeps skipped voice text in the upload failure fallback", async () => { + sendMediaFeishuMock.mockRejectedValueOnce(new Error("media failed")); + + const { options } = createDispatcherHarness(); + await options.deliver( + { + text: "spoken reply", + mediaUrl: "https://example.com/reply.mp3", + audioAsVoice: true, + }, + { kind: "final" }, + ); + + expect(sendMessageFeishuMock).toHaveBeenCalledTimes(1); + expect(sendMessageFeishuMock).toHaveBeenCalledWith( + expect.objectContaining({ + text: "spoken reply\n\nšŸ“Ž https://example.com/reply.mp3", + }), + ); + }); + it("falls back to legacy mediaUrl when mediaUrls is an empty array", async () => { const { options } = createDispatcherHarness(); await options.deliver( diff --git a/extensions/feishu/src/reply-dispatcher.ts b/extensions/feishu/src/reply-dispatcher.ts index 7fc57bb7061..c02ad204a03 100644 --- a/extensions/feishu/src/reply-dispatcher.ts +++ b/extensions/feishu/src/reply-dispatcher.ts @@ -8,7 +8,7 @@ import { import { stripReasoningTagsFromText } from "openclaw/plugin-sdk/text-runtime"; import { resolveFeishuRuntimeAccount } from "./accounts.js"; import { createFeishuClient } from "./client.js"; -import { sendMediaFeishu } from "./media.js"; +import { sendMediaFeishu, shouldSuppressFeishuTextForVoiceMedia } from "./media.js"; import type { MentionTarget } from "./mention-target.types.js"; import { buildMentionedCardContent } from "./mention.js"; import { @@ -54,6 +54,12 @@ function rememberStreamingStartFailure(accountId: string, now = Date.now()): num return backoffUntil; } +function formatMediaFallbackText(text: string | undefined, mediaUrl: string): string { + const trimmedText = text?.trim() ?? ""; + const attachmentText = `šŸ“Ž ${mediaUrl}`; + return trimmedText ? `${trimmedText}\n\n${attachmentText}` : attachmentText; +} + export function clearFeishuStreamingStartBackoffForTests() { streamingStartBackoffUntilByAccount.clear(); } @@ -431,9 +437,11 @@ export function createFeishuReplyDispatcher(params: CreateFeishuReplyDispatcherP } }; - const sendMediaReplies = async (payload: ReplyPayload) => { + const sendMediaReplies = async (payload: ReplyPayload, options?: { fallbackText?: string }) => { + const mediaUrls = resolveSendableOutboundReplyParts(payload).mediaUrls; + let sentFallbackText = false; await sendMediaWithLeadingCaption({ - mediaUrls: resolveSendableOutboundReplyParts(payload).mediaUrls, + mediaUrls, caption: "", send: async ({ mediaUrl }) => { await sendMediaFeishu({ @@ -446,6 +454,32 @@ export function createFeishuReplyDispatcher(params: CreateFeishuReplyDispatcherP ...(payload.audioAsVoice === true ? { audioAsVoice: true } : {}), }); }, + onError: + options?.fallbackText === undefined + ? undefined + : async ({ mediaUrl }) => { + const fallbackText = formatMediaFallbackText( + sentFallbackText ? undefined : options.fallbackText, + mediaUrl, + ); + sentFallbackText = true; + await sendChunkedTextReply({ + text: fallbackText, + useCard: false, + infoKind: "final", + sendChunk: async ({ chunk, isFirst }) => { + await sendMessageFeishu({ + cfg, + to: chatId, + text: chunk, + replyToMessageId: sendReplyToMessageId, + replyInThread: effectiveReplyInThread, + mentions: isFirst ? mentionTargets : undefined, + accountId, + }); + }, + }); + }, }); }; @@ -468,6 +502,14 @@ export function createFeishuReplyDispatcher(params: CreateFeishuReplyDispatcherP const text = reply.text; const hasText = reply.hasText; const hasMedia = reply.hasMedia; + const hasVoiceMedia = + hasMedia && + reply.mediaUrls.some((mediaUrl) => + shouldSuppressFeishuTextForVoiceMedia({ + mediaUrl, + ...(payload.audioAsVoice === true ? { audioAsVoice: true } : {}), + }), + ); const useCard = hasText && (renderMode === "card" || (renderMode === "auto" && shouldUseCard(text))); const skipTextForDuplicateFinal = @@ -480,7 +522,10 @@ export function createFeishuReplyDispatcher(params: CreateFeishuReplyDispatcherP streamingEnabled && useCard; const shouldDeliverText = - hasText && !skipTextForDuplicateFinal && !skipTextForClosedStreamingFinal; + hasText && + !hasVoiceMedia && + !skipTextForDuplicateFinal && + !skipTextForClosedStreamingFinal; if (!shouldDeliverText && !hasMedia) { return; @@ -567,7 +612,10 @@ export function createFeishuReplyDispatcher(params: CreateFeishuReplyDispatcherP } if (hasMedia) { - await sendMediaReplies(payload); + await sendMediaReplies( + payload, + hasVoiceMedia && hasText ? { fallbackText: text } : undefined, + ); } }, onError: async (error, info) => {