diff --git a/CHANGELOG.md b/CHANGELOG.md index 655de6d7427..2dfa2981103 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -110,6 +110,7 @@ Docs: https://docs.openclaw.ai - Agents/exec: restore `host=node` routing for node-pinned and `host=auto` sessions, while still blocking sandboxed `auto` sessions from jumping to gateway. (#60788) Thanks @openperf. - Agents/compaction: keep assistant tool calls and displaced tool results in the same compaction chunk so strict summarization providers stop rejecting orphaned tool pairs. (#58849) Thanks @openperf. - Outbound/sanitizer: strip leaked ``, ``, and model special tokens from shared user-visible assistant text, including truncated tool-call streams, so internal scaffolding no longer bleeds into replies across surfaces. (#60619) Thanks @oliviareid-svg. +- Telegram: restore DM voice-note preflight transcription so direct-message audio stops arriving as raw `<media:audio>` placeholders. (#61008) Thanks @manueltarouca. - Control UI/avatar: honor `ui.assistant.avatar` when serving `/avatar/:agentId` so Appearance UI avatar paths stop falling back to initials placeholders. (#60778) Thanks @hannasdev. - Control UI/Overview: prevent gateway access token/password visibility toggle buttons from overlapping their inputs at narrow widths. (#56924) Thanks @bbddbb1. - Control UI/cron: highlight the Cron refresh button while refresh is in flight so the page's loading state stays visible even when prior data remains on screen. (#60394) Thanks @coder-zhuzm. 
diff --git a/extensions/telegram/src/bot-message-context.body.test.ts b/extensions/telegram/src/bot-message-context.body.test.ts index e8f8cd38350..c9a01b87d76 100644 --- a/extensions/telegram/src/bot-message-context.body.test.ts +++ b/extensions/telegram/src/bot-message-context.body.test.ts @@ -145,4 +145,49 @@ describe("resolveTelegramInboundBody", () => { effectiveWasMentioned: true, }); }); + + it("transcribes DM voice notes via preflight (not only groups)", async () => { + transcribeFirstAudioMock.mockReset(); + transcribeFirstAudioMock.mockResolvedValueOnce("hello from a voice note"); + + const result = await resolveTelegramInboundBody({ + cfg: { + channels: { telegram: {} }, + tools: { media: { audio: { enabled: true } } }, + } as never, + primaryCtx: { + me: { id: 7, username: "bot" }, + } as never, + msg: { + message_id: 10, + date: 1_700_000_010, + chat: { id: 42, type: "private", first_name: "Pat" }, + from: { id: 42, first_name: "Pat" }, + voice: { file_id: "voice-dm-1" }, + entities: [], + } as never, + allMedia: [{ path: "/tmp/voice-dm.ogg", contentType: "audio/ogg" }], + isGroup: false, + chatId: 42, + senderId: "42", + senderUsername: "", + routeAgentId: undefined, + effectiveGroupAllow: normalizeAllowFrom([]), + effectiveDmAllow: normalizeAllowFrom([]), + groupConfig: undefined, + topicConfig: undefined, + requireMention: false, + options: undefined, + groupHistories: new Map(), + historyLimit: 0, + logger: { info: vi.fn() }, + }); + + expect(transcribeFirstAudioMock).toHaveBeenCalledTimes(1); + expect(result).toMatchObject({ + bodyText: "hello from a voice note", + }); + // Guard against the raw audio placeholder leaking into the resolved body. + expect(result?.bodyText).not.toContain("<media:audio>"); + }); }); diff --git a/extensions/telegram/src/bot-message-context.body.ts b/extensions/telegram/src/bot-message-context.body.ts index b24b00772a0..6fc3fa360b9 100644 --- a/extensions/telegram/src/bot-message-context.body.ts +++ b/extensions/telegram/src/bot-message-context.body.ts @@ -181,13 +181,13 @@ export async function 
resolveTelegramInboundBody(params: { let preflightTranscript: string | undefined; const needsPreflightTranscription = - isGroup && - requireMention && hasAudio && !hasUserText && - mentionRegexes.length > 0 && - !disableAudioPreflight && - senderAllowedForAudioPreflight; + (!isGroup || + (requireMention && + mentionRegexes.length > 0 && + !disableAudioPreflight && + senderAllowedForAudioPreflight)); if (needsPreflightTranscription) { try {