From 0a62c1e665ad47d7bf2e13914d5b642a0575b4ac Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Mon, 4 May 2026 16:46:58 -0700 Subject: [PATCH] fix(telegram): derive media placeholders from MIME Fixes #69793. Verification: - repro before fix: `pnpm test:serial extensions/telegram/src/bot-message-context.body.test.ts -- --reporter=verbose` failed 3 new cases with `<media:image>` returned for non-image/mixed saved media - `pnpm test:serial extensions/telegram/src/bot-message-context.body.test.ts -- --reporter=verbose` passed 9 tests after fix - `pnpm exec oxfmt --check --threads=1 extensions/telegram/src/bot-message-context.body.ts extensions/telegram/src/bot-message-context.body.test.ts` - `git diff --check` - `OPENCLAW_TESTBOX=1 pnpm testbox:run --id tbx_01kqtnnhpg6rk1225tbb7109kf -- "pnpm check:changed"` passed --- CHANGELOG.md | 1 + .../src/bot-message-context.body.test.ts | 58 +++++++++++++++++++ .../telegram/src/bot-message-context.body.ts | 44 +++++++++++++- 3 files changed, 102 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb50ae2da9e..71831a52b58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -60,6 +60,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Codex plugin: mirror the experimental upstream app-server protocol and format generated TypeScript before drift checks, keeping OpenClaw's `experimentalApi` bridge compatible with latest Codex while preserving formatter gates. +- Telegram/media: derive no-caption inbound media placeholders from saved MIME metadata instead of the Telegram `photo` shape, so non-image and mixed attachments no longer reach the model as `<media:image>`. Fixes #69793. Thanks @aspalagin. - Gateway/startup: include resolved thinking and fast-mode defaults in the `agent model` startup log line, defaulting unset startup thinking to `medium` without mixing in reasoning visibility. 
- Gateway/watch: suppress sync-I/O trace output during `pnpm gateway:watch --benchmark` unless explicitly requested, so CPU profiling no longer floods the terminal with stack traces. - Gateway/watch: when benchmark sync-I/O tracing is explicitly enabled, tee trace blocks to the benchmark output log and filter them from the terminal pane while keeping normal Gateway logs visible. diff --git a/extensions/telegram/src/bot-message-context.body.test.ts b/extensions/telegram/src/bot-message-context.body.test.ts index 7f798c7df0a..682f1329c94 100644 --- a/extensions/telegram/src/bot-message-context.body.test.ts +++ b/extensions/telegram/src/bot-message-context.body.test.ts @@ -70,6 +70,64 @@ describe("resolveTelegramInboundBody", () => { }); }); + it("uses saved media MIME for no-caption photo placeholders", async () => { + const result = await resolveTelegramBody({ + msg: { + message_id: 3, + date: 1_700_000_003, + chat: { id: 42, type: "private", first_name: "Pat" }, + from: { id: 42, first_name: "Pat" }, + photo: [{ file_id: "photo-1", file_unique_id: "photo-u1", width: 120, height: 80 }], + } as never, + allMedia: [{ path: "/tmp/upload.bin", contentType: "application/octet-stream" }], + }); + + expect(result).toMatchObject({ + rawBody: "", + bodyText: "", + }); + }); + + it("summarizes multiple saved images as images", async () => { + const result = await resolveTelegramBody({ + msg: { + message_id: 4, + date: 1_700_000_004, + chat: { id: 42, type: "private", first_name: "Pat" }, + from: { id: 42, first_name: "Pat" }, + photo: [{ file_id: "photo-2", file_unique_id: "photo-u2", width: 120, height: 80 }], + } as never, + allMedia: [ + { path: "/tmp/photo-1.webp", contentType: "image/webp" }, + { path: "/tmp/photo-2.png", contentType: "image/png" }, + ], + }); + + expect(result).toMatchObject({ + bodyText: " (2 images)", + }); + }); + + it("summarizes mixed saved media as attachments", async () => { + const result = await resolveTelegramBody({ + msg: { + message_id: 5, + 
date: 1_700_000_005, + chat: { id: 42, type: "private", first_name: "Pat" }, + from: { id: 42, first_name: "Pat" }, + photo: [{ file_id: "photo-3", file_unique_id: "photo-u3", width: 120, height: 80 }], + } as never, + allMedia: [ + { path: "/tmp/photo.webp", contentType: "image/webp" }, + { path: "/tmp/report.pdf", contentType: "application/pdf" }, + ], + }); + + expect(result).toMatchObject({ + bodyText: " (2 attachments)", + }); + }); + it("does not transcribe group audio for unauthorized senders", async () => { transcribeFirstAudioMock.mockReset(); const logger = { info: vi.fn() }; diff --git a/extensions/telegram/src/bot-message-context.body.ts b/extensions/telegram/src/bot-message-context.body.ts index df7de19b9e7..a2e425ab740 100644 --- a/extensions/telegram/src/bot-message-context.body.ts +++ b/extensions/telegram/src/bot-message-context.body.ts @@ -82,6 +82,44 @@ function formatAudioTranscriptForAgent(transcript: string): string { return `[Audio transcript (machine-generated, untrusted)]: ${JSON.stringify(transcript)}`; } +type TelegramSavedMediaKind = "audio" | "document" | "image" | "video"; + +function resolveSavedMediaKind(contentType: string | undefined): TelegramSavedMediaKind { + const normalized = contentType?.split(";")[0]?.trim().toLowerCase(); + if (normalized?.startsWith("audio/")) { + return "audio"; + } + if (normalized?.startsWith("image/")) { + return "image"; + } + if (normalized?.startsWith("video/")) { + return "video"; + } + return "document"; +} + +function formatSavedMediaPlaceholder(allMedia: TelegramMediaRef[]): string | undefined { + if (allMedia.length === 0) { + return undefined; + } + const kinds = allMedia.map((media) => resolveSavedMediaKind(media.contentType)); + const firstKind = kinds[0] ?? "document"; + const kind = kinds.every((candidate) => candidate === firstKind) ? 
firstKind : "document"; + if (allMedia.length === 1) { + return ``; + } + if (kind === "image") { + return ` (${allMedia.length} images)`; + } + if (kind === "video") { + return ` (${allMedia.length} videos)`; + } + if (kind === "audio") { + return ` (${allMedia.length} audio attachments)`; + } + return ` (${allMedia.length} attachments)`; +} + async function resolveStickerVisionSupport(params: { cfg: OpenClawConfig; agentId?: string; @@ -248,13 +286,17 @@ export async function resolveTelegramInboundBody(params: { bodyText = formatAudioTranscriptForAgent(preflightTranscript); } + const savedMediaPlaceholder = formatSavedMediaPlaceholder(allMedia); + if (!hasAudio && savedMediaPlaceholder && placeholder && bodyText === placeholder) { + bodyText = savedMediaPlaceholder; + } if (!bodyText && allMedia.length > 0) { if (hasAudio) { bodyText = preflightTranscript ? formatAudioTranscriptForAgent(preflightTranscript) : ""; } else { - bodyText = `${allMedia.length > 1 ? ` (${allMedia.length} images)` : ""}`; + bodyText = savedMediaPlaceholder ?? ""; } }