fix(telegram): derive media placeholders from MIME

Fixes #69793.

Verification:
- repro before fix: `pnpm test:serial extensions/telegram/src/bot-message-context.body.test.ts -- --reporter=verbose` failed 3 new cases with `<media:image>` returned for non-image/mixed saved media
- `pnpm test:serial extensions/telegram/src/bot-message-context.body.test.ts -- --reporter=verbose` passed 9 tests after fix
- `pnpm exec oxfmt --check --threads=1 extensions/telegram/src/bot-message-context.body.ts extensions/telegram/src/bot-message-context.body.test.ts`
- `git diff --check`
- `OPENCLAW_TESTBOX=1 pnpm testbox:run --id tbx_01kqtnnhpg6rk1225tbb7109kf -- "pnpm check:changed"` passed
This commit is contained in:
Vincent Koc
2026-05-04 16:46:58 -07:00
committed by GitHub
parent d522a18971
commit 0a62c1e665
3 changed files with 102 additions and 1 deletions

View File

@@ -60,6 +60,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Codex plugin: mirror the experimental upstream app-server protocol and format generated TypeScript before drift checks, keeping OpenClaw's `experimentalApi` bridge compatible with latest Codex while preserving formatter gates.
- Telegram/media: derive no-caption inbound media placeholders from saved MIME metadata instead of the Telegram `photo` shape, so non-image and mixed attachments no longer reach the model as `<media:image>`. Fixes #69793. Thanks @aspalagin.
- Gateway/startup: include resolved thinking and fast-mode defaults in the `agent model` startup log line, defaulting unset startup thinking to `medium` without mixing in reasoning visibility.
- Gateway/watch: suppress sync-I/O trace output during `pnpm gateway:watch --benchmark` unless explicitly requested, so CPU profiling no longer floods the terminal with stack traces.
- Gateway/watch: when benchmark sync-I/O tracing is explicitly enabled, tee trace blocks to the benchmark output log and filter them from the terminal pane while keeping normal Gateway logs visible.

View File

@@ -70,6 +70,64 @@ describe("resolveTelegramInboundBody", () => {
});
});
// Photo-shaped Telegram message whose single saved file is reported as
// application/octet-stream: rawBody is expected to remain "<media:image>"
// while bodyText is expected to be "<media:document>".
it("uses saved media MIME for no-caption photo placeholders", async () => {
  const photoMessage = {
    message_id: 3,
    date: 1_700_000_003,
    chat: { id: 42, type: "private", first_name: "Pat" },
    from: { id: 42, first_name: "Pat" },
    photo: [{ file_id: "photo-1", file_unique_id: "photo-u1", width: 120, height: 80 }],
  } as never;

  const result = await resolveTelegramBody({
    msg: photoMessage,
    allMedia: [{ path: "/tmp/upload.bin", contentType: "application/octet-stream" }],
  });

  expect(result).toMatchObject({
    rawBody: "<media:image>",
    bodyText: "<media:document>",
  });
});
// Two saved files that are both image/* (webp and png): the body placeholder
// is expected to be the image tag followed by an image count.
it("summarizes multiple saved images as images", async () => {
  const photoMessage = {
    message_id: 4,
    date: 1_700_000_004,
    chat: { id: 42, type: "private", first_name: "Pat" },
    from: { id: 42, first_name: "Pat" },
    photo: [{ file_id: "photo-2", file_unique_id: "photo-u2", width: 120, height: 80 }],
  } as never;

  const result = await resolveTelegramBody({
    msg: photoMessage,
    allMedia: [
      { path: "/tmp/photo-1.webp", contentType: "image/webp" },
      { path: "/tmp/photo-2.png", contentType: "image/png" },
    ],
  });

  expect(result).toMatchObject({ bodyText: "<media:image> (2 images)" });
});
// One image/* file plus one application/pdf file: because the saved kinds
// differ, the body placeholder is expected to be the document tag followed by
// a generic attachment count.
it("summarizes mixed saved media as attachments", async () => {
  const photoMessage = {
    message_id: 5,
    date: 1_700_000_005,
    chat: { id: 42, type: "private", first_name: "Pat" },
    from: { id: 42, first_name: "Pat" },
    photo: [{ file_id: "photo-3", file_unique_id: "photo-u3", width: 120, height: 80 }],
  } as never;

  const result = await resolveTelegramBody({
    msg: photoMessage,
    allMedia: [
      { path: "/tmp/photo.webp", contentType: "image/webp" },
      { path: "/tmp/report.pdf", contentType: "application/pdf" },
    ],
  });

  expect(result).toMatchObject({ bodyText: "<media:document> (2 attachments)" });
});
it("does not transcribe group audio for unauthorized senders", async () => {
transcribeFirstAudioMock.mockReset();
const logger = { info: vi.fn() };

View File

@@ -82,6 +82,44 @@ function formatAudioTranscriptForAgent(transcript: string): string {
return `[Audio transcript (machine-generated, untrusted)]: ${JSON.stringify(transcript)}`;
}
/** Coarse attachment categories that a saved file's MIME type is mapped onto. */
type TelegramSavedMediaKind = "audio" | "document" | "image" | "video";

/**
 * Classifies a saved attachment by the top-level type of its MIME string.
 *
 * Everything after the first `;` is discarded, and the remainder is trimmed
 * and lower-cased before matching. Any value that is not `audio/*`, `image/*`,
 * or `video/*` — including a missing content type — yields `"document"`.
 *
 * @param contentType - MIME string recorded for the saved file, if any.
 * @returns The matching media kind, or `"document"` as the fallback.
 */
function resolveSavedMediaKind(contentType: string | undefined): TelegramSavedMediaKind {
  const essence = (contentType ?? "").split(";")[0] ?? "";
  const mimeType = essence.trim().toLowerCase();
  for (const family of ["audio", "image", "video"] as const) {
    if (mimeType.startsWith(`${family}/`)) {
      return family;
    }
  }
  return "document";
}
/**
 * Builds the placeholder text that stands in for a set of saved attachments.
 *
 * Each attachment is classified via `resolveSavedMediaKind`. If all of them
 * share one kind, that kind is used; otherwise the result falls back to
 * `"document"`. A single attachment produces the bare `<media:kind>` tag;
 * several produce the tag followed by a parenthesised count.
 *
 * @param allMedia - Saved attachments to summarise.
 * @returns The placeholder string, or `undefined` when the list is empty.
 */
function formatSavedMediaPlaceholder(allMedia: TelegramMediaRef[]): string | undefined {
  const count = allMedia.length;
  if (count === 0) {
    return undefined;
  }

  const [leadKind = "document", ...otherKinds] = allMedia.map((media) =>
    resolveSavedMediaKind(media.contentType),
  );
  // Any disagreement between attachments collapses the summary to "document".
  const kind = otherKinds.some((candidate) => candidate !== leadKind) ? "document" : leadKind;

  if (count === 1) {
    return `<media:${kind}>`;
  }

  switch (kind) {
    case "image":
      return `<media:image> (${count} images)`;
    case "video":
      return `<media:video> (${count} videos)`;
    case "audio":
      return `<media:audio> (${count} audio attachments)`;
    default:
      return `<media:document> (${count} attachments)`;
  }
}
async function resolveStickerVisionSupport(params: {
cfg: OpenClawConfig;
agentId?: string;
@@ -248,13 +286,17 @@ export async function resolveTelegramInboundBody(params: {
bodyText = formatAudioTranscriptForAgent(preflightTranscript);
}
const savedMediaPlaceholder = formatSavedMediaPlaceholder(allMedia);
if (!hasAudio && savedMediaPlaceholder && placeholder && bodyText === placeholder) {
bodyText = savedMediaPlaceholder;
}
if (!bodyText && allMedia.length > 0) {
if (hasAudio) {
bodyText = preflightTranscript
? formatAudioTranscriptForAgent(preflightTranscript)
: "<media:audio>";
} else {
bodyText = `<media:image>${allMedia.length > 1 ? ` (${allMedia.length} images)` : ""}`;
bodyText = savedMediaPlaceholder ?? "<media:document>";
}
}