mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 17:50:45 +00:00
fix(telegram): derive media placeholders from MIME
Fixes #69793.

Verification:
- Repro before fix: `pnpm test:serial extensions/telegram/src/bot-message-context.body.test.ts -- --reporter=verbose` failed 3 new cases, with `<media:image>` returned for non-image/mixed saved media.
- After fix: `pnpm test:serial extensions/telegram/src/bot-message-context.body.test.ts -- --reporter=verbose` passed 9 tests.
- `pnpm exec oxfmt --check --threads=1 extensions/telegram/src/bot-message-context.body.ts extensions/telegram/src/bot-message-context.body.test.ts`
- `git diff --check`
- `OPENCLAW_TESTBOX=1 pnpm testbox:run --id tbx_01kqtnnhpg6rk1225tbb7109kf -- "pnpm check:changed"` passed.
This commit is contained in:
@@ -60,6 +60,7 @@ Docs: https://docs.openclaw.ai
|
||||
### Fixes
|
||||
|
||||
- Codex plugin: mirror the experimental upstream app-server protocol and format generated TypeScript before drift checks, keeping OpenClaw's `experimentalApi` bridge compatible with latest Codex while preserving formatter gates.
|
||||
- Telegram/media: derive no-caption inbound media placeholders from saved MIME metadata instead of the Telegram `photo` shape, so non-image and mixed attachments no longer reach the model as `<media:image>`. Fixes #69793. Thanks @aspalagin.
|
||||
- Gateway/startup: include resolved thinking and fast-mode defaults in the `agent model` startup log line, defaulting unset startup thinking to `medium` without mixing in reasoning visibility.
|
||||
- Gateway/watch: suppress sync-I/O trace output during `pnpm gateway:watch --benchmark` unless explicitly requested, so CPU profiling no longer floods the terminal with stack traces.
|
||||
- Gateway/watch: when benchmark sync-I/O tracing is explicitly enabled, tee trace blocks to the benchmark output log and filter them from the terminal pane while keeping normal Gateway logs visible.
|
||||
|
||||
@@ -70,6 +70,64 @@ describe("resolveTelegramInboundBody", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("uses saved media MIME for no-caption photo placeholders", async () => {
  // The message has the Telegram `photo` shape, but the saved file is generic binary.
  const photoMessage = {
    message_id: 3,
    date: 1_700_000_003,
    chat: { id: 42, type: "private", first_name: "Pat" },
    from: { id: 42, first_name: "Pat" },
    photo: [{ file_id: "photo-1", file_unique_id: "photo-u1", width: 120, height: 80 }],
  } as never;

  const resolved = await resolveTelegramBody({
    msg: photoMessage,
    allMedia: [{ path: "/tmp/upload.bin", contentType: "application/octet-stream" }],
  });

  // rawBody is expected to stay `<media:image>`; bodyText is expected to follow the saved MIME.
  expect(resolved).toMatchObject({
    rawBody: "<media:image>",
    bodyText: "<media:document>",
  });
});
|
||||
|
||||
it("summarizes multiple saved images as images", async () => {
  // Two saved files, both with image/* content types.
  const photoMessage = {
    message_id: 4,
    date: 1_700_000_004,
    chat: { id: 42, type: "private", first_name: "Pat" },
    from: { id: 42, first_name: "Pat" },
    photo: [{ file_id: "photo-2", file_unique_id: "photo-u2", width: 120, height: 80 }],
  } as never;

  const resolved = await resolveTelegramBody({
    msg: photoMessage,
    allMedia: [
      { path: "/tmp/photo-1.webp", contentType: "image/webp" },
      { path: "/tmp/photo-2.png", contentType: "image/png" },
    ],
  });

  expect(resolved).toMatchObject({ bodyText: "<media:image> (2 images)" });
});
|
||||
|
||||
it("summarizes mixed saved media as attachments", async () => {
  // One image plus one PDF: the saved kinds disagree.
  const photoMessage = {
    message_id: 5,
    date: 1_700_000_005,
    chat: { id: 42, type: "private", first_name: "Pat" },
    from: { id: 42, first_name: "Pat" },
    photo: [{ file_id: "photo-3", file_unique_id: "photo-u3", width: 120, height: 80 }],
  } as never;

  const resolved = await resolveTelegramBody({
    msg: photoMessage,
    allMedia: [
      { path: "/tmp/photo.webp", contentType: "image/webp" },
      { path: "/tmp/report.pdf", contentType: "application/pdf" },
    ],
  });

  expect(resolved).toMatchObject({ bodyText: "<media:document> (2 attachments)" });
});
|
||||
|
||||
it("does not transcribe group audio for unauthorized senders", async () => {
|
||||
transcribeFirstAudioMock.mockReset();
|
||||
const logger = { info: vi.fn() };
|
||||
|
||||
@@ -82,6 +82,44 @@ function formatAudioTranscriptForAgent(transcript: string): string {
|
||||
return `[Audio transcript (machine-generated, untrusted)]: ${JSON.stringify(transcript)}`;
|
||||
}
|
||||
|
||||
type TelegramSavedMediaKind = "audio" | "document" | "image" | "video";

/**
 * Maps a content type string to a coarse media kind.
 *
 * Anything after the first `;` is ignored, and the remainder is trimmed and
 * lower-cased before matching. Values that start with `audio/`, `image/`, or
 * `video/` yield that kind; every other value, including `undefined`, yields
 * `"document"`.
 *
 * @param contentType - Content type of the media, if known.
 * @returns The media kind derived from the content type prefix.
 */
function resolveSavedMediaKind(contentType: string | undefined): TelegramSavedMediaKind {
  // An absent content type collapses to "", which matches none of the prefixes below.
  const essence = (contentType ?? "").split(";")[0]?.trim().toLowerCase() ?? "";
  const prefixedKinds: readonly TelegramSavedMediaKind[] = ["audio", "image", "video"];
  const matched = prefixedKinds.find((candidate) => essence.startsWith(`${candidate}/`));
  return matched ?? "document";
}
|
||||
|
||||
/**
 * Builds the placeholder text for a list of saved media entries.
 *
 * Each entry is classified with `resolveSavedMediaKind` from its `contentType`.
 * When every entry has the same kind, that kind is used; otherwise the list is
 * treated as `"document"`. A single entry yields just the tag (for example
 * `<media:image>`); multiple entries also get a count suffix.
 *
 * @param allMedia - Saved media entries to summarize.
 * @returns The placeholder string, or `undefined` when `allMedia` is empty.
 */
function formatSavedMediaPlaceholder(allMedia: TelegramMediaRef[]): string | undefined {
  const total = allMedia.length;
  if (total === 0) {
    return undefined;
  }

  const [leadKind = "document", ...remainingKinds] = allMedia.map((entry) =>
    resolveSavedMediaKind(entry.contentType),
  );
  // Any disagreement between entries downgrades the whole set to "document".
  const sharedKind = remainingKinds.some((other) => other !== leadKind) ? "document" : leadKind;

  if (total === 1) {
    return `<media:${sharedKind}>`;
  }

  switch (sharedKind) {
    case "image":
      return `<media:image> (${total} images)`;
    case "video":
      return `<media:video> (${total} videos)`;
    case "audio":
      return `<media:audio> (${total} audio attachments)`;
    default:
      return `<media:document> (${total} attachments)`;
  }
}
|
||||
|
||||
async function resolveStickerVisionSupport(params: {
|
||||
cfg: OpenClawConfig;
|
||||
agentId?: string;
|
||||
@@ -248,13 +286,17 @@ export async function resolveTelegramInboundBody(params: {
|
||||
bodyText = formatAudioTranscriptForAgent(preflightTranscript);
|
||||
}
|
||||
|
||||
const savedMediaPlaceholder = formatSavedMediaPlaceholder(allMedia);
|
||||
if (!hasAudio && savedMediaPlaceholder && placeholder && bodyText === placeholder) {
|
||||
bodyText = savedMediaPlaceholder;
|
||||
}
|
||||
if (!bodyText && allMedia.length > 0) {
|
||||
if (hasAudio) {
|
||||
bodyText = preflightTranscript
|
||||
? formatAudioTranscriptForAgent(preflightTranscript)
|
||||
: "<media:audio>";
|
||||
} else {
|
||||
bodyText = `<media:image>${allMedia.length > 1 ? ` (${allMedia.length} images)` : ""}`;
|
||||
bodyText = savedMediaPlaceholder ?? "<media:document>";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user