fix(bluebubbles): UTI-aware audio attachment detection (#75488)

Co-authored-by: Omar Shahine <10343873+omarshahine@users.noreply.github.com>
This commit is contained in:
Omar Shahine
2026-05-01 10:40:08 -07:00
committed by GitHub
parent fd4bee9c05
commit 68c010906a
3 changed files with 93 additions and 2 deletions

View File

@@ -82,6 +82,7 @@ Docs: https://docs.openclaw.ai
- Gateway/sessions: yield during bulk transcript title/preview hydration and copy compaction checkpoints asynchronously, keeping the Gateway event loop responsive for large session stores and large transcripts. Refs #75330 and #75414. Thanks @amknight.
- Gateway/sessions: stream bounded transcript reads for session detail, history, artifacts, compaction, and send/subscribe sequence paths so small Gateway requests no longer materialize large transcripts or OOM on oversized session logs. Thanks @vincentkoc.
- Gateway/chat: bound chat-history transcript reads to the requested display window so large session logs no longer OOM the Gateway when clients ask for a small history page. Thanks @vincentkoc.
- BlueBubbles: detect audio attachments by Apple UTIs (`public.audio`, `public.mpeg-4-audio`, `com.apple.m4a-audio`, `com.apple.coreaudio-format`) in addition to `audio/*` MIME, so iMessage voice notes whose webhook payload only carries the UTI are now classified as audio in the inbound `<media:audio>` placeholder instead of falling through to the generic `<media:attachment>` tag. Thanks @omarshahine.
- Voice Call/Twilio: honor stored pre-connect TwiML before realtime webhook shortcuts and reject DTMF sequences outside conversation mode, so Meet PIN entry cannot be skipped or silently dropped. Thanks @donkeykong91 and @PfanP.
- Docs/sandboxing: clarify that sandbox setup scripts (`sandbox-setup.sh`, `sandbox-common-setup.sh`, `sandbox-browser-setup.sh`) are only available from a source checkout, and add inline `docker build` commands for npm-installed users so sandbox image setup works without cloning the repo. Fixes #75485. Thanks @amknight.
- Google Meet/Voice Call: play Twilio Meet DTMF before opening the realtime media stream and carry the intro as the initial Voice Call message, so the greeting is generated after Meet admits the phone participant instead of racing a live-call TwiML update. Thanks @donkeykong91 and @PfanP.

View File

@@ -1,5 +1,10 @@
import { describe, expect, it } from "vitest";
import { normalizeWebhookMessage, normalizeWebhookReaction } from "./monitor-normalize.js";
import {
buildMessagePlaceholder,
isBlueBubblesAudioAttachment,
normalizeWebhookMessage,
normalizeWebhookReaction,
} from "./monitor-normalize.js";
function createFallbackDmPayload(overrides: Record<string, unknown> = {}) {
return {
@@ -140,3 +145,62 @@ describe("normalizeWebhookReaction", () => {
expect(result?.action).toBe("added");
});
});
// Covers the attachment-level audio predicate: `audio/*` MIME matches, Apple
// UTI matches, case-insensitive UTI comparison, and non-audio rejections.
describe("isBlueBubblesAudioAttachment", () => {
  type AttachmentInput = Parameters<typeof isBlueBubblesAudioAttachment>[0];

  // Runs the predicate on one attachment and asserts the expected verdict.
  const expectDetection = (attachment: AttachmentInput, expected: boolean): void => {
    expect(isBlueBubblesAudioAttachment(attachment)).toBe(expected);
  };

  it("detects audio by `audio/*` MIME type", () => {
    for (const mimeType of ["audio/x-m4a", "audio/mp4"]) {
      expectDetection({ mimeType }, true);
    }
  });

  it("detects audio by Apple UTI even when MIME is missing", () => {
    const audioUtis = [
      "public.audio",
      "public.mpeg-4-audio",
      "com.apple.m4a-audio",
      "com.apple.coreaudio-format",
    ];
    for (const uti of audioUtis) {
      expectDetection({ uti }, true);
    }
  });

  it("treats UTI matching as case-insensitive", () => {
    expectDetection({ uti: "Public.Audio" }, true);
  });

  it("returns false for image / video / unknown attachments", () => {
    // The trailing empty object exercises the "no MIME, no UTI" case.
    const nonAudio: AttachmentInput[] = [
      { mimeType: "image/jpeg" },
      { mimeType: "video/quicktime" },
      { uti: "public.jpeg" },
      {},
    ];
    for (const attachment of nonAudio) {
      expectDetection(attachment, false);
    }
  });
});
// Checks which media tag buildMessagePlaceholder emits for text-less messages
// whose attachments are identified by MIME type, by Apple UTI, or a mix.
describe("buildMessagePlaceholder audio detection", () => {
  type AttachmentStub = { mimeType?: string; uti?: string };
  type PlaceholderInput = Parameters<typeof buildMessagePlaceholder>[0];

  // Builds a minimal message carrying only the given attachments and returns
  // the placeholder produced for it.
  const placeholderFor = (attachments: AttachmentStub[]) => {
    const message = {
      text: "",
      senderId: "+15551234567",
      senderIdExplicit: false,
      isGroup: false,
      attachments,
    } as PlaceholderInput;
    return buildMessagePlaceholder(message);
  };

  it("emits <media:audio> for `audio/*` MIME (existing behavior)", () => {
    const placeholder = placeholderFor([{ mimeType: "audio/x-m4a" }]);
    expect(placeholder).toContain("<media:audio>");
  });

  it("emits <media:audio> for Apple `public.audio` UTI when MIME is missing", () => {
    const placeholder = placeholderFor([{ uti: "public.audio" }]);
    expect(placeholder).toContain("<media:audio>");
  });

  it("emits <media:audio> for Apple `com.apple.m4a-audio` UTI", () => {
    const placeholder = placeholderFor([{ uti: "com.apple.m4a-audio" }]);
    expect(placeholder).toContain("<media:audio>");
  });

  it("falls back to <media:attachment> for non-audio mixes", () => {
    // One audio UTI plus one image: not every attachment is audio.
    const placeholder = placeholderFor([{ uti: "public.audio" }, { mimeType: "image/jpeg" }]);
    expect(placeholder).toContain("<media:attachment>");
  });
});

View File

@@ -59,6 +59,32 @@ export function extractAttachments(message: Record<string, unknown>): BlueBubble
return out;
}
// Apple Uniform Type Identifiers that BlueBubbles attaches to voice notes and
// other audio attachments. Some webhook payloads (notably iMessage voice notes
// recorded on macOS 26 Tahoe) carry only a UTI and no normalized `audio/*`
// MIME type, so a MIME check alone is not enough. The list is deliberately
// narrow: it covers what BlueBubbles emits for iMessage voice notes today
// (m4a / MPEG-4 audio). Other audio formats are not iMessage voice-note
// formats and are expected to arrive with an `audio/*` MIME type anyway.
const APPLE_AUDIO_UTIS = new Set<string>([
  "public.audio",
  "public.mpeg-4-audio",
  "com.apple.m4a-audio",
  "com.apple.coreaudio-format",
]);

/**
 * Reports whether a BlueBubbles attachment should be treated as audio.
 *
 * An attachment counts as audio when its MIME type starts with `audio/` or
 * its UTI is one of {@link APPLE_AUDIO_UTIS}. Both fields are trimmed and
 * lowercased before comparison; a missing or blank field never matches.
 *
 * @param attachment - Attachment whose `mimeType` and `uti` are inspected.
 * @returns `true` when either field identifies audio, otherwise `false`.
 */
export function isBlueBubblesAudioAttachment(attachment: BlueBubblesAttachment): boolean {
  // Missing and whitespace-only values collapse to "", which satisfies
  // neither the prefix check nor the set lookup.
  const canonical = (value: string | null | undefined): string =>
    value?.trim().toLowerCase() ?? "";

  // `||` short-circuits, so the UTI is only consulted when the MIME type does
  // not already identify the attachment as audio.
  return (
    canonical(attachment.mimeType).startsWith("audio/") ||
    APPLE_AUDIO_UTIS.has(canonical(attachment.uti))
  );
}
function buildAttachmentPlaceholder(attachments: BlueBubblesAttachment[]): string {
if (attachments.length === 0) {
return "";
@@ -66,7 +92,7 @@ function buildAttachmentPlaceholder(attachments: BlueBubblesAttachment[]): strin
const mimeTypes = attachments.map((entry) => entry.mimeType ?? "");
const allImages = mimeTypes.every((entry) => entry.startsWith("image/"));
const allVideos = mimeTypes.every((entry) => entry.startsWith("video/"));
const allAudio = mimeTypes.every((entry) => entry.startsWith("audio/"));
const allAudio = attachments.every(isBlueBubblesAudioAttachment);
const tag = allImages
? "<media:image>"
: allVideos