fix(imessage): send TTS audio as voice messages

This commit is contained in:
Omar Shahine
2026-06-06 04:36:31 +00:00
parent 8e9c377971
commit 46f1d2e340
7 changed files with 68 additions and 4 deletions

View File

@@ -17,6 +17,7 @@ export async function sendIMessageOutbound(params: {
text: string;
mediaUrl?: string;
mediaLocalRoots?: readonly string[];
audioAsVoice?: boolean;
accountId?: string;
deps?: { [channelId: string]: unknown };
replyToId?: string;
@@ -36,6 +37,7 @@ export async function sendIMessageOutbound(params: {
config: params.cfg,
...(params.mediaUrl ? { mediaUrl: params.mediaUrl } : {}),
...(params.mediaLocalRoots?.length ? { mediaLocalRoots: params.mediaLocalRoots } : {}),
...(params.audioAsVoice ? { audioAsVoice: true } : {}),
maxBytes,
accountId: params.accountId ?? undefined,
replyToId: params.replyToId ?? undefined,

View File

@@ -113,11 +113,16 @@ const imessageMessageAdapter = defineChannelMessageAdapter({
text: ctx.text,
mediaUrl: ctx.mediaUrl,
mediaLocalRoots: ctx.mediaLocalRoots,
audioAsVoice: ctx.audioAsVoice,
accountId: ctx.accountId ?? undefined,
deps: (ctx as typeof ctx & IMessageMessageContextExtras).deps,
replyToId: ctx.replyToId ?? undefined,
});
return toIMessageMessageSendResult(result, "media", ctx.replyToId);
return toIMessageMessageSendResult(
result,
ctx.audioAsVoice ? "voice" : "media",
ctx.replyToId,
);
},
},
});
@@ -362,6 +367,7 @@ export const imessagePlugin: ChannelPlugin<ResolvedIMessageAccount, IMessageProb
text,
mediaUrl,
mediaLocalRoots,
audioAsVoice,
accountId,
deps,
replyToId,
@@ -374,6 +380,7 @@ export const imessagePlugin: ChannelPlugin<ResolvedIMessageAccount, IMessageProb
text,
mediaUrl,
mediaLocalRoots,
audioAsVoice,
accountId: accountId ?? undefined,
deps,
replyToId: replyToId ?? undefined,

View File

@@ -168,6 +168,38 @@ describe("sendMessageIMessage receipts", () => {
expect(result.receipt.sentAt).toBeGreaterThan(0);
});
it("sends audioAsVoice media through send-attachment audio transport", async () => {
  // Fixtures: a local CAF file and the message id reported by the stubbed CLI.
  const voiceFilePath = "/tmp/voice.caf";
  const cliMessageId = "p:0/voice-guid";

  const client = createClient({ message_id: 12345 });
  const cliStub = vi.fn().mockResolvedValueOnce({ messageId: cliMessageId });

  // Exact argv expected for the single CLI invocation; `--audio` sits between
  // the file path and the transport selection.
  const expectedCliArgs = [
    "send-attachment",
    "--chat",
    "chat-1",
    "--file",
    voiceFilePath,
    "--audio",
    "--transport",
    "auto",
  ];

  const sendOptions = {
    config: IMESSAGE_TEST_CFG,
    client,
    mediaUrl: voiceFilePath,
    audioAsVoice: true,
    resolveAttachmentImpl: async () => ({ path: voiceFilePath, contentType: "audio/x-caf" }),
    runCliJson: cliStub,
  };
  const sendResult = await sendMessageIMessage("chat_guid:chat-1", "", sendOptions);

  // The id returned by the CLI stub is surfaced on the result.
  expect(sendResult.messageId).toBe(cliMessageId);

  // Exactly one CLI call, made with one positional argument: the argv array.
  expect(cliStub.mock.calls).toEqual([[expectedCliArgs]]);

  // The receipt carries a single part and it is tagged as "voice".
  const partKinds = sendResult.receipt.parts.map((part) => part.kind);
  expect(partKinds).toEqual(["voice"]);

  // The client's `request` method must never be invoked on this path.
  expect(client["request"]).not.toHaveBeenCalled();
});
it("resolves chat_id media-only payloads before using send-attachment", async () => {
const client = createClient({ message_id: 12345 });
const runCliJson = vi

View File

@@ -48,6 +48,7 @@ type IMessageSendOpts = {
mediaUrl?: string;
mediaLocalRoots?: readonly string[];
mediaReadFile?: (filePath: string) => Promise<Buffer>;
audioAsVoice?: boolean;
maxBytes?: number;
timeoutMs?: number;
chatId?: number;
@@ -729,6 +730,7 @@ async function trySendAttachmentForTarget(params: {
target: ReturnType<typeof parseIMessageTarget>;
service?: IMessageService;
filePath: string;
audioAsVoice?: boolean;
echoText?: string;
runCliJson: (args: readonly string[]) => Promise<Record<string, unknown>>;
resolveMessageGuidImpl?: IMessageSendOpts["resolveMessageGuidImpl"];
@@ -758,6 +760,7 @@ async function trySendAttachmentForTarget(params: {
attachmentChatTarget,
"--file",
params.filePath,
...(params.audioAsVoice ? ["--audio"] : []),
"--transport",
"auto",
]);
@@ -822,7 +825,7 @@ async function trySendAttachmentForTarget(params: {
receipt: createIMessageSendReceipt({
messageId,
target: params.target,
kind: "media",
kind: params.audioAsVoice ? "voice" : "media",
}),
};
}
@@ -915,6 +918,7 @@ export async function sendMessageIMessage(
target,
service,
filePath,
audioAsVoice: opts.audioAsVoice,
echoText: attachmentEchoText,
runCliJson,
resolveMessageGuidImpl: opts.resolveMessageGuidImpl,

View File

@@ -83,6 +83,13 @@ export function createIMessagePluginBase(params: {
capabilities: {
chatTypes: ["direct", "group"],
media: true,
tts: {
voice: {
synthesisTarget: "audio-file",
audioFileFormats: ["mp3", "caf", "audio/mpeg", "audio/x-caf"],
preferAudioFileFormat: "caf",
},
},
reactions: true,
edit: true,
unsend: true,

View File

@@ -112,6 +112,14 @@ describe("createIMessageTestPlugin", () => {
});
});
it("declares native iMessage voice memo TTS delivery", () => {
  // Strict equality: the declared voice capability must match this shape exactly.
  const expectedVoiceCapability = {
    synthesisTarget: "audio-file",
    audioFileFormats: ["mp3", "caf", "audio/mpeg", "audio/x-caf"],
    preferAudioFileFormat: "caf",
  };

  const declaredVoiceCapability = imessagePlugin.capabilities.tts?.voice;

  expect(declaredVoiceCapability).toStrictEqual(expectedVoiceCapability);
});
it("preserves the local approval prompt suppressor through attached-result composition", () => {
const suppressor = imessagePlugin.outbound?.shouldSuppressLocalPayloadPrompt;
if (!suppressor) {
@@ -208,7 +216,7 @@ describe("createIMessageTestPlugin", () => {
const sendIMessage = async (
_to: string,
_text: string,
opts?: { mediaUrl?: string; replyToId?: string },
opts?: { mediaUrl?: string; replyToId?: string; audioAsVoice?: boolean },
) => {
const messageId = opts?.mediaUrl ? "imsg-media-1" : "imsg-text-1";
return {
@@ -216,7 +224,7 @@ describe("createIMessageTestPlugin", () => {
sentText: opts?.mediaUrl ? "<media:image>" : "hello",
receipt: createMessageReceiptFromOutboundResults({
results: [{ channel: "imessage", messageId }],
kind: opts?.mediaUrl ? "media" : "text",
kind: opts?.audioAsVoice ? "voice" : opts?.mediaUrl ? "media" : "text",
...(opts?.replyToId ? { replyToId: opts.replyToId } : {}),
}),
};
@@ -247,11 +255,13 @@ describe("createIMessageTestPlugin", () => {
text: "caption",
mediaUrl: "/tmp/image.png",
mediaLocalRoots: ["/tmp"],
audioAsVoice: true,
deps: { imessage: sendIMessage },
} as Parameters<typeof sendMedia>[0] & {
deps: { imessage: typeof sendIMessage };
});
expect(result.receipt.platformMessageIds).toEqual(["imsg-media-1"]);
expect(result.receipt.parts.map((part) => part.kind)).toEqual(["voice"]);
},
replyTo: async () => {
const result = await sendText({

View File

@@ -39,6 +39,7 @@ describe("resolveChannelTtsVoiceDelivery", () => {
voice: {
synthesisTarget: "audio-file",
audioFileFormats: ["mp3", "caf", "audio/mpeg", "audio/x-caf"],
preferAudioFileFormat: "caf",
},
},
}),
@@ -89,6 +90,7 @@ describe("resolveChannelTtsVoiceDelivery", () => {
expect(resolveChannelTtsVoiceDelivery("imessage")).toEqual({
synthesisTarget: "audio-file",
audioFileFormats: ["mp3", "caf", "audio/mpeg", "audio/x-caf"],
preferAudioFileFormat: "caf",
});
expect(resolveChannelTtsVoiceDelivery("discord")).toEqual({
synthesisTarget: "voice-note",