WhatsApp: gate DM audio preflight behind access control

This commit is contained in:
Roger Deng
2026-04-23 19:17:10 +08:00
committed by Marcus Castro
parent 3d839ac440
commit 176b70d57b
5 changed files with 56 additions and 2 deletions

View File

@@ -74,6 +74,7 @@ function makeAudioMsg(): WebInboundMsg {
id: "msg-1",
from: "+15550000002",
to: "+15550000001",
accessControlPassed: true,
body: "<media:audio>",
chatType: "direct",
mediaType: "audio/ogg; codecs=opus",
@@ -146,4 +147,45 @@ describe("createWebOnMessageHandler audio preflight", () => {
}),
);
});
// Regression test for the auth-first gate: a direct-message audio payload that
// does not carry `accessControlPassed: true` must not trigger the early ack
// reaction or the audio transcription preflight, and must reach processMessage
// without a transcript and without the "ack already sent" marker.
it("skips early DM ack/preflight when access-control was not explicitly passed through", async () => {
  // Logger stub: the handler only needs the methods to exist.
  const silentLogger = {
    info: () => {},
    warn: () => {},
    debug: () => {},
    error: () => {},
  };
  const handler = createWebOnMessageHandler({
    cfg: {
      channels: {
        whatsapp: {
          // Ack reactions are enabled so that a missing ack is attributable to
          // the access-control gate rather than to configuration.
          ackReaction: { enabled: true },
        },
      },
    } as never,
    verbose: false,
    connectionId: "conn-1",
    maxMediaBytes: 1024 * 1024,
    groupHistoryLimit: 20,
    groupHistories: new Map(),
    groupMemberNames: new Map(),
    echoTracker: makeEchoTracker() as never,
    backgroundTasks: new Set(),
    replyResolver: vi.fn() as never,
    replyLogger: silentLogger as never,
    baseMentionConfig: {} as never,
    account: { authDir: "/tmp/auth", accountId: "default" },
  });

  // Same audio DM as the happy path, but with the access-control marker cleared.
  await handler({ ...makeAudioMsg(), accessControlPassed: undefined });

  expect(events).toEqual([]);
  expect(transcribeFirstAudioMock).not.toHaveBeenCalled();
  expect(maybeSendAckReactionMock).not.toHaveBeenCalled();

  // `expect.not.objectContaining` passes as soon as ANY listed property fails
  // to match, so the two properties are checked separately. Combined in one
  // matcher, a regression that leaked only one of them would go unnoticed.
  expect(processMessageMock).toHaveBeenCalledWith(
    expect.not.objectContaining({
      preflightAudioTranscript: expect.anything(),
    }),
  );
  expect(processMessageMock).toHaveBeenCalledWith(
    expect.not.objectContaining({
      ackAlreadySent: true,
    }),
  );
});
});

View File

@@ -176,13 +176,17 @@ export function createWebOnMessageHandler(params: {
// Preflight audio transcription: run once here, before broadcast fan-out, so
// all agents share the same transcript instead of each making a separate STT call.
// For DMs, only do this on the real inbound path after access-control/pairing
// checks have already passed in inbound/monitor.ts. That keeps external STT and
// early ack feedback behind the same auth-first gate as the rest of DM handling.
// null = preflight was attempted but produced no transcript (failed / disabled / no audio);
// undefined = preflight was not attempted (non-audio message, or a DM that has not
// passed access control).
let preflightAudioTranscript: string | null | undefined;
const hasAudioBody =
msg.mediaType?.startsWith("audio/") === true && msg.body === "<media:audio>";
const canRunEarlyDmPreflight = msg.chatType === "group" || msg.accessControlPassed === true;
let ackAlreadySent = false;
if (hasAudioBody && msg.mediaPath) {
if (canRunEarlyDmPreflight && hasAudioBody && msg.mediaPath) {
await maybeSendAckReaction({
cfg: params.cfg,
msg,

View File

@@ -567,6 +567,7 @@ export async function attachWebInboxToSocket(
conversationId: inbound.from,
to: self.e164 ?? "me",
accountId: inbound.access.resolvedAccountId,
accessControlPassed: true,
body: enriched.body,
pushName: senderName,
timestamp,

View File

@@ -57,6 +57,8 @@ export type WebInboundMessage = {
conversationId: string; // alias for clarity (same as from)
to: string;
accountId: string;
/** Set by the real inbound monitor after access-control / pairing checks pass. */
accessControlPassed?: boolean;
body: string;
pushName?: string;
timestamp?: number;

View File

@@ -150,7 +150,12 @@ describe("web monitor inbox", () => {
expect(onMessage).toHaveBeenCalledTimes(1);
expect(onMessage).toHaveBeenCalledWith(
expect.objectContaining({ from: "+123", to: "+123", body: "self ping" }),
expect.objectContaining({
from: "+123",
to: "+123",
body: "self ping",
accessControlPassed: true,
}),
);
expect(sock.readMessages).not.toHaveBeenCalled();