Outbound: preserve routed audioAsVoice delivery

This commit is contained in:
Gustavo Madeira Santana
2026-03-21 15:14:03 -07:00
parent 21544f9e53
commit 5b3fce4c85
10 changed files with 157 additions and 9 deletions

View File

@@ -77,6 +77,7 @@ describe("matrixOutbound cfg threading", () => {
mediaUrl: "file:///tmp/cat.png",
mediaLocalRoots: ["/tmp/openclaw"],
accountId: "default",
audioAsVoice: true,
});
expect(mocks.sendMessageMatrix).toHaveBeenCalledWith(
@@ -86,6 +87,7 @@ describe("matrixOutbound cfg threading", () => {
cfg,
mediaUrl: "file:///tmp/cat.png",
mediaLocalRoots: ["/tmp/openclaw"],
audioAsVoice: true,
}),
);
});

View File

@@ -7,7 +7,7 @@ export const matrixOutbound: ChannelOutboundAdapter = {
chunker: (text, limit) => getMatrixRuntime().channel.text.chunkMarkdownText(text, limit),
chunkerMode: "markdown",
textChunkLimit: 4000,
sendText: async ({ cfg, to, text, deps, replyToId, threadId, accountId }) => {
sendText: async ({ cfg, to, text, deps, replyToId, threadId, accountId, audioAsVoice }) => {
const send =
resolveOutboundSendDep<typeof sendMessageMatrix>(deps, "matrix") ?? sendMessageMatrix;
const resolvedThreadId =
@@ -17,6 +17,7 @@ export const matrixOutbound: ChannelOutboundAdapter = {
replyToId: replyToId ?? undefined,
threadId: resolvedThreadId,
accountId: accountId ?? undefined,
audioAsVoice,
});
return {
channel: "matrix",
@@ -34,6 +35,7 @@ export const matrixOutbound: ChannelOutboundAdapter = {
replyToId,
threadId,
accountId,
audioAsVoice,
}) => {
const send =
resolveOutboundSendDep<typeof sendMessageMatrix>(deps, "matrix") ?? sendMessageMatrix;
@@ -46,6 +48,7 @@ export const matrixOutbound: ChannelOutboundAdapter = {
replyToId: replyToId ?? undefined,
threadId: resolvedThreadId,
accountId: accountId ?? undefined,
audioAsVoice,
});
return {
channel: "matrix",

View File

@@ -414,6 +414,31 @@ describe("routeReply", () => {
);
});
// Regression guard: a payload routed through routeReply must reach
// deliverOutboundPayloads with its audioAsVoice flag still set.
it("preserves audioAsVoice on routed outbound payloads", async () => {
  const deliver = mocks.deliverOutboundPayloads;
  deliver.mockClear();
  deliver.mockResolvedValue([]);

  const voicePayload = {
    text: "voice caption",
    mediaUrl: "file:///tmp/clip.mp3",
    audioAsVoice: true,
  };

  // Pass a copy so the expectation below is compared against untouched values.
  await routeReply({
    payload: { ...voicePayload },
    channel: "slack",
    to: "channel:C123",
    cfg: {} as never,
  });

  const expectedCall = expect.objectContaining({
    channel: "slack",
    to: "channel:C123",
    payloads: [expect.objectContaining(voicePayload)],
  });
  expect(deliver).toHaveBeenCalledTimes(1);
  expect(deliver).toHaveBeenCalledWith(expectedCall);
});
it("uses replyToId as threadTs for Slack", async () => {
mocks.sendMessageSlack.mockClear();
await routeReply({

View File

@@ -130,6 +130,7 @@ export type ChannelOutboundContext = {
to: string;
text: string;
mediaUrl?: string;
audioAsVoice?: boolean;
mediaLocalRoots?: readonly string[];
gifPlayback?: boolean;
/** Send image as document to avoid Telegram compression. */

View File

@@ -1,4 +1,5 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import type { ReplyPayload } from "../auto-reply/types.js";
import type { CliDeps } from "../cli/deps.js";
import type { OpenClawConfig } from "../config/config.js";
import type { SessionEntry } from "../config/sessions.js";
@@ -52,11 +53,17 @@ describe("deliverAgentCommandResult", () => {
sessionEntry?: SessionEntry;
runtime?: RuntimeEnv;
resultText?: string;
payloads?: ReplyPayload[];
}) {
const cfg = {} as OpenClawConfig;
const deps = {} as CliDeps;
const runtime = params.runtime ?? createRuntime();
const result = createResult(params.resultText);
const result = params.payloads
? {
payloads: params.payloads,
meta: { durationMs: 1 },
}
: createResult(params.resultText);
await deliverAgentCommandResult({
cfg,
@@ -284,4 +291,32 @@ describe("deliverAgentCommandResult", () => {
expect(line).toContain("channel=webchat");
expect(line).toContain("ANNOUNCE_SKIP");
});
// With `json: true` and `deliver: false` the result is logged once as a JSON
// envelope; the audioAsVoice flag must be present in that envelope.
it("preserves audioAsVoice in JSON output envelopes", async () => {
  const runtime = createRuntime();

  await runDelivery({
    runtime,
    payloads: [{ text: "voice caption", mediaUrl: "file:///tmp/clip.mp3", audioAsVoice: true }],
    opts: { message: "hello", deliver: false, json: true },
  });

  expect(runtime.log).toHaveBeenCalledTimes(1);

  // Decode the first argument of the first log call back into an object.
  const logMock = runtime.log as ReturnType<typeof vi.fn>;
  const loggedLine = logMock.mock.calls[0]?.[0];
  const envelope: unknown = JSON.parse(String(loggedLine));

  const expectedEnvelope = {
    payloads: [
      {
        text: "voice caption",
        mediaUrl: "file:///tmp/clip.mp3",
        mediaUrls: ["file:///tmp/clip.mp3"],
        audioAsVoice: true,
      },
    ],
    meta: { durationMs: 1 },
  };
  expect(envelope).toEqual(expectedEnvelope);
});
});

View File

@@ -501,6 +501,49 @@ describe("deliverOutboundPayloads", () => {
);
});
// A plugin-provided sendMedia (no channel-specific handler) must receive the
// audioAsVoice flag alongside the caption and media URL.
it("forwards audioAsVoice through generic plugin media delivery", async () => {
  const sendMedia = vi.fn(async () => ({
    channel: "matrix" as const,
    messageId: "mx-1",
    roomId: "!room:example",
  }));

  // Register a minimal matrix plugin whose media path is the spy above.
  const matrixPlugin = createOutboundTestPlugin({
    id: "matrix",
    outbound: {
      deliveryMode: "direct",
      sendText: async ({ to, text }) => ({
        channel: "matrix",
        messageId: `${to}:${text}`,
      }),
      sendMedia,
    },
  });
  setActivePluginRegistry(
    createTestRegistry([{ pluginId: "matrix", source: "test", plugin: matrixPlugin }]),
  );

  await deliverOutboundPayloads({
    cfg: { channels: { matrix: {} } } as OpenClawConfig,
    channel: "matrix",
    to: "room:!room:example",
    payloads: [{ text: "voice caption", mediaUrl: "file:///tmp/clip.mp3", audioAsVoice: true }],
  });

  const expectedCtx = expect.objectContaining({
    to: "room:!room:example",
    text: "voice caption",
    mediaUrl: "file:///tmp/clip.mp3",
    audioAsVoice: true,
  });
  expect(sendMedia).toHaveBeenCalledWith(expectedCtx);
});
it("includes OpenClaw tmp root in whatsapp mediaLocalRoots", async () => {
const sendWhatsApp = vi.fn().mockResolvedValue({ messageId: "w1", toJid: "jid" });

View File

@@ -78,6 +78,7 @@ type ChannelHandler = {
overrides?: {
replyToId?: string | null;
threadId?: string | number | null;
audioAsVoice?: boolean;
},
) => Promise<OutboundDeliveryResult>;
sendFormattedText?: (
@@ -85,6 +86,7 @@ type ChannelHandler = {
overrides?: {
replyToId?: string | null;
threadId?: string | number | null;
audioAsVoice?: boolean;
},
) => Promise<OutboundDeliveryResult[]>;
sendFormattedMedia?: (
@@ -93,6 +95,7 @@ type ChannelHandler = {
overrides?: {
replyToId?: string | null;
threadId?: string | number | null;
audioAsVoice?: boolean;
},
) => Promise<OutboundDeliveryResult>;
sendText: (
@@ -100,6 +103,7 @@ type ChannelHandler = {
overrides?: {
replyToId?: string | null;
threadId?: string | number | null;
audioAsVoice?: boolean;
},
) => Promise<OutboundDeliveryResult>;
sendMedia: (
@@ -108,6 +112,7 @@ type ChannelHandler = {
overrides?: {
replyToId?: string | null;
threadId?: string | number | null;
audioAsVoice?: boolean;
},
) => Promise<OutboundDeliveryResult>;
};
@@ -159,10 +164,12 @@ function createPluginHandler(
const resolveCtx = (overrides?: {
replyToId?: string | null;
threadId?: string | number | null;
audioAsVoice?: boolean;
}): Omit<ChannelOutboundContext, "text" | "mediaUrl"> => ({
...baseCtx,
replyToId: overrides?.replyToId ?? baseCtx.replyToId,
threadId: overrides?.threadId ?? baseCtx.threadId,
audioAsVoice: overrides?.audioAsVoice,
});
return {
chunker,
@@ -335,6 +342,7 @@ function buildPayloadSummary(payload: ReplyPayload): NormalizedOutboundPayload {
return {
text: parts.text,
mediaUrls: parts.mediaUrls,
audioAsVoice: payload.audioAsVoice === true ? true : undefined,
interactive: payload.interactive,
channelData: payload.channelData,
};
@@ -572,7 +580,11 @@ async function deliverOutboundPayloadsCore(
const sendTextChunks = async (
text: string,
overrides?: { replyToId?: string | null; threadId?: string | number | null },
overrides?: {
replyToId?: string | null;
threadId?: string | number | null;
audioAsVoice?: boolean;
},
) => {
throwIfAborted(abortSignal);
if (!handler.chunker || textLimit === undefined) {
@@ -657,6 +669,7 @@ async function deliverOutboundPayloadsCore(
const sendOverrides = {
replyToId: effectivePayload.replyToId ?? params.replyToId ?? undefined,
threadId: params.threadId ?? undefined,
audioAsVoice: effectivePayload.audioAsVoice === true ? true : undefined,
forceDocument: params.forceDocument,
};
if (

View File

@@ -1308,21 +1308,29 @@ describe("normalizeOutboundPayloadsForJson", () => {
{
input: [
{ text: "hi" },
{ text: "photo", mediaUrl: "https://x.test/a.jpg" },
{ text: "photo", mediaUrl: "https://x.test/a.jpg", audioAsVoice: true },
{ text: "multi", mediaUrls: ["https://x.test/1.png"] },
],
expected: [
{ text: "hi", mediaUrl: null, mediaUrls: undefined, channelData: undefined },
{
text: "hi",
mediaUrl: null,
mediaUrls: undefined,
audioAsVoice: undefined,
channelData: undefined,
},
{
text: "photo",
mediaUrl: "https://x.test/a.jpg",
mediaUrls: ["https://x.test/a.jpg"],
audioAsVoice: true,
channelData: undefined,
},
{
text: "multi",
mediaUrl: null,
mediaUrls: ["https://x.test/1.png"],
audioAsVoice: undefined,
channelData: undefined,
},
],
@@ -1338,6 +1346,7 @@ describe("normalizeOutboundPayloadsForJson", () => {
text: "",
mediaUrl: null,
mediaUrls: ["https://x.test/a.png", "https://x.test/b.png"],
audioAsVoice: undefined,
channelData: undefined,
},
],
@@ -1362,7 +1371,9 @@ describe("normalizeOutboundPayloadsForJson", () => {
{ text: "Reasoning:\n_step_", isReasoning: true },
{ text: "final answer" },
]);
expect(normalized).toEqual([{ text: "final answer", mediaUrl: null, mediaUrls: undefined }]);
expect(normalized).toEqual([
{ text: "final answer", mediaUrl: null, mediaUrls: undefined, audioAsVoice: undefined },
]);
});
});

View File

@@ -83,21 +83,29 @@ describe("normalizeOutboundPayloadsForJson", () => {
{
input: [
{ text: "hi" },
{ text: "photo", mediaUrl: "https://x.test/a.jpg" },
{ text: "photo", mediaUrl: "https://x.test/a.jpg", audioAsVoice: true },
{ text: "multi", mediaUrls: ["https://x.test/1.png"] },
],
expected: [
{ text: "hi", mediaUrl: null, mediaUrls: undefined, channelData: undefined },
{
text: "hi",
mediaUrl: null,
mediaUrls: undefined,
audioAsVoice: undefined,
channelData: undefined,
},
{
text: "photo",
mediaUrl: "https://x.test/a.jpg",
mediaUrls: ["https://x.test/a.jpg"],
audioAsVoice: true,
channelData: undefined,
},
{
text: "multi",
mediaUrl: null,
mediaUrls: ["https://x.test/1.png"],
audioAsVoice: undefined,
channelData: undefined,
},
],
@@ -113,6 +121,7 @@ describe("normalizeOutboundPayloadsForJson", () => {
text: "",
mediaUrl: null,
mediaUrls: ["https://x.test/a.png", "https://x.test/b.png"],
audioAsVoice: undefined,
channelData: undefined,
},
],
@@ -138,7 +147,9 @@ describe("normalizeOutboundPayloadsForJson", () => {
{ text: "Reasoning:\n_step_", isReasoning: true },
{ text: "final answer" },
]),
).toEqual([{ text: "final answer", mediaUrl: null, mediaUrls: undefined }]);
).toEqual([
{ text: "final answer", mediaUrl: null, mediaUrls: undefined, audioAsVoice: undefined },
]);
});
});

View File

@@ -16,6 +16,7 @@ import {
/**
 * Normalized form of a reply payload used by outbound delivery.
 *
 * The visible producers build this from a payload's resolved text and media
 * parts; optional fields are left unset (or `undefined`) when the source
 * payload does not provide them.
 */
export type NormalizedOutboundPayload = {
/** Text to send. */
text: string;
/** Media URLs resolved for this payload. */
mediaUrls: string[];
/**
 * Voice-delivery flag carried over from the source payload. The visible
 * producers set it to `true` only when the source value is exactly `true`,
 * and to `undefined` otherwise, so `false` is never emitted by them.
 */
audioAsVoice?: boolean;
/** Interactive reply content, when the source payload has any. */
interactive?: InteractiveReply;
/** Channel-specific data forwarded from the source payload. */
channelData?: Record<string, unknown>;
};
@@ -24,6 +25,7 @@ export type OutboundPayloadJson = {
text: string;
mediaUrl: string | null;
mediaUrls?: string[];
audioAsVoice?: boolean;
interactive?: InteractiveReply;
channelData?: Record<string, unknown>;
};
@@ -111,6 +113,7 @@ export function normalizeOutboundPayloads(
normalizedPayloads.push({
text,
mediaUrls: parts.mediaUrls,
audioAsVoice: payload.audioAsVoice === true ? true : undefined,
...(hasInteractive ? { interactive } : {}),
...(hasChannelData ? { channelData } : {}),
});
@@ -128,6 +131,7 @@ export function normalizeOutboundPayloadsForJson(
text: parts.text,
mediaUrl: payload.mediaUrl ?? null,
mediaUrls: parts.mediaUrls.length ? parts.mediaUrls : undefined,
audioAsVoice: payload.audioAsVoice === true ? true : undefined,
interactive: payload.interactive,
channelData: payload.channelData,
});