From 0e9a6da7b817890e251d0ef8fa04f53b9bd491dc Mon Sep 17 00:00:00 2001 From: George Pickett Date: Mon, 13 Apr 2026 14:54:13 -0700 Subject: [PATCH] Reply media: persist preferred tmp-root media --- CHANGELOG.md | 1 + .../reply/reply-media-paths.test.ts | 69 +++++++++++++++++-- src/auto-reply/reply/reply-media-paths.ts | 65 ++++++++--------- 3 files changed, 98 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 816a5407297..23c1cb4c9cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ Docs: https://docs.openclaw.ai - Agents/OpenAI: map `minimal` thinking to OpenAI's supported `low` reasoning effort for GPT-5.4 requests, so embedded runs stop failing request validation. - Voice-call/media-stream: resolve the source IP from trusted forwarding headers for per-IP pending-connection limits when `webhookSecurity.trustForwardingHeaders` and `trustedProxyIPs` are configured, and reserve `maxConnections` capacity for in-flight WebSocket upgrades so concurrent handshakes can no longer momentarily exceed the operator-set cap. (#66027) Thanks @eleqtrizit. - Feishu/allowlist: canonicalize allowlist entries by explicit `user`/`chat` kind, strip repeated `feishu:`/`lark:` provider prefixes, and stop folding opaque Feishu IDs to lowercase, so allowlist matching no longer crosses user/chat namespaces or widens to case-insensitive ID matches the operator did not intend. (#66021) Thanks @eleqtrizit. +- TTS/reply media: persist OpenClaw temp voice outputs into managed outbound media and allow them through reply-media normalization, so voice-note replies stop silently dropping. (#63511) Thanks @jetd1. ## 2026.4.12 diff --git a/src/auto-reply/reply/reply-media-paths.test.ts b/src/auto-reply/reply/reply-media-paths.test.ts index 5f25e0f3d90..3f763924251 100644 --- a/src/auto-reply/reply/reply-media-paths.test.ts +++ b/src/auto-reply/reply/reply-media-paths.test.ts @@ -2,12 +2,21 @@ import path from "node:path"; import { beforeEach, describe, expect, it, vi } from "vitest"; const ensureSandboxWorkspaceForSession = vi.hoisted(() => vi.fn()); +const resolvePreferredOpenClawTmpDir = vi.hoisted(() => vi.fn(() => "/private/tmp/openclaw-501")); const saveMediaSource = vi.hoisted(() => vi.fn()); vi.mock("../../agents/sandbox.js", () => ({ ensureSandboxWorkspaceForSession, })); +vi.mock("../../infra/tmp-openclaw-dir.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + resolvePreferredOpenClawTmpDir, + }; +}); + vi.mock("../../media/store.js", () => ({ saveMediaSource, })); @@ -17,6 +26,7 @@ import { createReplyMediaPathNormalizer } from "./reply-media-paths.js"; describe("createReplyMediaPathNormalizer", () => { beforeEach(() => { ensureSandboxWorkspaceForSession.mockReset().mockResolvedValue(null); + resolvePreferredOpenClawTmpDir.mockReset().mockReturnValue("/private/tmp/openclaw-501"); saveMediaSource.mockReset(); vi.unstubAllEnvs(); }); @@ -177,9 +187,15 @@ describe("createReplyMediaPathNormalizer", () => { }); }); - it("keeps TTS voice output from the OpenClaw temp directory", async () => { - // resolvePreferredOpenClawTmpDir() returns /tmp/openclaw on POSIX when it exists. - // We rely on the real function (no mock) since the test environment has /tmp/openclaw. + it("persists TTS voice output from the preferred OpenClaw temp directory", async () => { + const tmpVoicePath = path.join( + "/private/tmp/openclaw-501", + "tts-abc123", + "voice-1234567890.opus", + ); + saveMediaSource.mockResolvedValue({ + path: "/Users/peter/.openclaw/media/outbound/tts-voice.opus", + }); const normalize = createReplyMediaPathNormalizer({ cfg: {}, sessionKey: "session-key", @@ -187,12 +203,53 @@ describe("createReplyMediaPathNormalizer", () => { }); const result = await normalize({ - mediaUrls: ["/tmp/openclaw/tts-abc123/voice-1234567890.opus"], + mediaUrls: [tmpVoicePath], + }); + + expect(saveMediaSource).toHaveBeenCalledWith(tmpVoicePath, undefined, "outbound"); + expect(result).toMatchObject({ + mediaUrl: "/Users/peter/.openclaw/media/outbound/tts-voice.opus", + mediaUrls: ["/Users/peter/.openclaw/media/outbound/tts-voice.opus"], + }); + }); + + it("falls back to the original preferred tmp path when persisting TTS media fails", async () => { + const tmpVoicePath = path.join( + "/private/tmp/openclaw-501", + "tts-fallback", + "voice-1234567890.opus", + ); + saveMediaSource.mockRejectedValue(new Error("disk full")); + const normalize = createReplyMediaPathNormalizer({ + cfg: {}, + sessionKey: "session-key", + workspaceDir: "/tmp/agent-workspace", + }); + + const result = await normalize({ + mediaUrls: [tmpVoicePath], }); expect(result).toMatchObject({ - mediaUrl: "/tmp/openclaw/tts-abc123/voice-1234567890.opus", - mediaUrls: ["/tmp/openclaw/tts-abc123/voice-1234567890.opus"], + mediaUrl: tmpVoicePath, + mediaUrls: [tmpVoicePath], + }); + }); + + it("drops host tmp paths outside the preferred OpenClaw temp directory", async () => { + const normalize = createReplyMediaPathNormalizer({ + cfg: {}, + sessionKey: "session-key", + workspaceDir: "/tmp/agent-workspace", + }); + + const result = await normalize({ + mediaUrls: ["/private/tmp/not-openclaw/voice-1234567890.opus"], + }); + + expect(result).toMatchObject({ + mediaUrl: undefined, + mediaUrls: undefined, }); expect(saveMediaSource).not.toHaveBeenCalled(); }); diff --git a/src/auto-reply/reply/reply-media-paths.ts b/src/auto-reply/reply/reply-media-paths.ts index 49f9753540d..9f0192b7129 100644 --- a/src/auto-reply/reply/reply-media-paths.ts +++ b/src/auto-reply/reply/reply-media-paths.ts @@ -19,6 +19,7 @@ const SCHEME_RE = /^[a-zA-Z][a-zA-Z0-9+.-]*:/; const HAS_FILE_EXT_RE = /\.\w{1,10}$/; const AGENT_STATE_MEDIA_DIRNAME = path.join(".openclaw", "media"); const MANAGED_GLOBAL_MEDIA_SUBDIRS = new Set(["outbound"]); +let cachedPreferredTmpRoot: string | null | undefined; function isPathInside(root: string, candidate: string): boolean { const relative = path.relative(path.resolve(root), path.resolve(candidate)); @@ -35,6 +36,32 @@ function isManagedGlobalReplyMediaPath(candidate: string): boolean { return MANAGED_GLOBAL_MEDIA_SUBDIRS.has(firstSegment) || firstSegment.startsWith("tool-"); } +function resolvePreferredReplyMediaTmpRoot(): string | undefined { + if (cachedPreferredTmpRoot !== undefined) { + return cachedPreferredTmpRoot ?? undefined; + } + try { + cachedPreferredTmpRoot = path.resolve(resolvePreferredOpenClawTmpDir()); + } catch { + cachedPreferredTmpRoot = null; + } + return cachedPreferredTmpRoot ?? undefined; +} + +function buildVolatileReplyMediaRoots(params: { + workspaceDir: string; + sandboxRoot?: string; +}): string[] { + const roots = [params.workspaceDir, params.sandboxRoot] + .filter((root): root is string => Boolean(root)) + .map((root) => path.join(path.resolve(root), AGENT_STATE_MEDIA_DIRNAME)); + const preferredTmpRoot = resolvePreferredReplyMediaTmpRoot(); + if (preferredTmpRoot) { + roots.push(preferredTmpRoot); + } + return roots; +} + function isAllowedAbsoluteReplyMediaPath(params: { candidate: string; workspaceDir: string; @@ -43,32 +70,7 @@ function isAllowedAbsoluteReplyMediaPath(params: { if (isManagedGlobalReplyMediaPath(params.candidate)) { return true; } - // Allow media from the OpenClaw temp directory (TTS output, etc.). - // These are trusted paths written by OpenClaw's own tooling - // and should be deliverable as reply media. - if (isOpenClawTmpPath(params.candidate)) { - return true; - } - const volatileRoots = [params.workspaceDir, params.sandboxRoot] - .filter((root): root is string => Boolean(root)) - .map((root) => path.join(path.resolve(root), AGENT_STATE_MEDIA_DIRNAME)); - return volatileRoots.some((root) => isPathInside(root, params.candidate)); -} - -/** - * Check whether a path is inside the OpenClaw temp directory. - * These are trusted paths written by OpenClaw's own tooling - * (TTS, media processing, etc.) and should be deliverable as reply media. - */ -let cachedTmpRoot: string | undefined; - -function isOpenClawTmpPath(candidate: string): boolean { - try { - cachedTmpRoot ??= resolvePreferredOpenClawTmpDir(); - return isPathInside(cachedTmpRoot, candidate); - } catch { - return false; - } + return buildVolatileReplyMediaRoots(params).some((root) => isPathInside(root, params.candidate)); } function isLikelyLocalMediaSource(media: string): boolean { @@ -115,14 +117,15 @@ export function createReplyMediaPathNormalizer(params: { return await sandboxRootPromise; }; - const persistVolatileAgentMedia = async (media: string): Promise => { + const persistVolatileReplyMedia = async (media: string): Promise => { if (!path.isAbsolute(media)) { return media; } const sandboxRoot = await resolveSandboxRoot(); - const volatileRoots = [params.workspaceDir, sandboxRoot] - .filter((root): root is string => Boolean(root)) - .map((root) => path.join(path.resolve(root), AGENT_STATE_MEDIA_DIRNAME)); + const volatileRoots = buildVolatileReplyMediaRoots({ + workspaceDir: params.workspaceDir, + sandboxRoot, + }); if (!volatileRoots.some((root) => isPathInside(root, media))) { return media; } @@ -222,7 +225,7 @@ export function createReplyMediaPathNormalizer(params: { for (const media of mediaList) { let normalized: string; try { - normalized = await persistVolatileAgentMedia(await normalizeMediaSource(media)); + normalized = await persistVolatileReplyMedia(await normalizeMediaSource(media)); } catch (err) { logVerbose(`dropping blocked reply media ${media}: ${String(err)}`); continue;