diff --git a/src/gateway/chat-attachments.test.ts b/src/gateway/chat-attachments.test.ts index ff68480b2c2..7f30da554fa 100644 --- a/src/gateway/chat-attachments.test.ts +++ b/src/gateway/chat-attachments.test.ts @@ -18,10 +18,14 @@ vi.mock("../media/store.js", async (importOriginal) => { deleteMediaBuffer: deleteMediaBufferMock, }; }); + +import type { OpenClawConfig } from "../config/types.openclaw.js"; import { buildMessageWithAttachments, type ChatAttachment, + DEFAULT_CHAT_ATTACHMENT_MAX_MB, parseMessageWithAttachments, + resolveChatAttachmentMaxBytes, UnsupportedAttachmentError, } from "./chat-attachments.js"; @@ -411,6 +415,32 @@ describe("parseMessageWithAttachments validation errors", () => { }); }); +describe("resolveChatAttachmentMaxBytes", () => { + const MB = 1024 * 1024; + const DEFAULT_BYTES = DEFAULT_CHAT_ATTACHMENT_MAX_MB * MB; + + const cfgWithMediaMaxMb = (value: unknown): OpenClawConfig => + ({ agents: { defaults: { mediaMaxMb: value } } }) as unknown as OpenClawConfig; + + it("honours a configured agents.defaults.mediaMaxMb", () => { + expect(resolveChatAttachmentMaxBytes(cfgWithMediaMaxMb(10))).toBe(10 * MB); + expect(resolveChatAttachmentMaxBytes(cfgWithMediaMaxMb(50))).toBe(50 * MB); + }); + + it("falls back to DEFAULT_CHAT_ATTACHMENT_MAX_MB when unset", () => { + expect(resolveChatAttachmentMaxBytes({} as OpenClawConfig)).toBe(DEFAULT_BYTES); + expect(resolveChatAttachmentMaxBytes({ agents: {} } as unknown as OpenClawConfig)).toBe( + DEFAULT_BYTES, + ); + }); + + it("rejects non-positive, non-finite, or non-number values", () => { + for (const bad of [0, -5, Number.NaN, Number.POSITIVE_INFINITY, "50", null, undefined]) { + expect(resolveChatAttachmentMaxBytes(cfgWithMediaMaxMb(bad))).toBe(DEFAULT_BYTES); + } + }); +}); + describe("shared attachment validation", () => { it("rejects invalid base64 content for both builder and parser", async () => { const bad: ChatAttachment = { diff --git a/src/gateway/chat-attachments.ts b/src/gateway/chat-attachments.ts index 6f2df3f70d1..7f61e02d5b2 100644 --- a/src/gateway/chat-attachments.ts +++ b/src/gateway/chat-attachments.ts @@ -257,6 +257,11 @@ export async function parseMessageWithAttachments( const providedMime = normalizeMime(mime); const sniffedMime = normalizeMime(await sniffMimeFromBase64(b64)); const labelMime = normalizeMime(mimeTypeFromFilePath(label)); + + // Prefer specific MIME signals over generic container types. OOXML + // documents (docx/xlsx/pptx) sniff as application/zip; without this + // priority the agent would receive a `.zip` instead of the specific + // Office document the caller declared. const finalMime = (sniffedMime && !isGenericContainerMime(sniffedMime) && sniffedMime) || (providedMime && !isGenericContainerMime(providedMime) && providedMime) || diff --git a/src/gateway/server-methods/agent.ts b/src/gateway/server-methods/agent.ts index 52c49075a63..640df493eb4 100644 --- a/src/gateway/server-methods/agent.ts +++ b/src/gateway/server-methods/agent.ts @@ -79,7 +79,11 @@ import { } from "../../utils/message-channel.js"; import { resolveAssistantIdentity } from "../assistant-identity.js"; import { registerChatAbortController, resolveAgentRunExpiresAtMs } from "../chat-abort.js"; -import { MediaOffloadError, parseMessageWithAttachments } from "../chat-attachments.js"; +import { + MediaOffloadError, + parseMessageWithAttachments, + resolveChatAttachmentMaxBytes, +} from "../chat-attachments.js"; import { resolveAssistantAvatarUrl } from "../control-ui-shared.js"; import { ADMIN_SCOPE } from "../method-scopes.js"; import { GATEWAY_CLIENT_CAPS, hasGatewayClientCap } from "../protocol/client-info.js"; @@ -506,7 +510,7 @@ export const agentHandlers: GatewayRequestHandlers = { try { const parsed = await parseMessageWithAttachments(message, normalizedAttachments, { - maxBytes: 5_000_000, + maxBytes: resolveChatAttachmentMaxBytes(cfg), log: context.logGateway, supportsInlineImages, // agent.run does not yet wire a ctx.MediaPaths stage path, so reject diff --git a/src/gateway/server-methods/chat.directive-tags.test.ts b/src/gateway/server-methods/chat.directive-tags.test.ts index f845dd3284a..fb1f6f0a302 100644 --- a/src/gateway/server-methods/chat.directive-tags.test.ts +++ b/src/gateway/server-methods/chat.directive-tags.test.ts @@ -66,6 +66,10 @@ const mockState = vi.hoisted(() => ({ saveMediaWait: null as Promise | null, activeSaveMediaCalls: 0, maxActiveSaveMediaCalls: 0, + sandboxWorkspace: null as { workspaceDir: string; containerWorkdir?: string } | null, + stageSandboxMediaError: null as Error | null, + stagedRelativePaths: null as string[] | null, + deleteMediaBufferCalls: [] as Array<{ id: string; subdir?: string }>, })); const bindingMocks = vi.hoisted(() => ({ @@ -216,11 +220,38 @@ vi.mock("../../sessions/transcript-events.js", () => ({ ), })); +vi.mock("../../agents/sandbox/context.js", async () => { + const original = await vi.importActual( + "../../agents/sandbox/context.js", + ); + return { + ...original, + ensureSandboxWorkspaceForSession: vi.fn(async () => mockState.sandboxWorkspace), + }; +}); + +vi.mock("../../auto-reply/reply/stage-sandbox-media.js", () => ({ + stageSandboxMedia: vi.fn( + async (params: { ctx: { MediaPaths?: string[]; MediaPath?: string } }) => { + if (mockState.stageSandboxMediaError) { + throw mockState.stageSandboxMediaError; + } + if (mockState.stagedRelativePaths) { + params.ctx.MediaPaths = [...mockState.stagedRelativePaths]; + params.ctx.MediaPath = mockState.stagedRelativePaths[0]; + } + }, + ), +})); + vi.mock("../../media/store.js", async () => { const original = await vi.importActual("../../media/store.js"); return { ...original, + deleteMediaBuffer: vi.fn(async (id: string, subdir?: string) => { + mockState.deleteMediaBufferCalls.push({ id, subdir }); + }), saveMediaBuffer: vi.fn(async (buffer: Buffer, contentType?: string, subdir?: string) => { mockState.activeSaveMediaCalls += 1; mockState.maxActiveSaveMediaCalls = Math.max( @@ -465,6 +496,10 @@ describe("chat directive tag stripping for non-streaming final payloads", () => mockState.maxActiveSaveMediaCalls = 0; bindingMocks.resolveByConversation.mockReset(); bindingMocks.resolveByConversation.mockReturnValue(null); + mockState.sandboxWorkspace = null; + mockState.stageSandboxMediaError = null; + mockState.stagedRelativePaths = null; + mockState.deleteMediaBufferCalls = []; }); it("registers tool-event recipients for clients advertising tool-events capability", async () => { @@ -2522,6 +2557,119 @@ describe("chat directive tag stripping for non-streaming final payloads", () => expect(mockState.lastDispatchCtx?.MediaStaged).toBe(true); }); + it("absolutizes sandbox-relative MediaPaths so media-understanding can resolve them", async () => { + createTranscriptFixture("openclaw-chat-send-non-image-absolutize-"); + mockState.finalText = "ok"; + mockState.sessionEntry = { + modelProvider: "test-provider", + model: "vision-model", + }; + mockState.modelCatalog = [ + { + provider: "test-provider", + id: "vision-model", + name: "Vision model", + input: ["text", "image"], + }, + ]; + mockState.savedMediaResults = [ + { path: "/home/user/.openclaw/media/inbound/report.pdf", contentType: "application/pdf" }, + ]; + mockState.sandboxWorkspace = { workspaceDir: "/sandbox/workspace" }; + mockState.stagedRelativePaths = ["media/inbound/report.pdf"]; + const respond = vi.fn(); + const context = createChatContext(); + const pdf = Buffer.from("%PDF-1.4\n%µ¶\n1 0 obj\n<<>>\nendobj\n").toString("base64"); + + await runNonStreamingChatSend({ + context, + respond, + idempotencyKey: "idem-non-image-absolutize", + message: "read this", + requestParams: { + attachments: [ + { + type: "file", + mimeType: "application/pdf", + fileName: "report.pdf", + content: pdf, + }, + ], + }, + expectBroadcast: false, + }); + + // applyMediaUnderstanding resolves `path.isAbsolute(raw) ? raw : path.resolve(raw)` + // against the gateway CWD. Relative paths would miss the staged copy, so + // prestage rejoins the sandbox workspaceDir to keep the path absolute. + expect(mockState.lastDispatchCtx?.MediaPaths).toEqual([ + "/sandbox/workspace/media/inbound/report.pdf", + ]); + expect(mockState.lastDispatchCtx?.MediaPath).toBe( + "/sandbox/workspace/media/inbound/report.pdf", + ); + expect(mockState.lastDispatchCtx?.MediaStaged).toBe(true); + }); + + it("wraps stageSandboxMedia infrastructure errors as 5xx UNAVAILABLE and cleans up media-store files", async () => { + createTranscriptFixture("openclaw-chat-send-stage-unavailable-"); + mockState.finalText = "ok"; + mockState.sessionEntry = { + modelProvider: "test-provider", + model: "vision-model", + }; + mockState.modelCatalog = [ + { + provider: "test-provider", + id: "vision-model", + name: "Vision model", + input: ["text", "image"], + }, + ]; + mockState.savedMediaResults = [ + { path: "/home/user/.openclaw/media/inbound/report.pdf", contentType: "application/pdf" }, + ]; + mockState.sandboxWorkspace = { workspaceDir: "/sandbox/workspace" }; + mockState.stageSandboxMediaError = Object.assign(new Error("ENOSPC: no space left on device"), { + code: "ENOSPC", + }); + const respond = vi.fn(); + const context = createChatContext(); + const pdf = Buffer.from("%PDF-1.4\n%µ¶\n1 0 obj\n<<>>\nendobj\n").toString("base64"); + + await runNonStreamingChatSend({ + context, + respond, + idempotencyKey: "idem-stage-unavailable", + message: "read this", + requestParams: { + attachments: [ + { + type: "file", + mimeType: "application/pdf", + fileName: "report.pdf", + content: pdf, + }, + ], + }, + expectBroadcast: false, + waitFor: "none", + }); + + // Plain Error from stageSandboxMedia would be misclassified as INVALID_REQUEST + // by the outer catch. Wrapping it in MediaOffloadError routes it to UNAVAILABLE + // so the client retries instead of treating it as a bad request. + expect(mockState.lastDispatchCtx).toBeUndefined(); + expect(respond).toHaveBeenCalledTimes(1); + const [ok, payload, error] = respond.mock.calls[0] ?? []; + expect(ok).toBe(false); + expect(payload).toBeUndefined(); + expect(error?.code).toBe(ErrorCodes.UNAVAILABLE); + expect(error?.message ?? String(error)).toMatch(/ENOSPC|non-image attachments/i); + // Orphaned media-store files are cleaned up before the 5xx surfaces. + expect(mockState.deleteMediaBufferCalls).toEqual([{ id: "saved-media", subdir: "inbound" }]); + }); + it("passes imageOrder for mixed inline and offloaded chat.send attachments", async () => { createTranscriptFixture("openclaw-chat-send-image-order-"); mockState.finalText = "ok"; diff --git a/src/gateway/server-methods/chat.ts b/src/gateway/server-methods/chat.ts index b52f7c45e53..9ef2aa4fcb2 100644 --- a/src/gateway/server-methods/chat.ts +++ b/src/gateway/server-methods/chat.ts @@ -15,6 +15,7 @@ import type { MsgContext, TemplateContext } from "../../auto-reply/templating.js import { extractCanvasFromText } from "../../chat/canvas-render.js"; import { resolveSessionFilePath } from "../../config/sessions.js"; import type { OpenClawConfig } from "../../config/types.openclaw.js"; +import { formatErrorMessage } from "../../infra/errors.js"; import { jsonUtf8Bytes } from "../../infra/json-utf8-bytes.js"; import { normalizeReplyPayloadsForDelivery } from "../../infra/outbound/payloads.js"; import { getSessionBindingService } from "../../infra/outbound/session-binding-service.js"; @@ -51,18 +52,18 @@ import { } from "../chat-abort.js"; import { type ChatImageContent, + MediaOffloadError, type OffloadedRef, parseMessageWithAttachments, resolveChatAttachmentMaxBytes, } from "../chat-attachments.js"; -import { MediaOffloadError } from "../chat-attachments.js"; import { isToolHistoryBlockType, projectChatDisplayMessage, projectRecentChatDisplayMessages, resolveEffectiveChatHistoryMaxChars, } from "../chat-display-projection.js"; -import { stripEnvelopeFromMessage } from "../chat-sanitize.js"; +import { stripEnvelopeFromMessage, stripEnvelopeFromMessages } from "../chat-sanitize.js"; import { augmentChatHistoryWithCliSessionImports } from "../cli-session-history.js"; import { isSuppressedControlReplyText } from "../control-reply-text.js"; import { @@ -771,10 +772,19 @@ function buildChatSendTranscriptMessage(params: { } // Stages non-image offloads into the agent sandbox synchronously so chat.send -// can surface 5xx before respond(). Throws MediaOffloadError on partial-stage -// instead of silently losing files (channel path is best-effort; chat.send is -// strong-delivery RPC). Callers MUST set ctx.MediaStaged=true when this runs -// so the dispatch pipeline skips its own stageSandboxMedia pass. +// can surface 5xx before respond(). Throws MediaOffloadError on any staging +// failure (ENOSPC / EPERM / partial-stage) so the outer chat.send handler can +// map it to UNAVAILABLE (5xx); plain Error would be misclassified as 4xx. All +// offloaded refs are cleaned up from the media store before rethrow. +// Callers MUST set ctx.MediaStaged=true when this runs so the dispatch +// pipeline skips its own stageSandboxMedia pass. +// +// Returned paths are ABSOLUTE (pointing into the sandbox workspace when sandbox +// is enabled, or the media-store origin when it is not). applyMediaUnderstanding +// runs before any further staging in get-reply.ts and uses +// `path.isAbsolute(raw) ? raw : path.resolve(raw)` against the gateway CWD, so +// any relative path here would make media-understanding target the wrong host +// path and silently skip file analysis. async function prestageNonImageOffloads(params: { offloadedRefs: OffloadedRef[]; cfg: OpenClawConfig; @@ -786,46 +796,58 @@ async function prestageNonImageOffloads(params: { return { paths: [], types: [] }; } - const workspaceDir = resolveAgentWorkspaceDir(params.cfg, params.agentId); - const sandbox = await ensureSandboxWorkspaceForSession({ - config: params.cfg, - sessionKey: params.sessionKey, - workspaceDir, - }); - if (!sandbox) { - return { - paths: nonImage.map((ref) => ref.path), - types: nonImage.map((ref) => ref.mimeType), + try { + const workspaceDir = resolveAgentWorkspaceDir(params.cfg, params.agentId); + const sandbox = await ensureSandboxWorkspaceForSession({ + config: params.cfg, + sessionKey: params.sessionKey, + workspaceDir, + }); + if (!sandbox) { + return { + paths: nonImage.map((ref) => ref.path), + types: nonImage.map((ref) => ref.mimeType), + }; + } + + const stagingCtx: MsgContext = { + MediaPath: nonImage[0].path, + MediaPaths: nonImage.map((ref) => ref.path), + MediaType: nonImage[0].mimeType, + MediaTypes: nonImage.map((ref) => ref.mimeType), }; - } + await stageSandboxMedia({ + ctx: stagingCtx, + sessionCtx: stagingCtx as TemplateContext, + cfg: params.cfg, + sessionKey: params.sessionKey, + workspaceDir, + }); - const stagingCtx: MsgContext = { - MediaPath: nonImage[0].path, - MediaPaths: nonImage.map((ref) => ref.path), - MediaType: nonImage[0].mimeType, - MediaTypes: nonImage.map((ref) => ref.mimeType), - }; - await stageSandboxMedia({ - ctx: stagingCtx, - sessionCtx: stagingCtx as TemplateContext, - cfg: params.cfg, - sessionKey: params.sessionKey, - workspaceDir, - }); + const stagedPaths = stagingCtx.MediaPaths ?? []; + const stagedTypes = stagingCtx.MediaTypes ?? nonImage.map((ref) => ref.mimeType); + if (stagedPaths.length !== nonImage.length) { + throw new Error( + `non-image attachment staging incomplete: ${stagedPaths.length}/${nonImage.length} paths staged into sandbox workspace`, + ); + } - const stagedPaths = stagingCtx.MediaPaths ?? []; - const stagedTypes = stagingCtx.MediaTypes ?? nonImage.map((ref) => ref.mimeType); - const allRewritten = - stagedPaths.length === nonImage.length && stagedPaths.every((p) => !path.isAbsolute(p)); - if (!allRewritten) { + const absolutePaths = stagedPaths.map((p) => + path.isAbsolute(p) ? p : path.join(sandbox.workspaceDir, p), + ); + return { paths: absolutePaths, types: stagedTypes, workspaceDir: sandbox.workspaceDir }; + } catch (err) { await Promise.allSettled( params.offloadedRefs.map((ref) => deleteMediaBuffer(ref.id, "inbound")), ); + if (err instanceof MediaOffloadError) { + throw err; + } throw new MediaOffloadError( - `non-image attachment staging incomplete: ${stagedPaths.length}/${nonImage.length} paths rewritten into sandbox workspace`, + `[Gateway Error] Failed to stage non-image attachments into agent workspace: ${formatErrorMessage(err)}`, + { cause: err }, ); } - return { paths: stagedPaths, types: stagedTypes, workspaceDir: sandbox.workspaceDir }; } function resolveChatSendTranscriptMediaFields(savedImages: SavedMedia[]) { diff --git a/src/gateway/server-node-events.runtime.ts b/src/gateway/server-node-events.runtime.ts index 01697810d6a..fa501faccb0 100644 --- a/src/gateway/server-node-events.runtime.ts +++ b/src/gateway/server-node-events.runtime.ts @@ -15,7 +15,7 @@ export { enqueueSystemEvent } from "../infra/system-events.js"; export { deleteMediaBuffer } from "../media/store.js"; export { normalizeMainKey, scopedHeartbeatWakeOptions } from "../routing/session-key.js"; export { defaultRuntime } from "../runtime.js"; -export { parseMessageWithAttachments } from "./chat-attachments.js"; +export { parseMessageWithAttachments, resolveChatAttachmentMaxBytes } from "./chat-attachments.js"; export { normalizeRpcAttachmentsToChatAttachments } from "./server-methods/attachment-normalize.js"; export { loadSessionEntry, diff --git a/src/gateway/server-node-events.test.ts b/src/gateway/server-node-events.test.ts index c8324d75bfa..b084e7e05eb 100644 --- a/src/gateway/server-node-events.test.ts +++ b/src/gateway/server-node-events.test.ts @@ -90,6 +90,7 @@ const runtimeMocks = vi.hoisted(() => ({ parseMessageWithAttachments: parseMessageWithAttachmentsMock, registerApnsRegistration: registerApnsRegistrationMock, requestHeartbeatNow: vi.fn(), + resolveChatAttachmentMaxBytes: vi.fn(() => 20 * 1024 * 1024), resolveGatewayModelSupportsImages: vi.fn( async ({ loadGatewayModelCatalog, diff --git a/src/gateway/server-node-events.ts b/src/gateway/server-node-events.ts index 5fd94038cd0..31bcfc1595f 100644 --- a/src/gateway/server-node-events.ts +++ b/src/gateway/server-node-events.ts @@ -26,6 +26,7 @@ import { parseMessageWithAttachments, registerApnsRegistration, requestHeartbeatNow, + resolveChatAttachmentMaxBytes, resolveGatewayModelSupportsImages, resolveOutboundTarget, resolveSessionAgentId, @@ -434,7 +435,7 @@ export const handleNodeEvent = async (ctx: NodeEventContext, nodeId: string, evt }); try { const parsed = await parseMessageWithAttachments(message, normalizedAttachments, { - maxBytes: 5_000_000, + maxBytes: resolveChatAttachmentMaxBytes(cfg), log: ctx.logGateway, supportsInlineImages, // server-node-events dispatches via agentCommandFromIngress which