From b63b33adb6b52097f55f19c87c37c6c74b7a5978 Mon Sep 17 00:00:00 2001 From: samzong Date: Thu, 16 Apr 2026 23:19:17 +0800 Subject: [PATCH] Gateway: reject sandbox-oversized non-image attachments as 4xx --- .../chat.directive-tags.test.ts | 59 +++++++++++++++++++ src/gateway/server-methods/chat.ts | 30 +++++++++- 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/src/gateway/server-methods/chat.directive-tags.test.ts b/src/gateway/server-methods/chat.directive-tags.test.ts index d32f8cb7b40..df43a29b4ec 100644 --- a/src/gateway/server-methods/chat.directive-tags.test.ts +++ b/src/gateway/server-methods/chat.directive-tags.test.ts @@ -2746,6 +2746,65 @@ describe("chat directive tag stripping for non-streaming final payloads", () => ]); }); + it("rejects sandbox-oversized non-image attachments as 4xx before staging", async () => { + // Regression: resolveChatAttachmentMaxBytes defaults to 20MB, but + // stageSandboxMedia caps each file at STAGED_MEDIA_MAX_BYTES (5MB) and + // silently drops oversize files. Without a pre-check, a sandbox session + // accepting a 5-20MB non-image would fail staging and surface as a + // retryable 5xx UNAVAILABLE, misleading clients into retrying a + // deterministically broken request. + createTranscriptFixture("openclaw-chat-send-sandbox-oversize-"); + mockState.finalText = "ok"; + mockState.sessionEntry = { + modelProvider: "test-provider", + model: "vision-model", + }; + mockState.modelCatalog = [ + { + provider: "test-provider", + id: "vision-model", + name: "Vision model", + input: ["text", "image"], + }, + ]; + mockState.savedMediaResults = [ + { path: "/home/user/.openclaw/media/inbound/huge.pdf", contentType: "application/pdf" }, + ]; + mockState.sandboxWorkspace = { workspaceDir: "/sandbox/workspace" }; + const respond = vi.fn(); + const context = createChatContext(); + // 6MB buffer — above STAGED_MEDIA_MAX_BYTES (5MB) but below the 20MB parse cap. + const oversized = Buffer.alloc(6 * 1024 * 1024); + oversized.set(Buffer.from("%PDF-1.4\n"), 0); + const pdf = oversized.toString("base64"); + + await runNonStreamingChatSend({ + context, + respond, + idempotencyKey: "idem-sandbox-oversize", + message: "read this", + requestParams: { + attachments: [ + { type: "file", mimeType: "application/pdf", fileName: "huge.pdf", content: pdf }, + ], + }, + expectBroadcast: false, + waitFor: "none", + }); + + expect(mockState.lastDispatchCtx).toBeUndefined(); + expect(respond).toHaveBeenCalledTimes(1); + const [ok, payload, error] = respond.mock.calls[0] ?? []; + expect(ok).toBe(false); + expect(payload).toBeUndefined(); + // 4xx, not 5xx — retrying a file that exceeds the staging cap cannot + // succeed, so the failure must be surfaced as a client-side rejection. + expect(error?.code).toBe(ErrorCodes.INVALID_REQUEST); + expect(error?.message ?? String(error)).toMatch(/sandbox staging limit/i); + // Orphaned media-store entries are cleaned up before the 4xx surfaces. + expect(mockState.deleteMediaBufferCalls).toEqual([{ id: "saved-media", subdir: "inbound" }]); + }); + it("passes imageOrder for mixed inline and offloaded chat.send attachments", async () => { createTranscriptFixture("openclaw-chat-send-image-order-"); mockState.finalText = "ok"; diff --git a/src/gateway/server-methods/chat.ts b/src/gateway/server-methods/chat.ts index 0424460be24..065ecd11f63 100644 --- a/src/gateway/server-methods/chat.ts +++ b/src/gateway/server-methods/chat.ts @@ -26,7 +26,12 @@ import { } from "../../media/local-roots.js"; import { isAudioFileName } from "../../media/mime.js"; import type { PromptImageOrderEntry } from "../../media/prompt-image-order.js"; -import { deleteMediaBuffer, type SavedMedia, saveMediaBuffer } from "../../media/store.js"; +import { + deleteMediaBuffer, + MEDIA_MAX_BYTES, + type SavedMedia, + saveMediaBuffer, +} from "../../media/store.js"; import { createChannelReplyPipeline } from "../../plugin-sdk/channel-reply-pipeline.js"; import { isPluginOwnedSessionBindingRecord } from "../../plugins/conversation-binding.js"; import { normalizeInputProvenance, type InputProvenance } from "../../sessions/input-provenance.js"; @@ -56,6 +61,7 @@ import { type OffloadedRef, parseMessageWithAttachments, resolveChatAttachmentMaxBytes, + UnsupportedAttachmentError, } from "../chat-attachments.js"; import { isToolHistoryBlockType, @@ -827,6 +833,23 @@ async function prestageNonImageOffloads(params: { }; } + // stageSandboxMedia caps each file at STAGED_MEDIA_MAX_BYTES (= + // MEDIA_MAX_BYTES, 5MB) and silently skips oversized files. The parse cap + // (resolveChatAttachmentMaxBytes, default 20MB) is higher, so a sandboxed + // session receiving a non-image file between the two caps would otherwise + // pass parse, fail staging, and surface as a retryable 5xx even though + // retry cannot succeed. Reject here as a client-side 4xx instead. + const oversizedForSandbox = nonImage.filter((ref) => ref.sizeBytes > MEDIA_MAX_BYTES); + if (oversizedForSandbox.length > 0) { + const details = oversizedForSandbox + .map((ref) => `${ref.label} (${ref.sizeBytes} bytes)`) + .join(", "); + throw new UnsupportedAttachmentError( + "non-image-too-large-for-sandbox", + `non-image attachments exceed sandbox staging limit (${MEDIA_MAX_BYTES} bytes): ${details}`, + ); + } + const stagingCtx: MsgContext = { MediaPath: nonImage[0].path, MediaPaths: nonImage.map((ref) => ref.path), @@ -868,6 +891,11 @@ async function prestageNonImageOffloads(params: { if (err instanceof MediaOffloadError) { throw err; } + // Sandbox-oversize rejections are client-side 4xx (see check above). Wrapping + // them as MediaOffloadError would misclassify them as retryable 5xx. + if (err instanceof UnsupportedAttachmentError) { + throw err; + } throw new MediaOffloadError( `[Gateway Error] Failed to stage non-image attachments into agent workspace: ${formatErrorMessage(err)}`, { cause: err },