fix(gateway): wrap prestage errors as 5xx and absolutize MediaPaths

Signed-off-by: samzong <samzong.lu@gmail.com>
This commit is contained in:
samzong
2026-04-16 16:24:41 +08:00
committed by Frank Yang
parent 4c61d307da
commit bb259ac039
8 changed files with 252 additions and 41 deletions

View File

@@ -18,10 +18,14 @@ vi.mock("../media/store.js", async (importOriginal) => {
deleteMediaBuffer: deleteMediaBufferMock,
};
});
import type { OpenClawConfig } from "../config/types.openclaw.js";
import {
buildMessageWithAttachments,
type ChatAttachment,
DEFAULT_CHAT_ATTACHMENT_MAX_MB,
parseMessageWithAttachments,
resolveChatAttachmentMaxBytes,
UnsupportedAttachmentError,
} from "./chat-attachments.js";
@@ -411,6 +415,32 @@ describe("parseMessageWithAttachments validation errors", () => {
});
});
describe("resolveChatAttachmentMaxBytes", () => {
const MB = 1024 * 1024;
const DEFAULT_BYTES = DEFAULT_CHAT_ATTACHMENT_MAX_MB * MB;
const cfgWithMediaMaxMb = (value: unknown): OpenClawConfig =>
({ agents: { defaults: { mediaMaxMb: value } } }) as unknown as OpenClawConfig;
it("honours a configured agents.defaults.mediaMaxMb", () => {
expect(resolveChatAttachmentMaxBytes(cfgWithMediaMaxMb(10))).toBe(10 * MB);
expect(resolveChatAttachmentMaxBytes(cfgWithMediaMaxMb(50))).toBe(50 * MB);
});
it("falls back to DEFAULT_CHAT_ATTACHMENT_MAX_MB when unset", () => {
expect(resolveChatAttachmentMaxBytes({} as OpenClawConfig)).toBe(DEFAULT_BYTES);
expect(resolveChatAttachmentMaxBytes({ agents: {} } as unknown as OpenClawConfig)).toBe(
DEFAULT_BYTES,
);
});
it("rejects non-positive, non-finite, or non-number values", () => {
for (const bad of [0, -5, Number.NaN, Number.POSITIVE_INFINITY, "50", null, undefined]) {
expect(resolveChatAttachmentMaxBytes(cfgWithMediaMaxMb(bad))).toBe(DEFAULT_BYTES);
}
});
});
describe("shared attachment validation", () => {
it("rejects invalid base64 content for both builder and parser", async () => {
const bad: ChatAttachment = {

View File

@@ -257,6 +257,11 @@ export async function parseMessageWithAttachments(
const providedMime = normalizeMime(mime);
const sniffedMime = normalizeMime(await sniffMimeFromBase64(b64));
const labelMime = normalizeMime(mimeTypeFromFilePath(label));
// Prefer specific MIME signals over generic container types. OOXML
// documents (docx/xlsx/pptx) sniff as application/zip; without this
// priority the agent would receive a `.zip` instead of the specific
// Office document the caller declared.
const finalMime =
(sniffedMime && !isGenericContainerMime(sniffedMime) && sniffedMime) ||
(providedMime && !isGenericContainerMime(providedMime) && providedMime) ||

View File

@@ -79,7 +79,11 @@ import {
} from "../../utils/message-channel.js";
import { resolveAssistantIdentity } from "../assistant-identity.js";
import { registerChatAbortController, resolveAgentRunExpiresAtMs } from "../chat-abort.js";
import { MediaOffloadError, parseMessageWithAttachments } from "../chat-attachments.js";
import {
MediaOffloadError,
parseMessageWithAttachments,
resolveChatAttachmentMaxBytes,
} from "../chat-attachments.js";
import { resolveAssistantAvatarUrl } from "../control-ui-shared.js";
import { ADMIN_SCOPE } from "../method-scopes.js";
import { GATEWAY_CLIENT_CAPS, hasGatewayClientCap } from "../protocol/client-info.js";
@@ -506,7 +510,7 @@ export const agentHandlers: GatewayRequestHandlers = {
try {
const parsed = await parseMessageWithAttachments(message, normalizedAttachments, {
maxBytes: 5_000_000,
maxBytes: resolveChatAttachmentMaxBytes(cfg),
log: context.logGateway,
supportsInlineImages,
// agent.run does not yet wire a ctx.MediaPaths stage path, so reject

View File

@@ -66,6 +66,10 @@ const mockState = vi.hoisted(() => ({
saveMediaWait: null as Promise<void> | null,
activeSaveMediaCalls: 0,
maxActiveSaveMediaCalls: 0,
sandboxWorkspace: null as { workspaceDir: string; containerWorkdir?: string } | null,
stageSandboxMediaError: null as Error | null,
stagedRelativePaths: null as string[] | null,
deleteMediaBufferCalls: [] as Array<{ id: string; subdir?: string }>,
}));
const bindingMocks = vi.hoisted(() => ({
@@ -216,11 +220,38 @@ vi.mock("../../sessions/transcript-events.js", () => ({
),
}));
vi.mock("../../agents/sandbox/context.js", async () => {
const original = await vi.importActual<typeof import("../../agents/sandbox/context.js")>(
"../../agents/sandbox/context.js",
);
return {
...original,
ensureSandboxWorkspaceForSession: vi.fn(async () => mockState.sandboxWorkspace),
};
});
vi.mock("../../auto-reply/reply/stage-sandbox-media.js", () => ({
stageSandboxMedia: vi.fn(
async (params: { ctx: { MediaPaths?: string[]; MediaPath?: string } }) => {
if (mockState.stageSandboxMediaError) {
throw mockState.stageSandboxMediaError;
}
if (mockState.stagedRelativePaths) {
params.ctx.MediaPaths = [...mockState.stagedRelativePaths];
params.ctx.MediaPath = mockState.stagedRelativePaths[0];
}
},
),
}));
vi.mock("../../media/store.js", async () => {
const original =
await vi.importActual<typeof import("../../media/store.js")>("../../media/store.js");
return {
...original,
deleteMediaBuffer: vi.fn(async (id: string, subdir?: string) => {
mockState.deleteMediaBufferCalls.push({ id, subdir });
}),
saveMediaBuffer: vi.fn(async (buffer: Buffer, contentType?: string, subdir?: string) => {
mockState.activeSaveMediaCalls += 1;
mockState.maxActiveSaveMediaCalls = Math.max(
@@ -465,6 +496,10 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
mockState.maxActiveSaveMediaCalls = 0;
bindingMocks.resolveByConversation.mockReset();
bindingMocks.resolveByConversation.mockReturnValue(null);
mockState.sandboxWorkspace = null;
mockState.stageSandboxMediaError = null;
mockState.stagedRelativePaths = null;
mockState.deleteMediaBufferCalls = [];
});
it("registers tool-event recipients for clients advertising tool-events capability", async () => {
@@ -2522,6 +2557,119 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
expect(mockState.lastDispatchCtx?.MediaStaged).toBe(true);
});
it("absolutizes sandbox-relative MediaPaths so media-understanding can resolve them", async () => {
createTranscriptFixture("openclaw-chat-send-non-image-absolutize-");
mockState.finalText = "ok";
mockState.sessionEntry = {
modelProvider: "test-provider",
model: "vision-model",
};
mockState.modelCatalog = [
{
provider: "test-provider",
id: "vision-model",
name: "Vision model",
input: ["text", "image"],
},
];
mockState.savedMediaResults = [
{ path: "/home/user/.openclaw/media/inbound/report.pdf", contentType: "application/pdf" },
];
mockState.sandboxWorkspace = { workspaceDir: "/sandbox/workspace" };
mockState.stagedRelativePaths = ["media/inbound/report.pdf"];
const respond = vi.fn();
const context = createChatContext();
const pdf = Buffer.from("%PDF-1.4\n%µ¶\n1 0 obj\n<<>>\nendobj\n").toString("base64");
await runNonStreamingChatSend({
context,
respond,
idempotencyKey: "idem-non-image-absolutize",
message: "read this",
requestParams: {
attachments: [
{
type: "file",
mimeType: "application/pdf",
fileName: "report.pdf",
content: pdf,
},
],
},
expectBroadcast: false,
});
// applyMediaUnderstanding resolves `path.isAbsolute(raw) ? raw : path.resolve(raw)`
// against the gateway CWD. Relative paths would miss the staged copy, so
// prestage rejoins the sandbox workspaceDir to keep the path absolute.
expect(mockState.lastDispatchCtx?.MediaPaths).toEqual([
"/sandbox/workspace/media/inbound/report.pdf",
]);
expect(mockState.lastDispatchCtx?.MediaPath).toBe(
"/sandbox/workspace/media/inbound/report.pdf",
);
expect(mockState.lastDispatchCtx?.MediaStaged).toBe(true);
});
it("wraps stageSandboxMedia infrastructure errors as 5xx UNAVAILABLE and cleans up media-store files", async () => {
createTranscriptFixture("openclaw-chat-send-stage-unavailable-");
mockState.finalText = "ok";
mockState.sessionEntry = {
modelProvider: "test-provider",
model: "vision-model",
};
mockState.modelCatalog = [
{
provider: "test-provider",
id: "vision-model",
name: "Vision model",
input: ["text", "image"],
},
];
mockState.savedMediaResults = [
{ path: "/home/user/.openclaw/media/inbound/report.pdf", contentType: "application/pdf" },
];
mockState.sandboxWorkspace = { workspaceDir: "/sandbox/workspace" };
mockState.stageSandboxMediaError = Object.assign(new Error("ENOSPC: no space left on device"), {
code: "ENOSPC",
});
const respond = vi.fn();
const context = createChatContext();
const pdf = Buffer.from("%PDF-1.4\n%µ¶\n1 0 obj\n<<>>\nendobj\n").toString("base64");
await runNonStreamingChatSend({
context,
respond,
idempotencyKey: "idem-stage-unavailable",
message: "read this",
requestParams: {
attachments: [
{
type: "file",
mimeType: "application/pdf",
fileName: "report.pdf",
content: pdf,
},
],
},
expectBroadcast: false,
waitFor: "none",
});
// Plain Error from stageSandboxMedia would be misclassified as INVALID_REQUEST
// by the outer catch. Wrapping it in MediaOffloadError routes it to UNAVAILABLE
// so the client retries instead of treating it as a bad request.
expect(mockState.lastDispatchCtx).toBeUndefined();
expect(respond).toHaveBeenCalledTimes(1);
const [ok, payload, error] = respond.mock.calls[0] ?? [];
expect(ok).toBe(false);
expect(payload).toBeUndefined();
expect(error?.code).toBe(ErrorCodes.UNAVAILABLE);
expect(error?.message ?? String(error)).toMatch(/ENOSPC|non-image attachments/i);
// Orphaned media-store files are cleaned up before the 5xx surfaces.
expect(mockState.deleteMediaBufferCalls).toEqual([{ id: "saved-media", subdir: "inbound" }]);
});
it("passes imageOrder for mixed inline and offloaded chat.send attachments", async () => {
createTranscriptFixture("openclaw-chat-send-image-order-");
mockState.finalText = "ok";

View File

@@ -15,6 +15,7 @@ import type { MsgContext, TemplateContext } from "../../auto-reply/templating.js
import { extractCanvasFromText } from "../../chat/canvas-render.js";
import { resolveSessionFilePath } from "../../config/sessions.js";
import type { OpenClawConfig } from "../../config/types.openclaw.js";
import { formatErrorMessage } from "../../infra/errors.js";
import { jsonUtf8Bytes } from "../../infra/json-utf8-bytes.js";
import { normalizeReplyPayloadsForDelivery } from "../../infra/outbound/payloads.js";
import { getSessionBindingService } from "../../infra/outbound/session-binding-service.js";
@@ -51,18 +52,18 @@ import {
} from "../chat-abort.js";
import {
type ChatImageContent,
MediaOffloadError,
type OffloadedRef,
parseMessageWithAttachments,
resolveChatAttachmentMaxBytes,
} from "../chat-attachments.js";
import { MediaOffloadError } from "../chat-attachments.js";
import {
isToolHistoryBlockType,
projectChatDisplayMessage,
projectRecentChatDisplayMessages,
resolveEffectiveChatHistoryMaxChars,
} from "../chat-display-projection.js";
import { stripEnvelopeFromMessage } from "../chat-sanitize.js";
import { stripEnvelopeFromMessage, stripEnvelopeFromMessages } from "../chat-sanitize.js";
import { augmentChatHistoryWithCliSessionImports } from "../cli-session-history.js";
import { isSuppressedControlReplyText } from "../control-reply-text.js";
import {
@@ -771,10 +772,19 @@ function buildChatSendTranscriptMessage(params: {
}
// Stages non-image offloads into the agent sandbox synchronously so chat.send
// can surface 5xx before respond(). Throws MediaOffloadError on partial-stage
// instead of silently losing files (channel path is best-effort; chat.send is
// strong-delivery RPC). Callers MUST set ctx.MediaStaged=true when this runs
// so the dispatch pipeline skips its own stageSandboxMedia pass.
// can surface 5xx before respond(). Throws MediaOffloadError on any staging
// failure (ENOSPC / EPERM / partial-stage) so the outer chat.send handler can
// map it to UNAVAILABLE (5xx); plain Error would be misclassified as 4xx. All
// offloaded refs are cleaned up from the media store before rethrow.
// Callers MUST set ctx.MediaStaged=true when this runs so the dispatch
// pipeline skips its own stageSandboxMedia pass.
//
// Returned paths are ABSOLUTE (pointing into the sandbox workspace when sandbox
// is enabled, or the media-store origin when it is not). applyMediaUnderstanding
// runs before any further staging in get-reply.ts and uses
// `path.isAbsolute(raw) ? raw : path.resolve(raw)` against the gateway CWD, so
// any relative path here would make media-understanding target the wrong host
// path and silently skip file analysis.
async function prestageNonImageOffloads(params: {
offloadedRefs: OffloadedRef[];
cfg: OpenClawConfig;
@@ -786,46 +796,58 @@ async function prestageNonImageOffloads(params: {
return { paths: [], types: [] };
}
const workspaceDir = resolveAgentWorkspaceDir(params.cfg, params.agentId);
const sandbox = await ensureSandboxWorkspaceForSession({
config: params.cfg,
sessionKey: params.sessionKey,
workspaceDir,
});
if (!sandbox) {
return {
paths: nonImage.map((ref) => ref.path),
types: nonImage.map((ref) => ref.mimeType),
try {
const workspaceDir = resolveAgentWorkspaceDir(params.cfg, params.agentId);
const sandbox = await ensureSandboxWorkspaceForSession({
config: params.cfg,
sessionKey: params.sessionKey,
workspaceDir,
});
if (!sandbox) {
return {
paths: nonImage.map((ref) => ref.path),
types: nonImage.map((ref) => ref.mimeType),
};
}
const stagingCtx: MsgContext = {
MediaPath: nonImage[0].path,
MediaPaths: nonImage.map((ref) => ref.path),
MediaType: nonImage[0].mimeType,
MediaTypes: nonImage.map((ref) => ref.mimeType),
};
}
await stageSandboxMedia({
ctx: stagingCtx,
sessionCtx: stagingCtx as TemplateContext,
cfg: params.cfg,
sessionKey: params.sessionKey,
workspaceDir,
});
const stagingCtx: MsgContext = {
MediaPath: nonImage[0].path,
MediaPaths: nonImage.map((ref) => ref.path),
MediaType: nonImage[0].mimeType,
MediaTypes: nonImage.map((ref) => ref.mimeType),
};
await stageSandboxMedia({
ctx: stagingCtx,
sessionCtx: stagingCtx as TemplateContext,
cfg: params.cfg,
sessionKey: params.sessionKey,
workspaceDir,
});
const stagedPaths = stagingCtx.MediaPaths ?? [];
const stagedTypes = stagingCtx.MediaTypes ?? nonImage.map((ref) => ref.mimeType);
if (stagedPaths.length !== nonImage.length) {
throw new Error(
`non-image attachment staging incomplete: ${stagedPaths.length}/${nonImage.length} paths staged into sandbox workspace`,
);
}
const stagedPaths = stagingCtx.MediaPaths ?? [];
const stagedTypes = stagingCtx.MediaTypes ?? nonImage.map((ref) => ref.mimeType);
const allRewritten =
stagedPaths.length === nonImage.length && stagedPaths.every((p) => !path.isAbsolute(p));
if (!allRewritten) {
const absolutePaths = stagedPaths.map((p) =>
path.isAbsolute(p) ? p : path.join(sandbox.workspaceDir, p),
);
return { paths: absolutePaths, types: stagedTypes, workspaceDir: sandbox.workspaceDir };
} catch (err) {
await Promise.allSettled(
params.offloadedRefs.map((ref) => deleteMediaBuffer(ref.id, "inbound")),
);
if (err instanceof MediaOffloadError) {
throw err;
}
throw new MediaOffloadError(
`non-image attachment staging incomplete: ${stagedPaths.length}/${nonImage.length} paths rewritten into sandbox workspace`,
`[Gateway Error] Failed to stage non-image attachments into agent workspace: ${formatErrorMessage(err)}`,
{ cause: err },
);
}
return { paths: stagedPaths, types: stagedTypes, workspaceDir: sandbox.workspaceDir };
}
function resolveChatSendTranscriptMediaFields(savedImages: SavedMedia[]) {

View File

@@ -15,7 +15,7 @@ export { enqueueSystemEvent } from "../infra/system-events.js";
export { deleteMediaBuffer } from "../media/store.js";
export { normalizeMainKey, scopedHeartbeatWakeOptions } from "../routing/session-key.js";
export { defaultRuntime } from "../runtime.js";
export { parseMessageWithAttachments } from "./chat-attachments.js";
export { parseMessageWithAttachments, resolveChatAttachmentMaxBytes } from "./chat-attachments.js";
export { normalizeRpcAttachmentsToChatAttachments } from "./server-methods/attachment-normalize.js";
export {
loadSessionEntry,

View File

@@ -90,6 +90,7 @@ const runtimeMocks = vi.hoisted(() => ({
parseMessageWithAttachments: parseMessageWithAttachmentsMock,
registerApnsRegistration: registerApnsRegistrationMock,
requestHeartbeatNow: vi.fn(),
resolveChatAttachmentMaxBytes: vi.fn(() => 20 * 1024 * 1024),
resolveGatewayModelSupportsImages: vi.fn(
async ({
loadGatewayModelCatalog,

View File

@@ -26,6 +26,7 @@ import {
parseMessageWithAttachments,
registerApnsRegistration,
requestHeartbeatNow,
resolveChatAttachmentMaxBytes,
resolveGatewayModelSupportsImages,
resolveOutboundTarget,
resolveSessionAgentId,
@@ -434,7 +435,7 @@ export const handleNodeEvent = async (ctx: NodeEventContext, nodeId: string, evt
});
try {
const parsed = await parseMessageWithAttachments(message, normalizedAttachments, {
maxBytes: 5_000_000,
maxBytes: resolveChatAttachmentMaxBytes(cfg),
log: ctx.logGateway,
supportsInlineImages,
// server-node-events dispatches via agentCommandFromIngress which