mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 08:50:43 +00:00
fix(gateway): keep sandbox-relative MediaPaths and pass workspace context
Root cause: ctx.MediaPaths was overloaded with two incompatible meanings —
sandbox-relative for the agent runtime, host-absolute for host-side
media-understanding. The previous "absolutize in chat.send + set
MediaStaged=true" path made media-understanding work but shipped an
unreadable host path to the agent inside the sandbox.
- Keep ctx.MediaPaths sandbox-relative after prestage; carry a separate
ctx.MediaWorkspaceDir so host-side media-understanding can still resolve
the staged files via localPathRoots / attachment cache.
- stageSandboxMedia returns an authoritative {source -> relpath} map so
prestageNonImageOffloads detects partial staging failures (files admitted
by the 20MB RPC cap but rejected by the 5MB staging cap) and surfaces
them as 5xx MediaOffloadError UNAVAILABLE.
- Reject images above MAX_IMAGE_BYTES at parse time: the agent-side
hydration path drops them silently otherwise, producing a successful
response with a missing image.
- Scope imageOrder to image offloads only and split persistChatSendImages
offloaded refs by mime so non-image files append to the transcript tail
instead of consuming image slots in mixed batches.
Signed-off-by: samzong <samzong.lu@gmail.com>
This commit is contained in:
@@ -19,18 +19,31 @@ import type { MsgContext, TemplateContext } from "../templating.js";
|
||||
|
||||
const STAGED_MEDIA_MAX_BYTES = MEDIA_MAX_BYTES;
|
||||
|
||||
// `staged` maps every absolute source path that was copied into the sandbox
|
||||
// (or remote cache) to its rewritten ctx path. Callers like chat.send's
|
||||
// prestage use this to detect partial failures: unstaged sources keep their
|
||||
// original absolute path in ctx.MediaPaths, so a length check against the
|
||||
// input cannot distinguish "everything staged" from "silently skipped some"
|
||||
// (e.g. the 5MB cap in STAGED_MEDIA_MAX_BYTES rejecting files that the
|
||||
// chat.send RPC already admitted under its 20MB cap).
|
||||
export type StageSandboxMediaResult = {
|
||||
staged: ReadonlyMap<string, string>;
|
||||
};
|
||||
|
||||
const EMPTY_STAGE_RESULT: StageSandboxMediaResult = { staged: new Map() };
|
||||
|
||||
export async function stageSandboxMedia(params: {
|
||||
ctx: MsgContext;
|
||||
sessionCtx: TemplateContext;
|
||||
cfg: OpenClawConfig;
|
||||
sessionKey?: string;
|
||||
workspaceDir: string;
|
||||
}) {
|
||||
}): Promise<StageSandboxMediaResult> {
|
||||
const { ctx, sessionCtx, cfg, sessionKey, workspaceDir } = params;
|
||||
const hasPathsArray = Array.isArray(ctx.MediaPaths) && ctx.MediaPaths.length > 0;
|
||||
const rawPaths = resolveRawPaths(ctx);
|
||||
if (rawPaths.length === 0 || !sessionKey) {
|
||||
return;
|
||||
return EMPTY_STAGE_RESULT;
|
||||
}
|
||||
|
||||
const sandbox = await ensureSandboxWorkspaceForSession({
|
||||
@@ -45,7 +58,7 @@ export async function stageSandboxMedia(params: {
|
||||
: null;
|
||||
const effectiveWorkspaceDir = sandbox?.workspaceDir ?? remoteMediaCacheDir;
|
||||
if (!effectiveWorkspaceDir) {
|
||||
return;
|
||||
return EMPTY_STAGE_RESULT;
|
||||
}
|
||||
|
||||
await fs.mkdir(effectiveWorkspaceDir, { recursive: true });
|
||||
@@ -116,6 +129,8 @@ export async function stageSandboxMedia(params: {
|
||||
staged,
|
||||
hasPathsArray,
|
||||
});
|
||||
|
||||
return { staged };
|
||||
}
|
||||
|
||||
async function stageLocalFileIntoRoot(params: {
|
||||
|
||||
@@ -20,6 +20,7 @@ vi.mock("../media/store.js", async (importOriginal) => {
|
||||
});
|
||||
|
||||
import type { OpenClawConfig } from "../config/types.openclaw.js";
|
||||
import { MAX_IMAGE_BYTES } from "../media/constants.js";
|
||||
import {
|
||||
buildMessageWithAttachments,
|
||||
type ChatAttachment,
|
||||
@@ -192,7 +193,56 @@ describe("parseMessageWithAttachments", () => {
|
||||
expect(parsed.images[0]?.mimeType).toBe("image/png");
|
||||
expect(parsed.offloadedRefs).toHaveLength(1);
|
||||
expect(parsed.offloadedRefs[0]?.mimeType).toBe("application/pdf");
|
||||
expect(parsed.imageOrder).toEqual(["inline"]);
|
||||
});
|
||||
|
||||
it("excludes non-image offloads from imageOrder in mixed batches", async () => {
|
||||
// Regression: a prior revision pushed "offloaded" for every offload,
|
||||
// including non-image files. In a [non-image, inline, offloaded-image]
|
||||
// batch that produced imageOrder=["offloaded","inline","offloaded"] even
|
||||
// though only one `[media attached: media://...]` line is ever appended
|
||||
// to the prompt (for the image offload). extractTrailingAttachmentMediaUris
|
||||
// then read count=2 against one trailing URI, and
|
||||
// mergePromptAttachmentImages placed the single offloaded image into the
|
||||
// first "offloaded" slot — swapping it ahead of the inline image.
|
||||
const pdf = Buffer.from("%PDF-1.4\n").toString("base64");
|
||||
const bigPng = Buffer.alloc(2_100_000);
|
||||
bigPng.set([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a], 0);
|
||||
const { parsed } = await parseWithWarnings("x", [
|
||||
{ type: "file", mimeType: "application/pdf", fileName: "report.pdf", content: pdf },
|
||||
{ type: "image", mimeType: "image/png", fileName: "dot.png", content: PNG_1x1 },
|
||||
{
|
||||
type: "image",
|
||||
mimeType: "image/png",
|
||||
fileName: "big.png",
|
||||
content: bigPng.toString("base64"),
|
||||
},
|
||||
]);
|
||||
expect(parsed.images).toHaveLength(1);
|
||||
expect(parsed.offloadedRefs.map((ref) => ref.mimeType)).toEqual([
|
||||
"application/pdf",
|
||||
"image/png",
|
||||
]);
|
||||
expect(parsed.imageOrder).toEqual(["inline", "offloaded"]);
|
||||
// The offloaded-image URI is the sole trailing media:// line, matching
|
||||
// imageOrder's single "offloaded" slot.
|
||||
const trailingMediaLines = parsed.message
|
||||
.split("\n")
|
||||
.filter((line) => line.trim().startsWith("[media attached: media://inbound/"));
|
||||
expect(trailingMediaLines).toHaveLength(1);
|
||||
});
|
||||
|
||||
it("rejects oversized images before offload", async () => {
|
||||
const big = Buffer.alloc(MAX_IMAGE_BYTES + 1, 1).toString("base64");
|
||||
|
||||
await expect(
|
||||
parseMessageWithAttachments(
|
||||
"x",
|
||||
[{ type: "image", mimeType: "image/png", fileName: "huge.png", content: big }],
|
||||
{ maxBytes: resolveChatAttachmentMaxBytes({} as OpenClawConfig), log: { warn: () => {} } },
|
||||
),
|
||||
).rejects.toThrow(/image exceeds size limit/i);
|
||||
expect(saveMediaBufferMock).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("preserves specific OOXML mime when sniff returns generic zip (docx)", async () => {
|
||||
|
||||
@@ -296,6 +296,13 @@ export async function parseMessageWithAttachments(
|
||||
`attachment ${label}: non-image attachments (${finalMime}) are not supported on this entrypoint`,
|
||||
);
|
||||
}
|
||||
// Agent-side hydration (loadImageFromRef via optimizeAndClampImage / GIF
|
||||
// direct compare) caps at MAX_IMAGE_BYTES. Accepting images above that
|
||||
// would offload a file the runner later drops to null — a successful
|
||||
// response with a silently missing image. Reject here so the client
|
||||
// sees an explicit 4xx. Non-image attachments keep the full maxBytes
|
||||
// ceiling because their host path (ctx.MediaPaths → Read/Bash) doesn't
|
||||
// load into the model.
|
||||
if (isImage && sizeBytes > MAX_IMAGE_BYTES) {
|
||||
throw new Error(
|
||||
`attachment ${label}: image exceeds size limit (${sizeBytes} > ${MAX_IMAGE_BYTES} bytes)`,
|
||||
|
||||
@@ -69,6 +69,11 @@ const mockState = vi.hoisted(() => ({
|
||||
sandboxWorkspace: null as { workspaceDir: string; containerWorkdir?: string } | null,
|
||||
stageSandboxMediaError: null as Error | null,
|
||||
stagedRelativePaths: null as string[] | null,
|
||||
// `unstagedSources` lets tests simulate partial staging failure: absolute
|
||||
// source paths listed here are excluded from the returned `staged` map even
|
||||
// though ctx still carries their rewritten paths. This mirrors how the real
|
||||
// stageSandboxMedia silently skips over-cap files.
|
||||
unstagedSources: null as string[] | null,
|
||||
deleteMediaBufferCalls: [] as Array<{ id: string; subdir?: string }>,
|
||||
}));
|
||||
|
||||
@@ -236,10 +241,26 @@ vi.mock("../../auto-reply/reply/stage-sandbox-media.js", () => ({
|
||||
if (mockState.stageSandboxMediaError) {
|
||||
throw mockState.stageSandboxMediaError;
|
||||
}
|
||||
const staged = new Map<string, string>();
|
||||
const originalPaths = params.ctx.MediaPaths ?? [];
|
||||
if (mockState.stagedRelativePaths) {
|
||||
params.ctx.MediaPaths = [...mockState.stagedRelativePaths];
|
||||
params.ctx.MediaPath = mockState.stagedRelativePaths[0];
|
||||
const mapping = mockState.stagedRelativePaths;
|
||||
params.ctx.MediaPaths = [...mapping];
|
||||
params.ctx.MediaPath = mapping[0];
|
||||
for (let i = 0; i < mapping.length; i += 1) {
|
||||
const source = originalPaths[i];
|
||||
const dest = mapping[i];
|
||||
if (source && dest) {
|
||||
staged.set(source, dest);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (mockState.unstagedSources) {
|
||||
for (const source of mockState.unstagedSources) {
|
||||
staged.delete(source);
|
||||
}
|
||||
}
|
||||
return { staged };
|
||||
},
|
||||
),
|
||||
}));
|
||||
@@ -499,6 +520,7 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
|
||||
mockState.sandboxWorkspace = null;
|
||||
mockState.stageSandboxMediaError = null;
|
||||
mockState.stagedRelativePaths = null;
|
||||
mockState.unstagedSources = null;
|
||||
mockState.deleteMediaBufferCalls = [];
|
||||
});
|
||||
|
||||
@@ -2557,7 +2579,7 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
|
||||
expect(mockState.lastDispatchCtx?.MediaStaged).toBe(true);
|
||||
});
|
||||
|
||||
it("absolutizes sandbox-relative MediaPaths so media-understanding can resolve them", async () => {
|
||||
it("preserves sandbox-relative MediaPaths and stores workspace context for media-understanding", async () => {
|
||||
createTranscriptFixture("openclaw-chat-send-non-image-absolutize-");
|
||||
mockState.finalText = "ok";
|
||||
mockState.sessionEntry = {
|
||||
@@ -2599,15 +2621,9 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
|
||||
expectBroadcast: false,
|
||||
});
|
||||
|
||||
// applyMediaUnderstanding resolves `path.isAbsolute(raw) ? raw : path.resolve(raw)`
|
||||
// against the gateway CWD. Relative paths would miss the staged copy, so
|
||||
// prestage rejoins the sandbox workspaceDir to keep the path absolute.
|
||||
expect(mockState.lastDispatchCtx?.MediaPaths).toEqual([
|
||||
"/sandbox/workspace/media/inbound/report.pdf",
|
||||
]);
|
||||
expect(mockState.lastDispatchCtx?.MediaPath).toBe(
|
||||
"/sandbox/workspace/media/inbound/report.pdf",
|
||||
);
|
||||
expect(mockState.lastDispatchCtx?.MediaPaths).toEqual(["media/inbound/report.pdf"]);
|
||||
expect(mockState.lastDispatchCtx?.MediaPath).toBe("media/inbound/report.pdf");
|
||||
expect(mockState.lastDispatchCtx?.MediaWorkspaceDir).toBe("/sandbox/workspace");
|
||||
expect(mockState.lastDispatchCtx?.MediaStaged).toBe(true);
|
||||
});
|
||||
|
||||
@@ -2670,6 +2686,66 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
|
||||
expect(mockState.deleteMediaBufferCalls).toEqual([{ id: "saved-media", subdir: "inbound" }]);
|
||||
});
|
||||
|
||||
it("surfaces partial non-image staging failures as 5xx UNAVAILABLE", async () => {
|
||||
// Regression: stageSandboxMedia keeps unstaged entries as their original
|
||||
// absolute path, so a simple `stagedPaths.length === nonImage.length`
|
||||
// check could not detect when one of the files silently fell out (e.g. a
|
||||
// file between the RPC cap and the staging cap). Prestage must compare
|
||||
// the returned `staged` map against the input refs.
|
||||
createTranscriptFixture("openclaw-chat-send-partial-stage-");
|
||||
mockState.finalText = "ok";
|
||||
mockState.sessionEntry = {
|
||||
modelProvider: "test-provider",
|
||||
model: "vision-model",
|
||||
};
|
||||
mockState.modelCatalog = [
|
||||
{
|
||||
provider: "test-provider",
|
||||
id: "vision-model",
|
||||
name: "Vision model",
|
||||
input: ["text", "image"],
|
||||
},
|
||||
];
|
||||
mockState.savedMediaResults = [
|
||||
{ path: "/home/user/.openclaw/media/inbound/report.pdf", contentType: "application/pdf" },
|
||||
{ path: "/home/user/.openclaw/media/inbound/oversize.pdf", contentType: "application/pdf" },
|
||||
];
|
||||
mockState.sandboxWorkspace = { workspaceDir: "/sandbox/workspace" };
|
||||
mockState.stagedRelativePaths = ["media/inbound/report.pdf", "media/inbound/oversize.pdf"];
|
||||
mockState.unstagedSources = ["/home/user/.openclaw/media/inbound/oversize.pdf"];
|
||||
const respond = vi.fn();
|
||||
const context = createChatContext();
|
||||
const pdf = Buffer.from("%PDF-1.4\n").toString("base64");
|
||||
|
||||
await runNonStreamingChatSend({
|
||||
context,
|
||||
respond,
|
||||
idempotencyKey: "idem-partial-stage",
|
||||
message: "read these",
|
||||
requestParams: {
|
||||
attachments: [
|
||||
{ type: "file", mimeType: "application/pdf", fileName: "report.pdf", content: pdf },
|
||||
{ type: "file", mimeType: "application/pdf", fileName: "oversize.pdf", content: pdf },
|
||||
],
|
||||
},
|
||||
expectBroadcast: false,
|
||||
waitFor: "none",
|
||||
});
|
||||
|
||||
expect(mockState.lastDispatchCtx).toBeUndefined();
|
||||
expect(respond).toHaveBeenCalledTimes(1);
|
||||
const [ok, payload, error] = respond.mock.calls[0] ?? [];
|
||||
expect(ok).toBe(false);
|
||||
expect(payload).toBeUndefined();
|
||||
expect(error?.code).toBe(ErrorCodes.UNAVAILABLE);
|
||||
expect(error?.message ?? String(error)).toMatch(/staging incomplete/i);
|
||||
// Both media-store entries are cleaned up before the 5xx surfaces.
|
||||
expect(mockState.deleteMediaBufferCalls.map((c) => c.id).toSorted()).toEqual([
|
||||
"saved-media",
|
||||
"saved-media",
|
||||
]);
|
||||
});
|
||||
|
||||
it("passes imageOrder for mixed inline and offloaded chat.send attachments", async () => {
|
||||
createTranscriptFixture("openclaw-chat-send-image-order-");
|
||||
mockState.finalText = "ok";
|
||||
|
||||
@@ -717,14 +717,28 @@ async function persistChatSendImages(params: {
|
||||
);
|
||||
}
|
||||
}
|
||||
const offloadedSaved = params.offloadedRefs.map((ref) => ({
|
||||
id: ref.id,
|
||||
path: ref.path,
|
||||
size: 0,
|
||||
contentType: ref.mimeType,
|
||||
}));
|
||||
// imageOrder now only tracks image slots (see chat-attachments.ts), so split
|
||||
// offloaded refs by mime: image offloads interleave with inline images via
|
||||
// imageOrder, and non-image offloads append to the transcript tail. Without
|
||||
// this split a non-image file would consume the next image slot whenever
|
||||
// both kinds appear in the same request.
|
||||
const imageOffloadedSaved: SavedMedia[] = [];
|
||||
const nonImageOffloadedSaved: SavedMedia[] = [];
|
||||
for (const ref of params.offloadedRefs) {
|
||||
const entry: SavedMedia = {
|
||||
id: ref.id,
|
||||
path: ref.path,
|
||||
size: 0,
|
||||
contentType: ref.mimeType,
|
||||
};
|
||||
if (ref.mimeType.startsWith("image/")) {
|
||||
imageOffloadedSaved.push(entry);
|
||||
} else {
|
||||
nonImageOffloadedSaved.push(entry);
|
||||
}
|
||||
}
|
||||
if (params.imageOrder.length === 0) {
|
||||
return [...inlineSaved, ...offloadedSaved];
|
||||
return [...inlineSaved, ...imageOffloadedSaved, ...nonImageOffloadedSaved];
|
||||
}
|
||||
const saved: SavedMedia[] = [];
|
||||
let inlineIndex = 0;
|
||||
@@ -737,7 +751,7 @@ async function persistChatSendImages(params: {
|
||||
}
|
||||
continue;
|
||||
}
|
||||
const offloaded = offloadedSaved[offloadedIndex++];
|
||||
const offloaded = imageOffloadedSaved[offloadedIndex++];
|
||||
if (offloaded) {
|
||||
saved.push(offloaded);
|
||||
}
|
||||
@@ -748,12 +762,15 @@ async function persistChatSendImages(params: {
|
||||
saved.push(inline);
|
||||
}
|
||||
}
|
||||
for (; offloadedIndex < offloadedSaved.length; offloadedIndex++) {
|
||||
const offloaded = offloadedSaved[offloadedIndex];
|
||||
for (; offloadedIndex < imageOffloadedSaved.length; offloadedIndex++) {
|
||||
const offloaded = imageOffloadedSaved[offloadedIndex];
|
||||
if (offloaded) {
|
||||
saved.push(offloaded);
|
||||
}
|
||||
}
|
||||
for (const offloaded of nonImageOffloadedSaved) {
|
||||
saved.push(offloaded);
|
||||
}
|
||||
return saved;
|
||||
}
|
||||
|
||||
@@ -816,7 +833,7 @@ async function prestageNonImageOffloads(params: {
|
||||
MediaType: nonImage[0].mimeType,
|
||||
MediaTypes: nonImage.map((ref) => ref.mimeType),
|
||||
};
|
||||
await stageSandboxMedia({
|
||||
const stageResult = await stageSandboxMedia({
|
||||
ctx: stagingCtx,
|
||||
sessionCtx: stagingCtx as TemplateContext,
|
||||
cfg: params.cfg,
|
||||
@@ -824,18 +841,26 @@ async function prestageNonImageOffloads(params: {
|
||||
workspaceDir,
|
||||
});
|
||||
|
||||
const stagedPaths = stagingCtx.MediaPaths ?? [];
|
||||
const stagedTypes = stagingCtx.MediaTypes ?? nonImage.map((ref) => ref.mimeType);
|
||||
if (stagedPaths.length !== nonImage.length) {
|
||||
// stageSandboxMedia silently keeps unstaged entries as their original
|
||||
// absolute path, so length parity with `nonImage` does not prove every
|
||||
// file landed in the sandbox. The RPC max (20MB via
|
||||
// resolveChatAttachmentMaxBytes) admits files above the staging cap
|
||||
// (STAGED_MEDIA_MAX_BYTES = 5MB); check the returned `staged` map so any
|
||||
// missing source becomes a 5xx MediaOffloadError the client can retry.
|
||||
const stagedSources = stageResult.staged;
|
||||
const missing = nonImage.filter((ref) => !stagedSources.has(ref.path));
|
||||
if (missing.length > 0) {
|
||||
throw new Error(
|
||||
`non-image attachment staging incomplete: ${stagedPaths.length}/${nonImage.length} paths staged into sandbox workspace`,
|
||||
`non-image attachment staging incomplete: ${stagedSources.size}/${nonImage.length} paths staged into sandbox workspace (missing: ${missing.map((ref) => ref.path).join(", ")})`,
|
||||
);
|
||||
}
|
||||
const stagedPaths = stagingCtx.MediaPaths ?? [];
|
||||
const stagedTypes = stagingCtx.MediaTypes ?? nonImage.map((ref) => ref.mimeType);
|
||||
|
||||
const absolutePaths = stagedPaths.map((p) =>
|
||||
path.isAbsolute(p) ? p : path.join(sandbox.workspaceDir, p),
|
||||
);
|
||||
return { paths: absolutePaths, types: stagedTypes, workspaceDir: sandbox.workspaceDir };
|
||||
// Keep stagedPaths sandbox-relative (e.g. `media/inbound/foo.pdf`) so the
|
||||
// agent inside the container can read them. Host-side media-understanding
|
||||
// resolves them via ctx.MediaWorkspaceDir, which we carry separately.
|
||||
return { paths: stagedPaths, types: stagedTypes, workspaceDir: sandbox.workspaceDir };
|
||||
} catch (err) {
|
||||
await Promise.allSettled(
|
||||
params.offloadedRefs.map((ref) => deleteMediaBuffer(ref.id, "inbound")),
|
||||
|
||||
@@ -536,6 +536,7 @@ export async function applyMediaUnderstanding(params: {
|
||||
const cache = createMediaAttachmentCache(attachments, {
|
||||
localPathRoots: resolveMediaAttachmentLocalRoots({ cfg, ctx }),
|
||||
ssrfPolicy: cfg.tools?.web?.fetch?.ssrfPolicy,
|
||||
workspaceDir: ctx.MediaWorkspaceDir,
|
||||
});
|
||||
|
||||
try {
|
||||
|
||||
@@ -53,6 +53,7 @@ export type MediaAttachmentCacheOptions = {
|
||||
localPathRoots?: readonly string[];
|
||||
includeDefaultLocalPathRoots?: boolean;
|
||||
ssrfPolicy?: SsrFPolicy;
|
||||
workspaceDir?: string;
|
||||
};
|
||||
|
||||
function resolveRequestUrl(input: RequestInfo | URL): string {
|
||||
@@ -70,6 +71,7 @@ export class MediaAttachmentCache {
|
||||
private readonly attachments: MediaAttachment[];
|
||||
private readonly localPathRoots: readonly string[];
|
||||
private readonly ssrfPolicy: SsrFPolicy | undefined;
|
||||
private readonly workspaceDir?: string;
|
||||
private canonicalLocalPathRoots?: Promise<readonly string[]>;
|
||||
|
||||
constructor(attachments: MediaAttachment[], options?: MediaAttachmentCacheOptions) {
|
||||
@@ -79,6 +81,7 @@ export class MediaAttachmentCache {
|
||||
options?.includeDefaultLocalPathRoots === false
|
||||
? mergeInboundPathRoots(options.localPathRoots)
|
||||
: mergeInboundPathRoots(options?.localPathRoots, getDefaultLocalPathRoots());
|
||||
this.workspaceDir = options?.workspaceDir ? path.resolve(options.workspaceDir) : undefined;
|
||||
for (const attachment of attachments) {
|
||||
this.entries.set(attachment.index, { attachment });
|
||||
}
|
||||
@@ -293,7 +296,7 @@ export class MediaAttachmentCache {
|
||||
if (!rawPath) {
|
||||
return undefined;
|
||||
}
|
||||
return path.isAbsolute(rawPath) ? rawPath : path.resolve(rawPath);
|
||||
return this.workspaceDir ? path.resolve(this.workspaceDir, rawPath) : path.resolve(rawPath);
|
||||
}
|
||||
|
||||
private async ensureLocalStat(entry: AttachmentCacheEntry): Promise<number | undefined> {
|
||||
|
||||
@@ -97,6 +97,23 @@ describe("media understanding attachments SSRF", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("resolves relative attachment paths against the provided workspaceDir", async () => {
|
||||
await withTempDir({ prefix: "openclaw-media-cache-workspace-" }, async (base) => {
|
||||
const workspaceDir = path.join(base, "workspace");
|
||||
const attachmentPath = path.join(workspaceDir, "media", "inbound", "report.pdf");
|
||||
await fs.mkdir(path.dirname(attachmentPath), { recursive: true });
|
||||
await fs.writeFile(attachmentPath, "ok");
|
||||
|
||||
const cache = new MediaAttachmentCache([{ index: 0, path: "media/inbound/report.pdf" }], {
|
||||
localPathRoots: [workspaceDir],
|
||||
workspaceDir,
|
||||
});
|
||||
|
||||
const result = await cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 });
|
||||
expect(result.buffer.toString()).toBe("ok");
|
||||
});
|
||||
});
|
||||
|
||||
it("blocks local attachments outside configured roots", async () => {
|
||||
if (process.platform === "win32") {
|
||||
return;
|
||||
|
||||
@@ -270,8 +270,15 @@ export function resolveMediaAttachmentLocalRoots(params: {
|
||||
cfg: OpenClawConfig;
|
||||
ctx: MsgContext;
|
||||
}): readonly string[] {
|
||||
// ctx.MediaWorkspaceDir is set by chat.send's prestageNonImageOffloads when
|
||||
// inbound attachments were staged into a sandbox workspace. The paths in
|
||||
// ctx.MediaPaths are kept sandbox-relative (so the agent inside the
|
||||
// container can read them), and the workspace dir is carried separately so
|
||||
// host-side media-understanding can still resolve them via this root list.
|
||||
const workspaceDir = params.ctx.MediaWorkspaceDir;
|
||||
return mergeInboundPathRoots(
|
||||
getDefaultMediaLocalRoots(),
|
||||
workspaceDir ? [path.resolve(workspaceDir)] : undefined,
|
||||
resolveChannelInboundAttachmentRoots(params),
|
||||
);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user