diff --git a/CHANGELOG.md b/CHANGELOG.md index eec3a467e56..98449e58f94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ Docs: https://docs.openclaw.ai - BlueBubbles: include sender identity in group chat envelopes and pass clean message text to the agent prompt, aligning with iMessage/Signal formatting. (#16210) Thanks @zerone0x. - WhatsApp: honor per-account `dmPolicy` overrides (account-level settings now take precedence over channel defaults for inbound DMs). (#10082) Thanks @mcaxtr. - Media: accept `MEDIA:`-prefixed paths (lenient whitespace) when loading outbound media to prevent `ENOENT` for tool-returned local media paths. (#13107) Thanks @mcaxtr. +- Agents/Image tool: allow workspace-local image paths by including the active workspace directory in local media allowlists, and trust sandbox-validated paths in image loaders to prevent false "not under an allowed directory" rejections. (#15541) - Cron/Slack: preserve agent identity (name and icon) when cron jobs deliver outbound messages. (#16242) Thanks @robbyczgw-cla. - Cron: prevent `cron list`/`cron status` from silently skipping past-due recurring jobs by using maintenance recompute semantics. (#16156) Thanks @zerone0x. - Cron: repair missing/corrupt `nextRunAtMs` for the updated job without globally recomputing unrelated due jobs during `cron update`. (#15750) diff --git a/src/agents/openclaw-tools.ts b/src/agents/openclaw-tools.ts index 2be40ead3cc..5fc4bba92d4 100644 --- a/src/agents/openclaw-tools.ts +++ b/src/agents/openclaw-tools.ts @@ -64,6 +64,7 @@ export function createOpenClawTools(options?: { ? createImageTool({ config: options?.config, agentDir: options.agentDir, + workspaceDir: options?.workspaceDir, sandbox: options?.sandboxRoot && options?.sandboxFsBridge ? 
{ root: options.sandboxRoot, bridge: options.sandboxFsBridge } diff --git a/src/agents/pi-embedded-runner/run/images.e2e.test.ts b/src/agents/pi-embedded-runner/run/images.e2e.test.ts index e37846e83a1..70cb663f418 100644 --- a/src/agents/pi-embedded-runner/run/images.e2e.test.ts +++ b/src/agents/pi-embedded-runner/run/images.e2e.test.ts @@ -1,5 +1,14 @@ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; import { describe, expect, it } from "vitest"; -import { detectAndLoadPromptImages, detectImageReferences, modelSupportsImages } from "./images.js"; +import { createHostSandboxFsBridge } from "../../test-helpers/host-sandbox-fs-bridge.js"; +import { + detectAndLoadPromptImages, + detectImageReferences, + loadImageFromRef, + modelSupportsImages, +} from "./images.js"; describe("detectImageReferences", () => { it("detects absolute file paths with common extensions", () => { @@ -196,6 +205,41 @@ describe("modelSupportsImages", () => { }); }); +describe("loadImageFromRef", () => { + it("allows sandbox-validated host paths outside default media roots", async () => { + const sandboxParent = await fs.mkdtemp(path.join(os.homedir(), "openclaw-sandbox-image-")); + try { + const sandboxRoot = path.join(sandboxParent, "sandbox"); + await fs.mkdir(sandboxRoot, { recursive: true }); + const imagePath = path.join(sandboxRoot, "photo.png"); + const pngB64 = + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII="; + await fs.writeFile(imagePath, Buffer.from(pngB64, "base64")); + + const image = await loadImageFromRef( + { + raw: "./photo.png", + type: "path", + resolved: "./photo.png", + }, + sandboxRoot, + { + sandbox: { + root: sandboxRoot, + bridge: createHostSandboxFsBridge(sandboxRoot), + }, + }, + ); + + expect(image).not.toBeNull(); + expect(image?.type).toBe("image"); + expect(image?.data.length).toBeGreaterThan(0); + } finally { + await fs.rm(sandboxParent, { recursive: true, force: true 
}); + } + }); +}); + describe("detectAndLoadPromptImages", () => { it("returns no images for non-vision models even when existing images are provided", async () => { const result = await detectAndLoadPromptImages({ diff --git a/src/agents/pi-embedded-runner/run/images.ts b/src/agents/pi-embedded-runner/run/images.ts index 076a32867e4..9b2d09e3670 100644 --- a/src/agents/pi-embedded-runner/run/images.ts +++ b/src/agents/pi-embedded-runner/run/images.ts @@ -211,6 +211,7 @@ export async function loadImageFromRef( const media = options?.sandbox ? await loadWebMedia(targetPath, { maxBytes: options.maxBytes, + localRoots: "any", readFile: (filePath) => options.sandbox!.bridge.readFile({ filePath, cwd: options.sandbox!.root }), }) diff --git a/src/agents/tools/image-tool.e2e.test.ts b/src/agents/tools/image-tool.e2e.test.ts index c979e806dfb..ee5c30a46f8 100644 --- a/src/agents/tools/image-tool.e2e.test.ts +++ b/src/agents/tools/image-tool.e2e.test.ts @@ -150,6 +150,75 @@ describe("image tool implicit imageModel config", () => { ); }); + it("allows workspace images outside default local media roots", async () => { + const workspaceParent = await fs.mkdtemp( + path.join(process.cwd(), ".openclaw-workspace-image-"), + ); + try { + const workspaceDir = path.join(workspaceParent, "workspace"); + await fs.mkdir(workspaceDir, { recursive: true }); + const imagePath = path.join(workspaceDir, "photo.png"); + const pngB64 = + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII="; + await fs.writeFile(imagePath, Buffer.from(pngB64, "base64")); + + const fetch = vi.fn().mockResolvedValue({ + ok: true, + status: 200, + statusText: "OK", + headers: new Headers(), + json: async () => ({ + content: "ok", + base_resp: { status_code: 0, status_msg: "" }, + }), + }); + // @ts-expect-error partial global + global.fetch = fetch; + vi.stubEnv("MINIMAX_API_KEY", "minimax-test"); + + const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), 
"openclaw-image-")); + const cfg: OpenClawConfig = { + agents: { + defaults: { + model: { primary: "minimax/MiniMax-M2.1" }, + imageModel: { primary: "minimax/MiniMax-VL-01" }, + }, + }, + }; + + const withoutWorkspace = createImageTool({ config: cfg, agentDir }); + expect(withoutWorkspace).not.toBeNull(); + if (!withoutWorkspace) { + throw new Error("expected image tool"); + } + await expect( + withoutWorkspace.execute("t0", { + prompt: "Describe the image.", + image: imagePath, + }), + ).rejects.toThrow(/Local media path is not under an allowed directory/i); + + const withWorkspace = createImageTool({ config: cfg, agentDir, workspaceDir }); + expect(withWorkspace).not.toBeNull(); + if (!withWorkspace) { + throw new Error("expected image tool"); + } + + await expect( + withWorkspace.execute("t1", { + prompt: "Describe the image.", + image: imagePath, + }), + ).resolves.toMatchObject({ + content: [{ type: "text", text: "ok" }], + }); + + expect(fetch).toHaveBeenCalledTimes(1); + } finally { + await fs.rm(workspaceParent, { recursive: true, force: true }); + } + }); + it("sandboxes image paths like the read tool", async () => { const stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-image-sandbox-")); const agentDir = path.join(stateDir, "agent"); diff --git a/src/agents/tools/image-tool.ts b/src/agents/tools/image-tool.ts index 45889c00005..fb7c40d202a 100644 --- a/src/agents/tools/image-tool.ts +++ b/src/agents/tools/image-tool.ts @@ -5,7 +5,7 @@ import type { OpenClawConfig } from "../../config/config.js"; import type { SandboxFsBridge } from "../sandbox/fs-bridge.js"; import type { AnyAgentTool } from "./common.js"; import { resolveUserPath } from "../../utils.js"; -import { loadWebMedia } from "../../web/media.js"; +import { getDefaultLocalRoots, loadWebMedia } from "../../web/media.js"; import { ensureAuthProfileStore, listProfilesForProvider } from "../auth-profiles.js"; import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js"; import { 
minimaxUnderstandImage } from "../minimax-vlm.js"; @@ -325,6 +325,7 @@ async function runImagePrompt(params: { export function createImageTool(options?: { config?: OpenClawConfig; agentDir?: string; + workspaceDir?: string; sandbox?: ImageSandboxConfig; /** If true, the model has native vision capability and images in the prompt are auto-injected */ modelHasVision?: boolean; @@ -351,6 +352,19 @@ export function createImageTool(options?: { ? "Analyze an image with a vision model. Only use this tool when the image was NOT already provided in the user's message. Images mentioned in the prompt are automatically visible to you." : "Analyze an image with the configured image model (agents.defaults.imageModel). Provide a prompt and image path or URL."; + const localRoots = (() => { + const roots = getDefaultLocalRoots(); + const workspaceDir = options?.workspaceDir?.trim(); + if (!workspaceDir) { + return roots; + } + const normalized = workspaceDir.startsWith("~") ? resolveUserPath(workspaceDir) : workspaceDir; + if (!roots.includes(normalized)) { + roots.push(normalized); + } + return roots; + })(); + return { label: "Image", name: "image", @@ -441,10 +455,14 @@ export function createImageTool(options?: { : sandboxConfig ? await loadWebMedia(resolvedPath ?? resolvedImage, { maxBytes, + localRoots: "any", readFile: (filePath) => sandboxConfig.bridge.readFile({ filePath, cwd: sandboxConfig.root }), }) - : await loadWebMedia(resolvedPath ?? resolvedImage, maxBytes); + : await loadWebMedia(resolvedPath ?? 
resolvedImage, { + maxBytes, + localRoots, + }); if (media.kind !== "image") { throw new Error(`Unsupported media type: ${media.kind}`); } diff --git a/src/web/media.ts b/src/web/media.ts index cf7efd5f49c..18a8542cab8 100644 --- a/src/web/media.ts +++ b/src/web/media.ts @@ -32,7 +32,7 @@ type WebMediaOptions = { readFile?: (filePath: string) => Promise; }; -function getDefaultLocalRoots(): string[] { +export function getDefaultLocalRoots(): string[] { return [ os.tmpdir(), path.join(STATE_DIR, "media"),