diff --git a/CHANGELOG.md b/CHANGELOG.md index af50a2060eb..ee74f5839f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -67,6 +67,7 @@ Docs: https://docs.openclaw.ai - Discord/subagents: preserve thread-bound completion delivery by keeping the requester-agent announce path primary and falling back to direct thread sends only when the announce produces no visible output. (#71064) Thanks @DolencLuka. - Browser/tool: give Chrome MCP existing-session manage calls a longer default timeout, pass explicit tool timeouts through tab management, and recover stale selected-page MCP sessions instead of forcing a manual reset. Thanks @steipete. - Plugins/Voice Call: pin voice response sessions to `responseModel` before embedded agent runs, avoiding live-session model switch failures when the global default model differs. Fixes #60118. Thanks @xinbenlv. +- Media tools: honor the configured web-fetch SSRF policy for media understanding, image/music/video generation references, and PDF inputs, so explicit RFC2544 opt-ins cover WebChat OSS uploads without weakening defaults. Fixes #71300. (#71321) Thanks @neeravmakwana. - Gateway/sessions: recover main-agent turns interrupted by a gateway restart from stale transcript-lock evidence, avoiding stuck `status: "running"` sessions without broad post-boot transcript scans. Fixes #70555. Thanks @bitloi. - Codex approvals: keep command approval responses within Codex app-server `availableDecisions`, including deny/cancel fallbacks for prompts that do not offer `decline`. (#71338) Thanks @Lucenx9. - Plugins/Google Meet: include live Chrome-node readiness in `googlemeet setup` and document the Parallels recovery checks, so stale node tokens or disconnected VM browsers are visible before an agent opens a meeting. Thanks @steipete. diff --git a/src/agents/tools/image-generate-tool.ts b/src/agents/tools/image-generate-tool.ts index 18368e8965e..d154ff789dc 100644 --- a/src/agents/tools/image-generate-tool.ts +++ b/src/agents/tools/image-generate-tool.ts @@ -38,6 +38,7 @@ import { isCapabilityProviderConfigured, normalizeMediaReferenceInputs, readGenerationTimeoutMs, + resolveRemoteMediaSsrfPolicy, resolveCapabilityModelConfigForTool, resolveGenerateAction, resolveMediaToolLocalRoots, @@ -552,7 +553,7 @@ export function createImageGenerateTool(options?: { } const effectiveCfg = applyImageGenerationModelConfigDefaults(cfg, imageGenerationModelConfig) ?? cfg; - const remoteMediaSsrfPolicy = effectiveCfg.tools?.web?.fetch?.ssrfPolicy; + const remoteMediaSsrfPolicy = resolveRemoteMediaSsrfPolicy(effectiveCfg); const sandboxConfig = options?.sandbox && options.sandbox.root.trim() ? { diff --git a/src/agents/tools/image-tool.ts b/src/agents/tools/image-tool.ts index 42bb93a9f51..a9ef33e4f7b 100644 --- a/src/agents/tools/image-tool.ts +++ b/src/agents/tools/image-tool.ts @@ -31,6 +31,7 @@ import { applyImageModelConfigDefaults, buildTextToolResult, resolveMediaToolLocalRoots, + resolveRemoteMediaSsrfPolicy, resolvePromptAndModelOverride, } from "./media-tool-shared.js"; import { @@ -307,7 +308,7 @@ export function createImageTool(options?: { if (!imageModelConfig) { return null; } - const remoteMediaSsrfPolicy = options?.config?.tools?.web?.fetch?.ssrfPolicy; + const remoteMediaSsrfPolicy = resolveRemoteMediaSsrfPolicy(options?.config); // If model has native vision, images in the prompt are auto-injected // so this tool is only needed when image wasn't provided in the prompt diff --git a/src/agents/tools/media-tool-shared.ts b/src/agents/tools/media-tool-shared.ts index c1d2fc26433..61da2c3aac0 100644 --- a/src/agents/tools/media-tool-shared.ts +++ b/src/agents/tools/media-tool-shared.ts @@ -1,6 +1,7 @@ import { type Api, type Model } from "@mariozechner/pi-ai"; import type { AgentModelConfig } from "../../config/types.agents-shared.js"; import type { OpenClawConfig } from "../../config/types.openclaw.js"; +import type { SsrFPolicy } from "../../infra/net/ssrf.js"; import { getDefaultLocalRoots } from "../../media/web-media.js"; import { readSnakeCaseParamRaw } from "../../param-key.js"; import { @@ -97,6 +98,12 @@ export function readGenerationTimeoutMs(args: Record): number | return timeoutMs; } +export function resolveRemoteMediaSsrfPolicy( + cfg: OpenClawConfig | undefined, +): SsrFPolicy | undefined { + return cfg?.tools?.web?.fetch?.ssrfPolicy; +} + function applyAgentDefaultModelConfig( cfg: OpenClawConfig | undefined, key: "imageModel" | "imageGenerationModel" | "videoGenerationModel" | "musicGenerationModel", diff --git a/src/agents/tools/music-generate-tool.ts b/src/agents/tools/music-generate-tool.ts index 8a94adde79e..648986eb25c 100644 --- a/src/agents/tools/music-generate-tool.ts +++ b/src/agents/tools/music-generate-tool.ts @@ -38,6 +38,7 @@ import { resolveCapabilityModelConfigForTool, resolveGenerateAction, resolveMediaToolLocalRoots, + resolveRemoteMediaSsrfPolicy, resolveSelectedCapabilityProvider, } from "./media-tool-shared.js"; import { type ToolModelConfig } from "./model-config.helpers.js"; @@ -556,7 +557,7 @@ export function createMusicGenerateTool(options?: { musicGenerationModelConfig, modelOverride: model, }); - const remoteMediaSsrfPolicy = effectiveCfg.tools?.web?.fetch?.ssrfPolicy; + const remoteMediaSsrfPolicy = resolveRemoteMediaSsrfPolicy(effectiveCfg); const loadedReferenceImages = await loadReferenceImages({ inputs: imageInputs, workspaceDir: options?.workspaceDir, diff --git a/src/agents/tools/pdf-tool.test.ts b/src/agents/tools/pdf-tool.test.ts index fa0da15f079..6655488bca6 100644 --- a/src/agents/tools/pdf-tool.test.ts +++ b/src/agents/tools/pdf-tool.test.ts @@ -252,6 +252,39 @@ describe("createPdfTool", () => { }); }); + it("passes web_fetch SSRF policy when loading remote PDFs", async () => { + await withTempPdfAgentDir(async (agentDir) => { + const { loadSpy } = await stubPdfToolInfra(agentDir, { + provider: "anthropic", + input: ["text", "document"], + }); + vi.spyOn(pdfNativeProviders, "anthropicAnalyzePdf").mockResolvedValue("native summary"); + const cfg: OpenClawConfig = { + ...withPdfModel(ANTHROPIC_PDF_MODEL), + tools: { + web: { + fetch: { + ssrfPolicy: { allowRfc2544BenchmarkRange: true }, + }, + }, + }, + }; + const tool = requirePdfTool((await loadCreatePdfTool())({ config: cfg, agentDir })); + + await tool.execute("t1", { + prompt: "summarize", + pdf: "http://198.18.0.153/doc.pdf", + }); + + expect(loadSpy).toHaveBeenCalledWith( + "http://198.18.0.153/doc.pdf", + expect.objectContaining({ + ssrfPolicy: { allowRfc2544BenchmarkRange: true }, + }), + ); + }); + }); + it("allows managed inbound absolute PDF paths when workspaceOnly is enabled", async () => { await withManagedInboundPdf(async ({ mediaPath }) => { await withTempPdfAgentDir(async (agentDir) => { diff --git a/src/agents/tools/pdf-tool.ts b/src/agents/tools/pdf-tool.ts index f0edc9f8096..88461446103 100644 --- a/src/agents/tools/pdf-tool.ts +++ b/src/agents/tools/pdf-tool.ts @@ -20,6 +20,7 @@ import { resolveMediaToolLocalRoots, resolveModelRuntimeApiKey, resolvePromptAndModelOverride, + resolveRemoteMediaSsrfPolicy, } from "./media-tool-shared.js"; import { anthropicAnalyzePdf, geminiAnalyzePdf } from "./pdf-native-providers.js"; import { @@ -277,6 +278,7 @@ export function createPdfTool(options?: { const description = "Analyze one or more PDF documents with a model. Supports native PDF analysis for Anthropic and Google models, with text/image extraction fallback for other providers. Use pdf for a single path/URL, or pdfs for multiple (up to 10). Provide a prompt describing what to analyze."; + const remoteMediaSsrfPolicy = resolveRemoteMediaSsrfPolicy(options?.config); return { label: "PDF", @@ -393,6 +395,7 @@ export function createPdfTool(options?: { : await loadWebMediaRaw(resolvedPathInfo.resolved, { maxBytes, localRoots, + ssrfPolicy: remoteMediaSsrfPolicy, }); if (media.kind !== "document") { diff --git a/src/agents/tools/video-generate-tool.ts b/src/agents/tools/video-generate-tool.ts index 5b8c7d9fc91..045f4be33b6 100644 --- a/src/agents/tools/video-generate-tool.ts +++ b/src/agents/tools/video-generate-tool.ts @@ -41,6 +41,7 @@ import { resolveCapabilityModelConfigForTool, resolveGenerateAction, resolveMediaToolLocalRoots, + resolveRemoteMediaSsrfPolicy, resolveSelectedCapabilityProvider, } from "./media-tool-shared.js"; import { type ToolModelConfig } from "./model-config.helpers.js"; @@ -813,7 +814,7 @@ export function createVideoGenerateTool(options?: { const action = resolveAction(args); const effectiveCfg = applyVideoGenerationModelConfigDefaults(cfg, videoGenerationModelConfig) ?? cfg; - const remoteMediaSsrfPolicy = effectiveCfg.tools?.web?.fetch?.ssrfPolicy; + const remoteMediaSsrfPolicy = resolveRemoteMediaSsrfPolicy(effectiveCfg); if (action === "list") { return createVideoGenerateListActionResult(effectiveCfg);