fix: honor media SSRF policy for PDF inputs (#71321)

This commit is contained in:
Peter Steinberger
2026-04-25 02:50:29 +01:00
parent f9cb942aa9
commit d99d9eda37
8 changed files with 52 additions and 4 deletions

View File

@@ -67,6 +67,7 @@ Docs: https://docs.openclaw.ai
- Discord/subagents: preserve thread-bound completion delivery by keeping the requester-agent announce path primary and falling back to direct thread sends only when the announce produces no visible output. (#71064) Thanks @DolencLuka.
- Browser/tool: give Chrome MCP existing-session manage calls a longer default timeout, pass explicit tool timeouts through tab management, and recover stale selected-page MCP sessions instead of forcing a manual reset. Thanks @steipete.
- Plugins/Voice Call: pin voice response sessions to `responseModel` before embedded agent runs, avoiding live-session model switch failures when the global default model differs. Fixes #60118. Thanks @xinbenlv.
- Media tools: honor the configured web-fetch SSRF policy for media understanding, image/music/video generation references, and PDF inputs, so explicit RFC2544 opt-ins cover WebChat OSS uploads without weakening defaults. Fixes #71300. (#71321) Thanks @neeravmakwana.
- Gateway/sessions: recover main-agent turns interrupted by a gateway restart from stale transcript-lock evidence, avoiding stuck `status: "running"` sessions without broad post-boot transcript scans. Fixes #70555. Thanks @bitloi.
- Codex approvals: keep command approval responses within Codex app-server `availableDecisions`, including deny/cancel fallbacks for prompts that do not offer `decline`. (#71338) Thanks @Lucenx9.
- Plugins/Google Meet: include live Chrome-node readiness in `googlemeet setup` and document the Parallels recovery checks, so stale node tokens or disconnected VM browsers are visible before an agent opens a meeting. Thanks @steipete.

View File

@@ -38,6 +38,7 @@ import {
isCapabilityProviderConfigured,
normalizeMediaReferenceInputs,
readGenerationTimeoutMs,
resolveRemoteMediaSsrfPolicy,
resolveCapabilityModelConfigForTool,
resolveGenerateAction,
resolveMediaToolLocalRoots,
@@ -552,7 +553,7 @@ export function createImageGenerateTool(options?: {
}
const effectiveCfg =
applyImageGenerationModelConfigDefaults(cfg, imageGenerationModelConfig) ?? cfg;
const remoteMediaSsrfPolicy = effectiveCfg.tools?.web?.fetch?.ssrfPolicy;
const remoteMediaSsrfPolicy = resolveRemoteMediaSsrfPolicy(effectiveCfg);
const sandboxConfig =
options?.sandbox && options.sandbox.root.trim()
? {

View File

@@ -31,6 +31,7 @@ import {
applyImageModelConfigDefaults,
buildTextToolResult,
resolveMediaToolLocalRoots,
resolveRemoteMediaSsrfPolicy,
resolvePromptAndModelOverride,
} from "./media-tool-shared.js";
import {
@@ -307,7 +308,7 @@ export function createImageTool(options?: {
if (!imageModelConfig) {
return null;
}
const remoteMediaSsrfPolicy = options?.config?.tools?.web?.fetch?.ssrfPolicy;
const remoteMediaSsrfPolicy = resolveRemoteMediaSsrfPolicy(options?.config);
// If model has native vision, images in the prompt are auto-injected
// so this tool is only needed when image wasn't provided in the prompt

View File

@@ -1,6 +1,7 @@
import { type Api, type Model } from "@mariozechner/pi-ai";
import type { AgentModelConfig } from "../../config/types.agents-shared.js";
import type { OpenClawConfig } from "../../config/types.openclaw.js";
import type { SsrFPolicy } from "../../infra/net/ssrf.js";
import { getDefaultLocalRoots } from "../../media/web-media.js";
import { readSnakeCaseParamRaw } from "../../param-key.js";
import {
@@ -97,6 +98,12 @@ export function readGenerationTimeoutMs(args: Record<string, unknown>): number |
return timeoutMs;
}
/**
 * Looks up the SSRF policy that media tools should apply to remote fetches.
 *
 * The value is read from the `tools.web.fetch.ssrfPolicy` config entry, so the
 * media tools share whatever policy is configured for web fetch.
 *
 * @param cfg - Resolved configuration; may be `undefined`.
 * @returns The configured policy, or `undefined` when the config or any
 *   section on the path to `ssrfPolicy` is missing.
 */
export function resolveRemoteMediaSsrfPolicy(
  cfg: OpenClawConfig | undefined,
): SsrFPolicy | undefined {
  const webFetchSettings = cfg?.tools?.web?.fetch;
  return webFetchSettings?.ssrfPolicy;
}
function applyAgentDefaultModelConfig(
cfg: OpenClawConfig | undefined,
key: "imageModel" | "imageGenerationModel" | "videoGenerationModel" | "musicGenerationModel",

View File

@@ -38,6 +38,7 @@ import {
resolveCapabilityModelConfigForTool,
resolveGenerateAction,
resolveMediaToolLocalRoots,
resolveRemoteMediaSsrfPolicy,
resolveSelectedCapabilityProvider,
} from "./media-tool-shared.js";
import { type ToolModelConfig } from "./model-config.helpers.js";
@@ -556,7 +557,7 @@ export function createMusicGenerateTool(options?: {
musicGenerationModelConfig,
modelOverride: model,
});
const remoteMediaSsrfPolicy = effectiveCfg.tools?.web?.fetch?.ssrfPolicy;
const remoteMediaSsrfPolicy = resolveRemoteMediaSsrfPolicy(effectiveCfg);
const loadedReferenceImages = await loadReferenceImages({
inputs: imageInputs,
workspaceDir: options?.workspaceDir,

View File

@@ -252,6 +252,39 @@ describe("createPdfTool", () => {
});
});
// Regression test: when the PDF input is a remote URL, the tool must forward the
// configured `tools.web.fetch.ssrfPolicy` to the media loader.
it("passes web_fetch SSRF policy when loading remote PDFs", async () => {
await withTempPdfAgentDir(async (agentDir) => {
// `loadSpy` is provided by the shared stub helper; presumably it wraps the
// media loader the PDF tool calls — confirm in stubPdfToolInfra.
const { loadSpy } = await stubPdfToolInfra(agentDir, {
provider: "anthropic",
input: ["text", "document"],
});
// Replace the native Anthropic PDF analysis with a canned result.
vi.spyOn(pdfNativeProviders, "anthropicAnalyzePdf").mockResolvedValue("native summary");
// Config explicitly opts in to the RFC 2544 benchmark range through the
// web-fetch SSRF policy.
const cfg: OpenClawConfig = {
...withPdfModel(ANTHROPIC_PDF_MODEL),
tools: {
web: {
fetch: {
ssrfPolicy: { allowRfc2544BenchmarkRange: true },
},
},
},
};
const tool = requirePdfTool((await loadCreatePdfTool())({ config: cfg, agentDir }));
// 198.18.0.153 lies inside 198.18.0.0/15, the RFC 2544 benchmark range that
// the policy above allows.
await tool.execute("t1", {
prompt: "summarize",
pdf: "http://198.18.0.153/doc.pdf",
});
// The loader must be called with the URL and with options that include the
// configured policy; other option fields are not constrained here.
expect(loadSpy).toHaveBeenCalledWith(
"http://198.18.0.153/doc.pdf",
expect.objectContaining({
ssrfPolicy: { allowRfc2544BenchmarkRange: true },
}),
);
});
});
it("allows managed inbound absolute PDF paths when workspaceOnly is enabled", async () => {
await withManagedInboundPdf(async ({ mediaPath }) => {
await withTempPdfAgentDir(async (agentDir) => {

View File

@@ -20,6 +20,7 @@ import {
resolveMediaToolLocalRoots,
resolveModelRuntimeApiKey,
resolvePromptAndModelOverride,
resolveRemoteMediaSsrfPolicy,
} from "./media-tool-shared.js";
import { anthropicAnalyzePdf, geminiAnalyzePdf } from "./pdf-native-providers.js";
import {
@@ -277,6 +278,7 @@ export function createPdfTool(options?: {
const description =
"Analyze one or more PDF documents with a model. Supports native PDF analysis for Anthropic and Google models, with text/image extraction fallback for other providers. Use pdf for a single path/URL, or pdfs for multiple (up to 10). Provide a prompt describing what to analyze.";
const remoteMediaSsrfPolicy = resolveRemoteMediaSsrfPolicy(options?.config);
return {
label: "PDF",
@@ -393,6 +395,7 @@ export function createPdfTool(options?: {
: await loadWebMediaRaw(resolvedPathInfo.resolved, {
maxBytes,
localRoots,
ssrfPolicy: remoteMediaSsrfPolicy,
});
if (media.kind !== "document") {

View File

@@ -41,6 +41,7 @@ import {
resolveCapabilityModelConfigForTool,
resolveGenerateAction,
resolveMediaToolLocalRoots,
resolveRemoteMediaSsrfPolicy,
resolveSelectedCapabilityProvider,
} from "./media-tool-shared.js";
import { type ToolModelConfig } from "./model-config.helpers.js";
@@ -813,7 +814,7 @@ export function createVideoGenerateTool(options?: {
const action = resolveAction(args);
const effectiveCfg =
applyVideoGenerationModelConfigDefaults(cfg, videoGenerationModelConfig) ?? cfg;
const remoteMediaSsrfPolicy = effectiveCfg.tools?.web?.fetch?.ssrfPolicy;
const remoteMediaSsrfPolicy = resolveRemoteMediaSsrfPolicy(effectiveCfg);
if (action === "list") {
return createVideoGenerateListActionResult(effectiveCfg);