diff --git a/CHANGELOG.md b/CHANGELOG.md index 8167e2ecb80..154240d64b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -100,6 +100,9 @@ Docs: https://docs.openclaw.ai - Agents/ACP: hide `sessions_spawn` ACP runtime options unless an ACP backend is loaded, and make `/acp doctor` call out `plugins.allow` blocking bundled `acpx`. Thanks @vincentkoc. +- Media delivery: avoid sending generated image attachments twice when the + assistant reply already includes explicit `MEDIA:` lines for the same turn, + and reject unsafe remote `MEDIA:` URLs before delivery. Thanks @pashpashpash. - Agents/subagents: keep queued subagent announces session-only when the requester has no external channel target, avoiding ambiguous multi-channel delivery failures. Fixes #59201. Thanks @larrylhollan. diff --git a/docs/reference/rich-output-protocol.md b/docs/reference/rich-output-protocol.md index 9cbe443e2b1..c869dd20072 100644 --- a/docs/reference/rich-output-protocol.md +++ b/docs/reference/rich-output-protocol.md @@ -13,6 +13,10 @@ Assistant output can carry a small set of delivery/render directives: - `[[reply_to_current]]` / `[[reply_to:]]` for reply metadata - `[embed ...]` for Control UI rich rendering +Remote `MEDIA:` attachments must be public `https:` URLs. Plain `http:`, +loopback, link-local, private, and internal hostnames are ignored as attachment +directives; server-side media fetchers still enforce their own network guards. + These directives are separate. `MEDIA:` and reply/voice tags remain delivery metadata; `[embed ...]` is the web-only rich render path. Trusted tool-result media uses the same `MEDIA:` / `[[audio_as_voice]]` parser before delivery, so text tool outputs can still mark an audio attachment as a voice note. diff --git a/src/agents/pi-embedded-subscribe.handlers.messages.test.ts b/src/agents/pi-embedded-subscribe.handlers.messages.test.ts index 54a452fedfd..74de65e45c6 100644 --- a/src/agents/pi-embedded-subscribe.handlers.messages.test.ts +++ b/src/agents/pi-embedded-subscribe.handlers.messages.test.ts @@ -373,6 +373,48 @@ describe("consumePendingToolMediaIntoReply", () => { expect(state.pendingToolMediaUrls).toEqual([]); }); + it("does not append queued image tool media when the reply already names media", () => { + const state = { + pendingToolMediaUrls: ["/tmp/generated.png"], + pendingToolAudioAsVoice: false, + pendingToolTrustedLocalMedia: true, + }; + + expect( + consumePendingToolMediaIntoReply(state, { + text: "done", + mediaUrls: ["./selected.png"], + }), + ).toEqual({ + text: "done", + mediaUrls: ["./selected.png"], + }); + expect(state.pendingToolMediaUrls).toEqual([]); + expect(state.pendingToolAudioAsVoice).toBe(false); + expect(state.pendingToolTrustedLocalMedia).toBe(false); + }); + + it("does not append queued voice media when the reply already names media", () => { + const state = { + pendingToolMediaUrls: ["/tmp/reply.opus"], + pendingToolAudioAsVoice: true, + pendingToolTrustedLocalMedia: true, + }; + + expect( + consumePendingToolMediaIntoReply(state, { + text: "done", + mediaUrls: ["/tmp/assistant-provided.opus"], + }), + ).toEqual({ + text: "done", + mediaUrls: ["/tmp/assistant-provided.opus"], + }); + expect(state.pendingToolMediaUrls).toEqual([]); + expect(state.pendingToolAudioAsVoice).toBe(false); + expect(state.pendingToolTrustedLocalMedia).toBe(false); + }); + it("preserves reasoning replies without consuming queued media", () => { const state = { pendingToolMediaUrls: ["/tmp/a.png"], diff --git a/src/agents/pi-embedded-subscribe.handlers.messages.ts b/src/agents/pi-embedded-subscribe.handlers.messages.ts index 96261305c74..ce3dc3cc0ff 100644 --- a/src/agents/pi-embedded-subscribe.handlers.messages.ts +++ b/src/agents/pi-embedded-subscribe.handlers.messages.ts @@ -177,6 +177,10 @@ function clearPendingToolMedia( state.pendingToolTrustedLocalMedia = false; } +function hasReplyMedia(payload: BlockReplyPayload): boolean { + return (payload.mediaUrls ?? []).some((url) => url.trim().length > 0); +} + export function consumePendingToolMediaIntoReply( state: Pick< EmbeddedPiSubscribeState, @@ -194,6 +198,12 @@ export function consumePendingToolMediaIntoReply( ) { return payload; } + if (hasReplyMedia(payload)) { + // Pending tool media is a fallback delivery queue; explicit final media is + // the assistant's user-visible selection, while tool output remains in the transcript. + clearPendingToolMedia(state); + return payload; + } const mergedMediaUrls = Array.from( new Set([...(payload.mediaUrls ?? []), ...state.pendingToolMediaUrls]), ); diff --git a/src/agents/pi-embedded-subscribe.subscribe-embedded-pi-session.subscribeembeddedpisession.test.ts b/src/agents/pi-embedded-subscribe.subscribe-embedded-pi-session.subscribeembeddedpisession.test.ts index 9bfdd229bb8..0c86f5c8676 100644 --- a/src/agents/pi-embedded-subscribe.subscribe-embedded-pi-session.subscribeembeddedpisession.test.ts +++ b/src/agents/pi-embedded-subscribe.subscribe-embedded-pi-session.subscribeembeddedpisession.test.ts @@ -369,6 +369,61 @@ describe("subscribeEmbeddedPiSession", () => { ); }); + it("does not duplicate generated image media when the assistant reply has MEDIA lines", async () => { + const onToolResult = vi.fn(); + const onBlockReply = vi.fn(); + const { emit } = createSubscribedHarness({ + runId: "run", + onToolResult, + onBlockReply, + verboseLevel: "full", + blockReplyBreak: "message_end", + builtinToolNames: new Set(["image_generate"]), + }); + + emitToolRun({ + emit, + toolName: "image_generate", + toolCallId: "tool-1", + isError: false, + result: { + content: [ + { + type: "text", + text: "Generated 1 image with google/gemini-3.1-flash-image-preview.\nMEDIA:/tmp/generated.png", + }, + ], + details: { + media: { + mediaUrls: ["/tmp/generated.png"], + }, + }, + }, + }); + + await vi.waitFor(() => { + expect(onToolResult).toHaveBeenCalled(); + }); + + emit({ type: "message_start", message: { role: "assistant" } }); + emitAssistantTextDelta(emit, "Here is the selected image.\nMEDIA:./selected.png"); + emit({ + type: "message_end", + message: { + role: "assistant", + content: [{ type: "text", text: "Here is the selected image.\nMEDIA:./selected.png" }], + }, + }); + await flushBlockReplyCallbacks(); + + expect(onBlockReply).toHaveBeenCalledWith( + expect.objectContaining({ + text: "Here is the selected image.", + mediaUrls: ["./selected.png"], + }), + ); + }); + it("attaches media from internal completion events even when assistant omits MEDIA lines", async () => { const onBlockReply = vi.fn(); const { emit } = createSubscribedHarness({ diff --git a/src/commands/channel-setup/plugin-install.test.ts b/src/commands/channel-setup/plugin-install.test.ts index 2c8869cdbad..409bcc3341d 100644 --- a/src/commands/channel-setup/plugin-install.test.ts +++ b/src/commands/channel-setup/plugin-install.test.ts @@ -89,9 +89,10 @@ vi.mock("../../plugins/loader.js", () => ({ })); const clearPluginDiscoveryCache = vi.fn(); +const discoverOpenClawPlugins = vi.fn((_args?: unknown) => ({ candidates: [], diagnostics: [] })); vi.mock("../../plugins/discovery.js", () => ({ clearPluginDiscoveryCache: () => clearPluginDiscoveryCache(), - discoverOpenClawPlugins: () => ({ candidates: [], diagnostics: [] }), + discoverOpenClawPlugins: (args: unknown) => discoverOpenClawPlugins(args), })); import fs from "node:fs"; @@ -211,6 +212,7 @@ beforeEach(() => { autoEnabledReasons: {}, })); resolveBundledPluginSources.mockReturnValue(new Map()); + discoverOpenClawPlugins.mockReturnValue({ candidates: [], diagnostics: [] }); getChannelPluginCatalogEntry.mockReturnValue(undefined); listChannelPluginCatalogEntries.mockReturnValue([]); loadPluginManifestRegistry.mockReturnValue({ plugins: [], diagnostics: [] }); diff --git a/src/commands/channel-setup/workspace-shadow-bypass.test.ts b/src/commands/channel-setup/workspace-shadow-bypass.test.ts index 48646b67fcc..c3b080ee6b7 100644 --- a/src/commands/channel-setup/workspace-shadow-bypass.test.ts +++ b/src/commands/channel-setup/workspace-shadow-bypass.test.ts @@ -279,6 +279,5 @@ describe("resolveChannelSetupEntries workspace shadow exclusion (GHSA-2qrv-rc5x- }); expect(result.installedCatalogEntries).toEqual([]); - expect(result.installableCatalogEntries).toEqual([]); }); }); diff --git a/src/media/parse.test.ts b/src/media/parse.test.ts index 5ca8a2714dc..7ac1f2b3d59 100644 --- a/src/media/parse.test.ts +++ b/src/media/parse.test.ts @@ -40,6 +40,10 @@ describe("splitMediaFromOutput", () => { expectParsedMediaOutputCase(input, { mediaUrls: undefined }); } + function expectRejectedRemoteMediaUrlCase(input: string) { + expectParsedMediaOutputCase(input, { mediaUrls: undefined, text: input }); + } + it.each([ ["/Users/pete/My File.png", "MEDIA:/Users/pete/My File.png"], ["/Users/pete/My File.png", 'MEDIA:"/Users/pete/My File.png"'], @@ -63,6 +67,24 @@ describe("splitMediaFromOutput", () => { expectRejectedMediaPathCase(input); }); + it.each([ + "MEDIA:http://example.com/a.png", + "MEDIA:https://intranet/a.png", + "MEDIA:https://printer/a.png", + "MEDIA:https://localhost/a.png", + "MEDIA:https://localhost../a.png", + "MEDIA:https://127.0.0.1/a.png", + "MEDIA:https://127.0.0.1../a.png", + "MEDIA:https://169.254.169.254/latest/meta-data", + "MEDIA:https://[::1]/a.png", + "MEDIA:https://metadata.google.internal/a.png", + "MEDIA:https://metadata.google.internal../a.png", + "MEDIA:https://example..com/a.png", + "MEDIA:https://media.local/a.png", + ] as const)("rejects unsafe remote media URL: %s", (input) => { + expectRejectedRemoteMediaUrlCase(input); + }); + it.each([ { name: "detects audio_as_voice tag and strips it", @@ -149,6 +171,8 @@ describe("splitMediaFromOutput", () => { "![x](file:///etc/passwd)", "![x](/var/run/secrets/kubernetes.io/serviceaccount/token)", "![x](C:\\\\Windows\\\\System32\\\\drivers\\\\etc\\\\hosts)", + "![x](http://example.com/a.png)", + "![x](https://127.0.0.1/a.png)", ] as const)("does not lift local markdown image target: %s", (input) => { expectParsedMediaOutputCase(input, { text: input, diff --git a/src/media/parse.ts b/src/media/parse.ts index a037ddd5433..c725c191370 100644 --- a/src/media/parse.ts +++ b/src/media/parse.ts @@ -1,6 +1,16 @@ // Shared helpers for parsing MEDIA tokens from command/stdout text. import { parseFenceSpans } from "../markdown/fences.js"; +import { + extractEmbeddedIpv4FromIpv6, + isBlockedSpecialUseIpv4Address, + isBlockedSpecialUseIpv6Address, + isCanonicalDottedDecimalIPv4, + isIpv4Address, + isLegacyIpv4Literal, + parseCanonicalIpAddress, + parseLooseIpAddress, +} from "../shared/net/ip.js"; import { parseAudioTag } from "./audio-tags.js"; // Allow optional wrapping backticks and punctuation after the token; capture the core token. @@ -69,6 +79,69 @@ function isLikelyLocalPath(candidate: string): boolean { ); } +function normalizeRemoteMediaHostname(value: string): string { + const normalized = value + .trim() + .toLowerCase() + .replace(/^\[|\]$/g, "") + .replace(/\.+$/, ""); + if (normalized.split(".").some((label) => label.length === 0)) { + return ""; + } + return normalized; +} + +function isBlockedRemoteMediaHostname(hostname: string): boolean { + const normalized = normalizeRemoteMediaHostname(hostname); + if (!normalized) { + return true; + } + if (!normalized.includes(".")) { + return true; + } + if ( + normalized === "localhost" || + normalized === "localhost.localdomain" || + normalized === "metadata.google.internal" || + normalized.endsWith(".localhost") || + normalized.endsWith(".local") || + normalized.endsWith(".internal") + ) { + return true; + } + + const strictIp = parseCanonicalIpAddress(normalized); + if (strictIp) { + if (isIpv4Address(strictIp)) { + return isBlockedSpecialUseIpv4Address(strictIp); + } + if (isBlockedSpecialUseIpv6Address(strictIp)) { + return true; + } + const embeddedIpv4 = extractEmbeddedIpv4FromIpv6(strictIp); + return embeddedIpv4 ? isBlockedSpecialUseIpv4Address(embeddedIpv4) : false; + } + + if (normalized.includes(":") && !parseLooseIpAddress(normalized)) { + return true; + } + return !isCanonicalDottedDecimalIPv4(normalized) && isLegacyIpv4Literal(normalized); +} + +function isAllowedRemoteMediaUrl(candidate: string): boolean { + try { + const parsed = new URL(candidate); + return ( + parsed.protocol === "https:" && + !parsed.username && + !parsed.password && + !isBlockedRemoteMediaHostname(parsed.hostname) + ); + } catch { + return false; + } +} + function isValidMedia( candidate: string, opts?: { allowSpaces?: boolean; allowBareFilename?: boolean }, @@ -83,7 +156,7 @@ function isValidMedia( return false; } if (/^https?:\/\//i.test(candidate)) { - return true; + return isAllowedRemoteMediaUrl(candidate); } if (isLikelyLocalPath(candidate)) { diff --git a/src/plugins/loader.test.ts b/src/plugins/loader.test.ts index 9c4da22cb33..855556658d9 100644 --- a/src/plugins/loader.test.ts +++ b/src/plugins/loader.test.ts @@ -29,6 +29,7 @@ import { resetGlobalHookRunner, } from "./hook-runner-global.js"; import { createHookRunner } from "./hooks.js"; +import { writePersistedInstalledPluginIndexInstallRecordsSync } from "./installed-plugin-index-records.js"; import { clearPluginInteractiveHandlerRegistrations, clearPluginInteractiveHandlers, @@ -4026,18 +4027,22 @@ module.exports = { id: "throws-after-import", register() {} };`, body: `module.exports = { id: "tracked-install-cache", register() {} };`, }); + writePersistedInstalledPluginIndexInstallRecordsSync( + { + "tracked-install-cache": { + source: "path" as const, + installPath: "~/plugins/tracked-install-cache", + sourcePath: "~/plugins/tracked-install-cache", + }, + }, + { stateDir }, + ); + const options = { config: { plugins: { load: { paths: [plugin.file] }, allow: ["tracked-install-cache"], - installs: { - "tracked-install-cache": { - source: "path" as const, - installPath: "~/plugins/tracked-install-cache", - sourcePath: "~/plugins/tracked-install-cache", - }, - }, }, }, }; @@ -6360,18 +6365,21 @@ module.exports = { dir: globalDir, filename: "index.cjs", }); + writePersistedInstalledPluginIndexInstallRecordsSync( + { + "demo-installed-duplicate": { + source: "npm", + installPath: globalDir, + }, + }, + { stateDir }, + ); return loadOpenClawPlugins({ cache: false, config: { plugins: { allow: ["demo-installed-duplicate"], - installs: { - "demo-installed-duplicate": { - source: "npm", - installPath: globalDir, - }, - }, entries: { "demo-installed-duplicate": { enabled: true }, },