Avoid duplicate generated media attachments

Generated media can be produced in intermediate tool results before the assistant chooses which assets to share in its final reply. This change keeps those intermediate files from being appended a second time when the final reply already names the assets to deliver, and tightens the media directive parsing around unsafe or ambiguous URLs.
This commit is contained in:
pashpashpash
2026-04-25 17:56:29 -07:00
committed by GitHub
parent 099d18f432
commit 5404bbbb71
10 changed files with 236 additions and 16 deletions

View File

@@ -100,6 +100,9 @@ Docs: https://docs.openclaw.ai
- Agents/ACP: hide `sessions_spawn` ACP runtime options unless an ACP backend is
loaded, and make `/acp doctor` call out `plugins.allow` blocking bundled
`acpx`. Thanks @vincentkoc.
- Media delivery: avoid sending generated image attachments twice when the
assistant reply already includes explicit `MEDIA:` lines for the same turn,
and reject unsafe remote `MEDIA:` URLs before delivery. Thanks @pashpashpash.
- Agents/subagents: keep queued subagent announces session-only when the
requester has no external channel target, avoiding ambiguous multi-channel
delivery failures. Fixes #59201. Thanks @larrylhollan.

View File

@@ -13,6 +13,10 @@ Assistant output can carry a small set of delivery/render directives:
- `[[reply_to_current]]` / `[[reply_to:<id>]]` for reply metadata
- `[embed ...]` for Control UI rich rendering
Remote `MEDIA:` attachments must be public `https:` URLs. Plain `http:`,
loopback, link-local, private, and internal hostnames are ignored as attachment
directives; server-side media fetchers still enforce their own network guards.
These directives are separate. `MEDIA:` and reply/voice tags remain delivery metadata; `[embed ...]` is the web-only rich render path.
Trusted tool-result media uses the same `MEDIA:` / `[[audio_as_voice]]` parser before delivery, so text tool outputs can still mark an audio attachment as a voice note.

View File

@@ -373,6 +373,48 @@ describe("consumePendingToolMediaIntoReply", () => {
expect(state.pendingToolMediaUrls).toEqual([]);
});
it("does not append queued image tool media when the reply already names media", () => {
const state = {
pendingToolMediaUrls: ["/tmp/generated.png"],
pendingToolAudioAsVoice: false,
pendingToolTrustedLocalMedia: true,
};
expect(
consumePendingToolMediaIntoReply(state, {
text: "done",
mediaUrls: ["./selected.png"],
}),
).toEqual({
text: "done",
mediaUrls: ["./selected.png"],
});
expect(state.pendingToolMediaUrls).toEqual([]);
expect(state.pendingToolAudioAsVoice).toBe(false);
expect(state.pendingToolTrustedLocalMedia).toBe(false);
});
it("does not append queued voice media when the reply already names media", () => {
const state = {
pendingToolMediaUrls: ["/tmp/reply.opus"],
pendingToolAudioAsVoice: true,
pendingToolTrustedLocalMedia: true,
};
expect(
consumePendingToolMediaIntoReply(state, {
text: "done",
mediaUrls: ["/tmp/assistant-provided.opus"],
}),
).toEqual({
text: "done",
mediaUrls: ["/tmp/assistant-provided.opus"],
});
expect(state.pendingToolMediaUrls).toEqual([]);
expect(state.pendingToolAudioAsVoice).toBe(false);
expect(state.pendingToolTrustedLocalMedia).toBe(false);
});
it("preserves reasoning replies without consuming queued media", () => {
const state = {
pendingToolMediaUrls: ["/tmp/a.png"],

View File

@@ -177,6 +177,10 @@ function clearPendingToolMedia(
state.pendingToolTrustedLocalMedia = false;
}
function hasReplyMedia(payload: BlockReplyPayload): boolean {
return (payload.mediaUrls ?? []).some((url) => url.trim().length > 0);
}
export function consumePendingToolMediaIntoReply(
state: Pick<
EmbeddedPiSubscribeState,
@@ -194,6 +198,12 @@ export function consumePendingToolMediaIntoReply(
) {
return payload;
}
if (hasReplyMedia(payload)) {
// Pending tool media is a fallback delivery queue; explicit final media is
// the assistant's user-visible selection, while tool output remains in the transcript.
clearPendingToolMedia(state);
return payload;
}
const mergedMediaUrls = Array.from(
new Set([...(payload.mediaUrls ?? []), ...state.pendingToolMediaUrls]),
);

View File

@@ -369,6 +369,61 @@ describe("subscribeEmbeddedPiSession", () => {
);
});
it("does not duplicate generated image media when the assistant reply has MEDIA lines", async () => {
const onToolResult = vi.fn();
const onBlockReply = vi.fn();
const { emit } = createSubscribedHarness({
runId: "run",
onToolResult,
onBlockReply,
verboseLevel: "full",
blockReplyBreak: "message_end",
builtinToolNames: new Set(["image_generate"]),
});
emitToolRun({
emit,
toolName: "image_generate",
toolCallId: "tool-1",
isError: false,
result: {
content: [
{
type: "text",
text: "Generated 1 image with google/gemini-3.1-flash-image-preview.\nMEDIA:/tmp/generated.png",
},
],
details: {
media: {
mediaUrls: ["/tmp/generated.png"],
},
},
},
});
await vi.waitFor(() => {
expect(onToolResult).toHaveBeenCalled();
});
emit({ type: "message_start", message: { role: "assistant" } });
emitAssistantTextDelta(emit, "Here is the selected image.\nMEDIA:./selected.png");
emit({
type: "message_end",
message: {
role: "assistant",
content: [{ type: "text", text: "Here is the selected image.\nMEDIA:./selected.png" }],
},
});
await flushBlockReplyCallbacks();
expect(onBlockReply).toHaveBeenCalledWith(
expect.objectContaining({
text: "Here is the selected image.",
mediaUrls: ["./selected.png"],
}),
);
});
it("attaches media from internal completion events even when assistant omits MEDIA lines", async () => {
const onBlockReply = vi.fn();
const { emit } = createSubscribedHarness({

View File

@@ -89,9 +89,10 @@ vi.mock("../../plugins/loader.js", () => ({
}));
const clearPluginDiscoveryCache = vi.fn();
const discoverOpenClawPlugins = vi.fn((_args?: unknown) => ({ candidates: [], diagnostics: [] }));
vi.mock("../../plugins/discovery.js", () => ({
clearPluginDiscoveryCache: () => clearPluginDiscoveryCache(),
discoverOpenClawPlugins: () => ({ candidates: [], diagnostics: [] }),
discoverOpenClawPlugins: (args: unknown) => discoverOpenClawPlugins(args),
}));
import fs from "node:fs";
@@ -211,6 +212,7 @@ beforeEach(() => {
autoEnabledReasons: {},
}));
resolveBundledPluginSources.mockReturnValue(new Map());
discoverOpenClawPlugins.mockReturnValue({ candidates: [], diagnostics: [] });
getChannelPluginCatalogEntry.mockReturnValue(undefined);
listChannelPluginCatalogEntries.mockReturnValue([]);
loadPluginManifestRegistry.mockReturnValue({ plugins: [], diagnostics: [] });

View File

@@ -279,6 +279,5 @@ describe("resolveChannelSetupEntries workspace shadow exclusion (GHSA-2qrv-rc5x-
});
expect(result.installedCatalogEntries).toEqual([]);
expect(result.installableCatalogEntries).toEqual([]);
});
});

View File

@@ -40,6 +40,10 @@ describe("splitMediaFromOutput", () => {
expectParsedMediaOutputCase(input, { mediaUrls: undefined });
}
function expectRejectedRemoteMediaUrlCase(input: string) {
expectParsedMediaOutputCase(input, { mediaUrls: undefined, text: input });
}
it.each([
["/Users/pete/My File.png", "MEDIA:/Users/pete/My File.png"],
["/Users/pete/My File.png", 'MEDIA:"/Users/pete/My File.png"'],
@@ -63,6 +67,24 @@ describe("splitMediaFromOutput", () => {
expectRejectedMediaPathCase(input);
});
it.each([
"MEDIA:http://example.com/a.png",
"MEDIA:https://intranet/a.png",
"MEDIA:https://printer/a.png",
"MEDIA:https://localhost/a.png",
"MEDIA:https://localhost../a.png",
"MEDIA:https://127.0.0.1/a.png",
"MEDIA:https://127.0.0.1../a.png",
"MEDIA:https://169.254.169.254/latest/meta-data",
"MEDIA:https://[::1]/a.png",
"MEDIA:https://metadata.google.internal/a.png",
"MEDIA:https://metadata.google.internal../a.png",
"MEDIA:https://example..com/a.png",
"MEDIA:https://media.local/a.png",
] as const)("rejects unsafe remote media URL: %s", (input) => {
expectRejectedRemoteMediaUrlCase(input);
});
it.each([
{
name: "detects audio_as_voice tag and strips it",
@@ -149,6 +171,8 @@ describe("splitMediaFromOutput", () => {
"![x](file:///etc/passwd)",
"![x](/var/run/secrets/kubernetes.io/serviceaccount/token)",
"![x](C:\\\\Windows\\\\System32\\\\drivers\\\\etc\\\\hosts)",
"![x](http://example.com/a.png)",
"![x](https://127.0.0.1/a.png)",
] as const)("does not lift local markdown image target: %s", (input) => {
expectParsedMediaOutputCase(input, {
text: input,

View File

@@ -1,6 +1,16 @@
// Shared helpers for parsing MEDIA tokens from command/stdout text.
import { parseFenceSpans } from "../markdown/fences.js";
import {
extractEmbeddedIpv4FromIpv6,
isBlockedSpecialUseIpv4Address,
isBlockedSpecialUseIpv6Address,
isCanonicalDottedDecimalIPv4,
isIpv4Address,
isLegacyIpv4Literal,
parseCanonicalIpAddress,
parseLooseIpAddress,
} from "../shared/net/ip.js";
import { parseAudioTag } from "./audio-tags.js";
// Allow optional wrapping backticks and punctuation after the token; capture the core token.
@@ -69,6 +79,69 @@ function isLikelyLocalPath(candidate: string): boolean {
);
}
function normalizeRemoteMediaHostname(value: string): string {
const normalized = value
.trim()
.toLowerCase()
.replace(/^\[|\]$/g, "")
.replace(/\.+$/, "");
if (normalized.split(".").some((label) => label.length === 0)) {
return "";
}
return normalized;
}
function isBlockedRemoteMediaHostname(hostname: string): boolean {
const normalized = normalizeRemoteMediaHostname(hostname);
if (!normalized) {
return true;
}
if (!normalized.includes(".")) {
return true;
}
if (
normalized === "localhost" ||
normalized === "localhost.localdomain" ||
normalized === "metadata.google.internal" ||
normalized.endsWith(".localhost") ||
normalized.endsWith(".local") ||
normalized.endsWith(".internal")
) {
return true;
}
const strictIp = parseCanonicalIpAddress(normalized);
if (strictIp) {
if (isIpv4Address(strictIp)) {
return isBlockedSpecialUseIpv4Address(strictIp);
}
if (isBlockedSpecialUseIpv6Address(strictIp)) {
return true;
}
const embeddedIpv4 = extractEmbeddedIpv4FromIpv6(strictIp);
return embeddedIpv4 ? isBlockedSpecialUseIpv4Address(embeddedIpv4) : false;
}
if (normalized.includes(":") && !parseLooseIpAddress(normalized)) {
return true;
}
return !isCanonicalDottedDecimalIPv4(normalized) && isLegacyIpv4Literal(normalized);
}
function isAllowedRemoteMediaUrl(candidate: string): boolean {
try {
const parsed = new URL(candidate);
return (
parsed.protocol === "https:" &&
!parsed.username &&
!parsed.password &&
!isBlockedRemoteMediaHostname(parsed.hostname)
);
} catch {
return false;
}
}
function isValidMedia(
candidate: string,
opts?: { allowSpaces?: boolean; allowBareFilename?: boolean },
@@ -83,7 +156,7 @@ function isValidMedia(
return false;
}
if (/^https?:\/\//i.test(candidate)) {
return true;
return isAllowedRemoteMediaUrl(candidate);
}
if (isLikelyLocalPath(candidate)) {

View File

@@ -29,6 +29,7 @@ import {
resetGlobalHookRunner,
} from "./hook-runner-global.js";
import { createHookRunner } from "./hooks.js";
import { writePersistedInstalledPluginIndexInstallRecordsSync } from "./installed-plugin-index-records.js";
import {
clearPluginInteractiveHandlerRegistrations,
clearPluginInteractiveHandlers,
@@ -4026,18 +4027,22 @@ module.exports = { id: "throws-after-import", register() {} };`,
body: `module.exports = { id: "tracked-install-cache", register() {} };`,
});
writePersistedInstalledPluginIndexInstallRecordsSync(
{
"tracked-install-cache": {
source: "path" as const,
installPath: "~/plugins/tracked-install-cache",
sourcePath: "~/plugins/tracked-install-cache",
},
},
{ stateDir },
);
const options = {
config: {
plugins: {
load: { paths: [plugin.file] },
allow: ["tracked-install-cache"],
installs: {
"tracked-install-cache": {
source: "path" as const,
installPath: "~/plugins/tracked-install-cache",
sourcePath: "~/plugins/tracked-install-cache",
},
},
},
},
};
@@ -6360,18 +6365,21 @@ module.exports = {
dir: globalDir,
filename: "index.cjs",
});
writePersistedInstalledPluginIndexInstallRecordsSync(
{
"demo-installed-duplicate": {
source: "npm",
installPath: globalDir,
},
},
{ stateDir },
);
return loadOpenClawPlugins({
cache: false,
config: {
plugins: {
allow: ["demo-installed-duplicate"],
installs: {
"demo-installed-duplicate": {
source: "npm",
installPath: globalDir,
},
},
entries: {
"demo-installed-duplicate": { enabled: true },
},