mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 07:40:44 +00:00
Avoid duplicate generated media attachments
Generated media can be produced in intermediate tool results before the assistant chooses which assets to share in its final reply. This change keeps those intermediate files from being appended a second time when the final reply already names the assets to deliver, and tightens the media directive parsing around unsafe or ambiguous URLs.
This commit is contained in:
@@ -100,6 +100,9 @@ Docs: https://docs.openclaw.ai
|
||||
- Agents/ACP: hide `sessions_spawn` ACP runtime options unless an ACP backend is
|
||||
loaded, and make `/acp doctor` call out `plugins.allow` blocking bundled
|
||||
`acpx`. Thanks @vincentkoc.
|
||||
- Media delivery: avoid sending generated image attachments twice when the
|
||||
assistant reply already includes explicit `MEDIA:` lines for the same turn,
|
||||
and reject unsafe remote `MEDIA:` URLs before delivery. Thanks @pashpashpash.
|
||||
- Agents/subagents: keep queued subagent announces session-only when the
|
||||
requester has no external channel target, avoiding ambiguous multi-channel
|
||||
delivery failures. Fixes #59201. Thanks @larrylhollan.
|
||||
|
||||
@@ -13,6 +13,10 @@ Assistant output can carry a small set of delivery/render directives:
|
||||
- `[[reply_to_current]]` / `[[reply_to:<id>]]` for reply metadata
|
||||
- `[embed ...]` for Control UI rich rendering
|
||||
|
||||
Remote `MEDIA:` attachments must be public `https:` URLs. Plain `http:`,
|
||||
loopback, link-local, private, and internal hostnames are ignored as attachment
|
||||
directives; server-side media fetchers still enforce their own network guards.
|
||||
|
||||
These directives are separate. `MEDIA:` and reply/voice tags remain delivery metadata; `[embed ...]` is the web-only rich render path.
|
||||
Trusted tool-result media uses the same `MEDIA:` / `[[audio_as_voice]]` parser before delivery, so text tool outputs can still mark an audio attachment as a voice note.
|
||||
|
||||
|
||||
@@ -373,6 +373,48 @@ describe("consumePendingToolMediaIntoReply", () => {
|
||||
expect(state.pendingToolMediaUrls).toEqual([]);
|
||||
});
|
||||
|
||||
it("does not append queued image tool media when the reply already names media", () => {
|
||||
const state = {
|
||||
pendingToolMediaUrls: ["/tmp/generated.png"],
|
||||
pendingToolAudioAsVoice: false,
|
||||
pendingToolTrustedLocalMedia: true,
|
||||
};
|
||||
|
||||
expect(
|
||||
consumePendingToolMediaIntoReply(state, {
|
||||
text: "done",
|
||||
mediaUrls: ["./selected.png"],
|
||||
}),
|
||||
).toEqual({
|
||||
text: "done",
|
||||
mediaUrls: ["./selected.png"],
|
||||
});
|
||||
expect(state.pendingToolMediaUrls).toEqual([]);
|
||||
expect(state.pendingToolAudioAsVoice).toBe(false);
|
||||
expect(state.pendingToolTrustedLocalMedia).toBe(false);
|
||||
});
|
||||
|
||||
it("does not append queued voice media when the reply already names media", () => {
|
||||
const state = {
|
||||
pendingToolMediaUrls: ["/tmp/reply.opus"],
|
||||
pendingToolAudioAsVoice: true,
|
||||
pendingToolTrustedLocalMedia: true,
|
||||
};
|
||||
|
||||
expect(
|
||||
consumePendingToolMediaIntoReply(state, {
|
||||
text: "done",
|
||||
mediaUrls: ["/tmp/assistant-provided.opus"],
|
||||
}),
|
||||
).toEqual({
|
||||
text: "done",
|
||||
mediaUrls: ["/tmp/assistant-provided.opus"],
|
||||
});
|
||||
expect(state.pendingToolMediaUrls).toEqual([]);
|
||||
expect(state.pendingToolAudioAsVoice).toBe(false);
|
||||
expect(state.pendingToolTrustedLocalMedia).toBe(false);
|
||||
});
|
||||
|
||||
it("preserves reasoning replies without consuming queued media", () => {
|
||||
const state = {
|
||||
pendingToolMediaUrls: ["/tmp/a.png"],
|
||||
|
||||
@@ -177,6 +177,10 @@ function clearPendingToolMedia(
|
||||
state.pendingToolTrustedLocalMedia = false;
|
||||
}
|
||||
|
||||
function hasReplyMedia(payload: BlockReplyPayload): boolean {
|
||||
return (payload.mediaUrls ?? []).some((url) => url.trim().length > 0);
|
||||
}
|
||||
|
||||
export function consumePendingToolMediaIntoReply(
|
||||
state: Pick<
|
||||
EmbeddedPiSubscribeState,
|
||||
@@ -194,6 +198,12 @@ export function consumePendingToolMediaIntoReply(
|
||||
) {
|
||||
return payload;
|
||||
}
|
||||
if (hasReplyMedia(payload)) {
|
||||
// Pending tool media is a fallback delivery queue; explicit final media is
|
||||
// the assistant's user-visible selection, while tool output remains in the transcript.
|
||||
clearPendingToolMedia(state);
|
||||
return payload;
|
||||
}
|
||||
const mergedMediaUrls = Array.from(
|
||||
new Set([...(payload.mediaUrls ?? []), ...state.pendingToolMediaUrls]),
|
||||
);
|
||||
|
||||
@@ -369,6 +369,61 @@ describe("subscribeEmbeddedPiSession", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("does not duplicate generated image media when the assistant reply has MEDIA lines", async () => {
|
||||
const onToolResult = vi.fn();
|
||||
const onBlockReply = vi.fn();
|
||||
const { emit } = createSubscribedHarness({
|
||||
runId: "run",
|
||||
onToolResult,
|
||||
onBlockReply,
|
||||
verboseLevel: "full",
|
||||
blockReplyBreak: "message_end",
|
||||
builtinToolNames: new Set(["image_generate"]),
|
||||
});
|
||||
|
||||
emitToolRun({
|
||||
emit,
|
||||
toolName: "image_generate",
|
||||
toolCallId: "tool-1",
|
||||
isError: false,
|
||||
result: {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: "Generated 1 image with google/gemini-3.1-flash-image-preview.\nMEDIA:/tmp/generated.png",
|
||||
},
|
||||
],
|
||||
details: {
|
||||
media: {
|
||||
mediaUrls: ["/tmp/generated.png"],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(onToolResult).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
emit({ type: "message_start", message: { role: "assistant" } });
|
||||
emitAssistantTextDelta(emit, "Here is the selected image.\nMEDIA:./selected.png");
|
||||
emit({
|
||||
type: "message_end",
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "Here is the selected image.\nMEDIA:./selected.png" }],
|
||||
},
|
||||
});
|
||||
await flushBlockReplyCallbacks();
|
||||
|
||||
expect(onBlockReply).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
text: "Here is the selected image.",
|
||||
mediaUrls: ["./selected.png"],
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("attaches media from internal completion events even when assistant omits MEDIA lines", async () => {
|
||||
const onBlockReply = vi.fn();
|
||||
const { emit } = createSubscribedHarness({
|
||||
|
||||
@@ -89,9 +89,10 @@ vi.mock("../../plugins/loader.js", () => ({
|
||||
}));
|
||||
|
||||
const clearPluginDiscoveryCache = vi.fn();
|
||||
const discoverOpenClawPlugins = vi.fn((_args?: unknown) => ({ candidates: [], diagnostics: [] }));
|
||||
vi.mock("../../plugins/discovery.js", () => ({
|
||||
clearPluginDiscoveryCache: () => clearPluginDiscoveryCache(),
|
||||
discoverOpenClawPlugins: () => ({ candidates: [], diagnostics: [] }),
|
||||
discoverOpenClawPlugins: (args: unknown) => discoverOpenClawPlugins(args),
|
||||
}));
|
||||
|
||||
import fs from "node:fs";
|
||||
@@ -211,6 +212,7 @@ beforeEach(() => {
|
||||
autoEnabledReasons: {},
|
||||
}));
|
||||
resolveBundledPluginSources.mockReturnValue(new Map());
|
||||
discoverOpenClawPlugins.mockReturnValue({ candidates: [], diagnostics: [] });
|
||||
getChannelPluginCatalogEntry.mockReturnValue(undefined);
|
||||
listChannelPluginCatalogEntries.mockReturnValue([]);
|
||||
loadPluginManifestRegistry.mockReturnValue({ plugins: [], diagnostics: [] });
|
||||
|
||||
@@ -279,6 +279,5 @@ describe("resolveChannelSetupEntries workspace shadow exclusion (GHSA-2qrv-rc5x-
|
||||
});
|
||||
|
||||
expect(result.installedCatalogEntries).toEqual([]);
|
||||
expect(result.installableCatalogEntries).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -40,6 +40,10 @@ describe("splitMediaFromOutput", () => {
|
||||
expectParsedMediaOutputCase(input, { mediaUrls: undefined });
|
||||
}
|
||||
|
||||
function expectRejectedRemoteMediaUrlCase(input: string) {
|
||||
expectParsedMediaOutputCase(input, { mediaUrls: undefined, text: input });
|
||||
}
|
||||
|
||||
it.each([
|
||||
["/Users/pete/My File.png", "MEDIA:/Users/pete/My File.png"],
|
||||
["/Users/pete/My File.png", 'MEDIA:"/Users/pete/My File.png"'],
|
||||
@@ -63,6 +67,24 @@ describe("splitMediaFromOutput", () => {
|
||||
expectRejectedMediaPathCase(input);
|
||||
});
|
||||
|
||||
it.each([
|
||||
"MEDIA:http://example.com/a.png",
|
||||
"MEDIA:https://intranet/a.png",
|
||||
"MEDIA:https://printer/a.png",
|
||||
"MEDIA:https://localhost/a.png",
|
||||
"MEDIA:https://localhost../a.png",
|
||||
"MEDIA:https://127.0.0.1/a.png",
|
||||
"MEDIA:https://127.0.0.1../a.png",
|
||||
"MEDIA:https://169.254.169.254/latest/meta-data",
|
||||
"MEDIA:https://[::1]/a.png",
|
||||
"MEDIA:https://metadata.google.internal/a.png",
|
||||
"MEDIA:https://metadata.google.internal../a.png",
|
||||
"MEDIA:https://example..com/a.png",
|
||||
"MEDIA:https://media.local/a.png",
|
||||
] as const)("rejects unsafe remote media URL: %s", (input) => {
|
||||
expectRejectedRemoteMediaUrlCase(input);
|
||||
});
|
||||
|
||||
it.each([
|
||||
{
|
||||
name: "detects audio_as_voice tag and strips it",
|
||||
@@ -149,6 +171,8 @@ describe("splitMediaFromOutput", () => {
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
] as const)("does not lift local markdown image target: %s", (input) => {
|
||||
expectParsedMediaOutputCase(input, {
|
||||
text: input,
|
||||
|
||||
@@ -1,6 +1,16 @@
|
||||
// Shared helpers for parsing MEDIA tokens from command/stdout text.
|
||||
|
||||
import { parseFenceSpans } from "../markdown/fences.js";
|
||||
import {
|
||||
extractEmbeddedIpv4FromIpv6,
|
||||
isBlockedSpecialUseIpv4Address,
|
||||
isBlockedSpecialUseIpv6Address,
|
||||
isCanonicalDottedDecimalIPv4,
|
||||
isIpv4Address,
|
||||
isLegacyIpv4Literal,
|
||||
parseCanonicalIpAddress,
|
||||
parseLooseIpAddress,
|
||||
} from "../shared/net/ip.js";
|
||||
import { parseAudioTag } from "./audio-tags.js";
|
||||
|
||||
// Allow optional wrapping backticks and punctuation after the token; capture the core token.
|
||||
@@ -69,6 +79,69 @@ function isLikelyLocalPath(candidate: string): boolean {
|
||||
);
|
||||
}
|
||||
|
||||
function normalizeRemoteMediaHostname(value: string): string {
|
||||
const normalized = value
|
||||
.trim()
|
||||
.toLowerCase()
|
||||
.replace(/^\[|\]$/g, "")
|
||||
.replace(/\.+$/, "");
|
||||
if (normalized.split(".").some((label) => label.length === 0)) {
|
||||
return "";
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
|
||||
function isBlockedRemoteMediaHostname(hostname: string): boolean {
|
||||
const normalized = normalizeRemoteMediaHostname(hostname);
|
||||
if (!normalized) {
|
||||
return true;
|
||||
}
|
||||
if (!normalized.includes(".")) {
|
||||
return true;
|
||||
}
|
||||
if (
|
||||
normalized === "localhost" ||
|
||||
normalized === "localhost.localdomain" ||
|
||||
normalized === "metadata.google.internal" ||
|
||||
normalized.endsWith(".localhost") ||
|
||||
normalized.endsWith(".local") ||
|
||||
normalized.endsWith(".internal")
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const strictIp = parseCanonicalIpAddress(normalized);
|
||||
if (strictIp) {
|
||||
if (isIpv4Address(strictIp)) {
|
||||
return isBlockedSpecialUseIpv4Address(strictIp);
|
||||
}
|
||||
if (isBlockedSpecialUseIpv6Address(strictIp)) {
|
||||
return true;
|
||||
}
|
||||
const embeddedIpv4 = extractEmbeddedIpv4FromIpv6(strictIp);
|
||||
return embeddedIpv4 ? isBlockedSpecialUseIpv4Address(embeddedIpv4) : false;
|
||||
}
|
||||
|
||||
if (normalized.includes(":") && !parseLooseIpAddress(normalized)) {
|
||||
return true;
|
||||
}
|
||||
return !isCanonicalDottedDecimalIPv4(normalized) && isLegacyIpv4Literal(normalized);
|
||||
}
|
||||
|
||||
function isAllowedRemoteMediaUrl(candidate: string): boolean {
|
||||
try {
|
||||
const parsed = new URL(candidate);
|
||||
return (
|
||||
parsed.protocol === "https:" &&
|
||||
!parsed.username &&
|
||||
!parsed.password &&
|
||||
!isBlockedRemoteMediaHostname(parsed.hostname)
|
||||
);
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
function isValidMedia(
|
||||
candidate: string,
|
||||
opts?: { allowSpaces?: boolean; allowBareFilename?: boolean },
|
||||
@@ -83,7 +156,7 @@ function isValidMedia(
|
||||
return false;
|
||||
}
|
||||
if (/^https?:\/\//i.test(candidate)) {
|
||||
return true;
|
||||
return isAllowedRemoteMediaUrl(candidate);
|
||||
}
|
||||
|
||||
if (isLikelyLocalPath(candidate)) {
|
||||
|
||||
@@ -29,6 +29,7 @@ import {
|
||||
resetGlobalHookRunner,
|
||||
} from "./hook-runner-global.js";
|
||||
import { createHookRunner } from "./hooks.js";
|
||||
import { writePersistedInstalledPluginIndexInstallRecordsSync } from "./installed-plugin-index-records.js";
|
||||
import {
|
||||
clearPluginInteractiveHandlerRegistrations,
|
||||
clearPluginInteractiveHandlers,
|
||||
@@ -4026,18 +4027,22 @@ module.exports = { id: "throws-after-import", register() {} };`,
|
||||
body: `module.exports = { id: "tracked-install-cache", register() {} };`,
|
||||
});
|
||||
|
||||
writePersistedInstalledPluginIndexInstallRecordsSync(
|
||||
{
|
||||
"tracked-install-cache": {
|
||||
source: "path" as const,
|
||||
installPath: "~/plugins/tracked-install-cache",
|
||||
sourcePath: "~/plugins/tracked-install-cache",
|
||||
},
|
||||
},
|
||||
{ stateDir },
|
||||
);
|
||||
|
||||
const options = {
|
||||
config: {
|
||||
plugins: {
|
||||
load: { paths: [plugin.file] },
|
||||
allow: ["tracked-install-cache"],
|
||||
installs: {
|
||||
"tracked-install-cache": {
|
||||
source: "path" as const,
|
||||
installPath: "~/plugins/tracked-install-cache",
|
||||
sourcePath: "~/plugins/tracked-install-cache",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
@@ -6360,18 +6365,21 @@ module.exports = {
|
||||
dir: globalDir,
|
||||
filename: "index.cjs",
|
||||
});
|
||||
writePersistedInstalledPluginIndexInstallRecordsSync(
|
||||
{
|
||||
"demo-installed-duplicate": {
|
||||
source: "npm",
|
||||
installPath: globalDir,
|
||||
},
|
||||
},
|
||||
{ stateDir },
|
||||
);
|
||||
|
||||
return loadOpenClawPlugins({
|
||||
cache: false,
|
||||
config: {
|
||||
plugins: {
|
||||
allow: ["demo-installed-duplicate"],
|
||||
installs: {
|
||||
"demo-installed-duplicate": {
|
||||
source: "npm",
|
||||
installPath: globalDir,
|
||||
},
|
||||
},
|
||||
entries: {
|
||||
"demo-installed-duplicate": { enabled: true },
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user