diff --git a/CHANGELOG.md b/CHANGELOG.md index 60d5a147112..12200846710 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai - Gateway/watch: keep colored subsystem log prefixes in the managed tmux pane even when the parent shell exports `NO_COLOR`, while preserving explicit `FORCE_COLOR=0` opt-out. Thanks @vincentkoc. - Agents/compaction: submit a non-empty runtime-event marker for pre-compaction memory flush turns, so strict Anthropic providers no longer reject the silent flush as an empty user message. Fixes #75305. Thanks @sableassistant3777-source. - Plugin SDK: re-export `isPrivateIpAddress` from `plugin-sdk/ssrf-runtime`, restoring source-checkout builds for SearXNG and Firecrawl private-network guards. Thanks @vincentkoc. +- Discord/message actions: advertise `upload-file` and route it through Discord's send runtime with agent-scoped media reads, so agents can discover and send file attachments. Fixes #60652 and supersedes #60808, #61087, and #61100. Thanks @claw-io, @efe-arv, @joelnishanth, and @sjhddh. - CLI/directory: report unsupported directory operations for installed channel plugins instead of prompting to reinstall the plugin when it lacks a directory adapter. Fixes #75770. Thanks @lawong888. - Web search/SearXNG: show the JSON API `search.formats` prerequisite during SearXNG setup before prompting for the base URL. Supersedes #65592. Thanks @evanpaul14. - Web search/SearXNG: pass through `img_src` image URLs from SearXNG image-category results. Supersedes #61416. Thanks @sghael. diff --git a/extensions/discord/src/actions/handle-action.test.ts b/extensions/discord/src/actions/handle-action.test.ts index 6aaa6df9a81..b6c2620e97c 100644 --- a/extensions/discord/src/actions/handle-action.test.ts +++ b/extensions/discord/src/actions/handle-action.test.ts @@ -125,6 +125,97 @@ describe("handleDiscordMessageAction", () => { ); }); + it("maps upload-file to Discord sendMessage with media read context", async () => { + const mediaReadFile = vi.fn(async () => Buffer.from("image")); + const mediaAccess = { + localRoots: ["/tmp/agent-root"], + readFile: mediaReadFile, + }; + + await handleDiscordMessageAction({ + action: "upload-file", + params: { + target: "channel:123", + filePath: "/tmp/agent-root/image.png", + message: "caption", + filename: "image.png", + replyTo: "message-1", + silent: true, + __sessionKey: "session-1", + __agentId: "agent-1", + }, + cfg: { + channels: { discord: { token: "tok" } }, + } as OpenClawConfig, + mediaAccess, + mediaLocalRoots: ["/tmp/agent-root"], + mediaReadFile, + }); + + expect(handleDiscordActionMock).toHaveBeenCalledWith( + expect.objectContaining({ + action: "sendMessage", + to: "channel:123", + content: "caption", + mediaUrl: "/tmp/agent-root/image.png", + filename: "image.png", + replyTo: "message-1", + silent: true, + __sessionKey: "session-1", + __agentId: "agent-1", + }), + expect.any(Object), + { + mediaAccess, + mediaLocalRoots: ["/tmp/agent-root"], + mediaReadFile, + }, + ); + }); + + it("falls back to Discord toolContext.currentChannelId for upload-file", async () => { + await handleDiscordMessageAction({ + action: "upload-file", + params: { + path: "/tmp/agent-root/image.png", + }, + cfg: { + channels: { discord: { token: "tok" } }, + } as OpenClawConfig, + toolContext: { + currentChannelProvider: "discord", + currentChannelId: "channel:123", + }, + }); + + expect(handleDiscordActionMock).toHaveBeenCalledWith( + expect.objectContaining({ + action: "sendMessage", + to: "channel:123", + content: "", + mediaUrl: "/tmp/agent-root/image.png", + }), + expect.any(Object), + expect.any(Object), + ); + }); + + it("requires a file path for upload-file", async () => { + await expect( + handleDiscordMessageAction({ + action: "upload-file", + params: { + to: "channel:123", + }, + cfg: { + channels: { discord: { token: "tok" } }, + } as OpenClawConfig, + }), + ).rejects.toThrow(/upload-file requires filePath, path, or media/i); + + expect(handleDiscordActionMock).not.toHaveBeenCalled(); + }); + it("does not use another provider's current target for Discord sends", async () => { await expect( handleDiscordMessageAction({ diff --git a/extensions/discord/src/actions/handle-action.ts b/extensions/discord/src/actions/handle-action.ts index 1e7dcea7869..dda640aa26e 100644 --- a/extensions/discord/src/actions/handle-action.ts +++ b/extensions/discord/src/actions/handle-action.ts @@ -66,15 +66,19 @@ export async function handleDiscordMessageAction( return target; }; const resolveChannelId = () => resolveDiscordChannelId(readTarget()); - - if (action === "send") { - const to = + const readSendTarget = () => { + const target = readStringParam(params, "to") ?? readStringParam(params, "target") ?? readCurrentDiscordTarget(ctx.toolContext); - if (!to) { + if (!target) { throw new Error("Discord channel target is required (use channel:)."); } + return target; + }; + + if (action === "send") { + const to = readSendTarget(); const asVoice = readBooleanParam(params, "asVoice") === true; const rawComponents = buildDiscordPresentationComponents(normalizeMessagePresentation(params.presentation)) ?? @@ -83,15 +87,15 @@ export async function handleDiscordMessageAction( Boolean(rawComponents) && (typeof rawComponents === "function" || typeof rawComponents === "object"); const components = hasComponents ? rawComponents : undefined; - const content = readStringParam(params, "message", { - required: !asVoice && !hasComponents, - allowEmpty: true, - }); // Support media, path, and filePath for media URL const mediaUrl = readStringParam(params, "media", { trim: false }) ?? readStringParam(params, "path", { trim: false }) ?? readStringParam(params, "filePath", { trim: false }); + const content = readStringParam(params, "message", { + required: !asVoice && !hasComponents && !mediaUrl, + allowEmpty: true, + }); const filename = readStringParam(params, "filename"); const replyTo = readStringParam(params, "replyTo"); const rawEmbeds = params.embeds; @@ -104,7 +108,7 @@ export async function handleDiscordMessageAction( action: "sendMessage", accountId: accountId ?? undefined, to, - content, + content: content ?? "", mediaUrl: mediaUrl ?? undefined, filename: filename ?? undefined, replyTo: replyTo ?? undefined, @@ -120,6 +124,41 @@ export async function handleDiscordMessageAction( ); } + if (action === "upload-file") { + const to = readSendTarget(); + const mediaUrl = + readStringParam(params, "filePath", { trim: false }) ?? + readStringParam(params, "path", { trim: false }) ?? + readStringParam(params, "media", { trim: false }); + if (!mediaUrl) { + throw new Error("upload-file requires filePath, path, or media."); + } + const content = + readStringParam(params, "message", { allowEmpty: true }) ?? + readStringParam(params, "content", { allowEmpty: true }); + const filename = readStringParam(params, "filename"); + const replyTo = readStringParam(params, "replyTo"); + const silent = readBooleanParam(params, "silent") === true; + const sessionKey = readStringParam(params, "__sessionKey"); + const agentId = readStringParam(params, "__agentId"); + return await handleDiscordAction( + { + action: "sendMessage", + accountId: accountId ?? undefined, + to, + content: content ?? "", + mediaUrl, + filename: filename ?? undefined, + replyTo: replyTo ?? undefined, + silent, + __sessionKey: sessionKey ?? undefined, + __agentId: agentId ?? undefined, + }, + cfg, + actionOptions, + ); + } + if (action === "poll") { const to = readStringParam(params, "to", { required: true }); const question = readStringParam(params, "pollQuestion", { diff --git a/extensions/discord/src/actions/runtime.messaging.send.ts b/extensions/discord/src/actions/runtime.messaging.send.ts index 9d34723bcbe..35814685408 100644 --- a/extensions/discord/src/actions/runtime.messaging.send.ts +++ b/extensions/discord/src/actions/runtime.messaging.send.ts @@ -78,14 +78,14 @@ export async function handleDiscordMessageSendAction(ctx: DiscordMessagingAction Array.isArray(rawComponents) || typeof rawComponents === "function" ? (rawComponents as DiscordSendComponents) : undefined; - const content = readStringParam(ctx.params, "content", { - required: !asVoice && !componentSpec && !components, - allowEmpty: true, - }); const mediaUrl = readStringParam(ctx.params, "mediaUrl", { trim: false }) ?? readStringParam(ctx.params, "path", { trim: false }) ?? readStringParam(ctx.params, "filePath", { trim: false }); + const content = readStringParam(ctx.params, "content", { + required: !asVoice && !componentSpec && !components && !mediaUrl, + allowEmpty: true, + }); const filename = readStringParam(ctx.params, "filename"); const replyTo = readStringParam(ctx.params, "replyTo"); const rawEmbeds = ctx.params.embeds; @@ -117,6 +117,9 @@ export async function handleDiscordMessageSendAction(ctx: DiscordMessagingAction agentId: agentId ?? undefined, mediaUrl: mediaUrl ?? undefined, filename: filename ?? undefined, + mediaAccess: ctx.options?.mediaAccess, + mediaLocalRoots: ctx.options?.mediaLocalRoots, + mediaReadFile: ctx.options?.mediaReadFile, }, ); return jsonResult({ ok: true, result, components: true }); @@ -144,6 +147,7 @@ export async function handleDiscordMessageSendAction(ctx: DiscordMessagingAction const result = await discordMessagingActionRuntime.sendMessageDiscord(to, content ?? "", { ...ctx.withOpts(), + mediaAccess: ctx.options?.mediaAccess, mediaUrl, filename: filename ?? undefined, mediaLocalRoots: ctx.options?.mediaLocalRoots, diff --git a/extensions/discord/src/actions/runtime.messaging.shared.ts b/extensions/discord/src/actions/runtime.messaging.shared.ts index 2548846a4ac..b38bda26b1f 100644 --- a/extensions/discord/src/actions/runtime.messaging.shared.ts +++ b/extensions/discord/src/actions/runtime.messaging.shared.ts @@ -12,6 +12,11 @@ import { discordMessagingActionRuntime } from "./runtime.messaging.runtime.js"; import { createDiscordActionOptions } from "./runtime.shared.js"; export type DiscordMessagingActionOptions = { + mediaAccess?: { + localRoots?: readonly string[]; + readFile?: (filePath: string) => Promise; + workspaceDir?: string; + }; mediaLocalRoots?: readonly string[]; mediaReadFile?: (filePath: string) => Promise; }; diff --git a/extensions/discord/src/actions/runtime.test.ts b/extensions/discord/src/actions/runtime.test.ts index 97cc5e3461e..6e6cb503414 100644 --- a/extensions/discord/src/actions/runtime.test.ts +++ b/extensions/discord/src/actions/runtime.test.ts @@ -93,6 +93,11 @@ function handleMessagingAction( isActionEnabled: (key: keyof DiscordActionConfig) => boolean, cfg: OpenClawConfig = DISCORD_TEST_CFG, options?: { + mediaAccess?: { + localRoots?: readonly string[]; + readFile?: (filePath: string) => Promise; + workspaceDir?: string; + }; mediaLocalRoots?: readonly string[]; mediaReadFile?: (filePath: string) => Promise; }, @@ -463,6 +468,8 @@ describe("handleDiscordMessagingAction", () => { it("forwards trusted mediaLocalRoots into sendMessageDiscord", async () => { sendMessageDiscord.mockClear(); + const mediaReadFile = vi.fn(async () => Buffer.from("image")); + const mediaAccess = { localRoots: ["/tmp/agent-root"], readFile: mediaReadFile }; await handleMessagingAction( "sendMessage", { @@ -472,11 +479,35 @@ describe("handleDiscordMessagingAction", () => { }, enableAllActions, DISCORD_TEST_CFG, - { mediaLocalRoots: ["/tmp/agent-root"] }, + { mediaAccess, mediaLocalRoots: ["/tmp/agent-root"], mediaReadFile }, ); expect(sendMessageDiscord).toHaveBeenCalledWith( "channel:123", "hello", + expect.objectContaining({ + mediaAccess, + mediaUrl: "/tmp/image.png", + mediaLocalRoots: ["/tmp/agent-root"], + mediaReadFile, + }), + ); + }); + + it("allows media-only message sends", async () => { + sendMessageDiscord.mockClear(); + await handleMessagingAction( + "sendMessage", + { + to: "channel:123", + mediaUrl: "/tmp/image.png", + }, + enableAllActions, + DISCORD_TEST_CFG, + { mediaLocalRoots: ["/tmp/agent-root"] }, + ); + expect(sendMessageDiscord).toHaveBeenCalledWith( + "channel:123", + "", expect.objectContaining({ mediaUrl: "/tmp/image.png", mediaLocalRoots: ["/tmp/agent-root"], diff --git a/extensions/discord/src/actions/runtime.ts b/extensions/discord/src/actions/runtime.ts index 63ec13aa5bc..fea20b59521 100644 --- a/extensions/discord/src/actions/runtime.ts +++ b/extensions/discord/src/actions/runtime.ts @@ -58,7 +58,13 @@ export async function handleDiscordAction( params: Record, cfg: OpenClawConfig, options?: { + mediaAccess?: { + localRoots?: readonly string[]; + readFile?: (filePath: string) => Promise; + workspaceDir?: string; + }; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; }, ): Promise> { const action = readStringParam(params, "action", { required: true }); diff --git a/extensions/discord/src/channel-actions.test.ts b/extensions/discord/src/channel-actions.test.ts index e19a9a71baf..80b5cf4013a 100644 --- a/extensions/discord/src/channel-actions.test.ts +++ b/extensions/discord/src/channel-actions.test.ts @@ -55,7 +55,15 @@ describe("discordMessageActions", () => { expect(discovery?.capabilities).toEqual(["presentation"]); expect(discovery?.schema).toBeUndefined(); expect(discovery?.actions).toEqual( - expect.arrayContaining(["send", "poll", "react", "reactions", "emoji-list", "permissions"]), + expect.arrayContaining([ + "send", + "upload-file", + "poll", + "react", + "reactions", + "emoji-list", + "permissions", + ]), ); expect(discovery?.actions).not.toContain("channel-create"); expect(discovery?.actions).not.toContain("role-add"); @@ -144,13 +152,35 @@ describe("discordMessageActions", () => { }); expect(defaultDiscovery?.actions).toEqual(expect.arrayContaining(["send", "poll"])); + expect(defaultDiscovery?.actions).toContain("upload-file"); expect(defaultDiscovery?.actions).not.toContain("react"); expect(workDiscovery?.actions).toEqual( - expect.arrayContaining(["send", "react", "reactions", "emoji-list"]), + expect.arrayContaining(["send", "upload-file", "react", "reactions", "emoji-list"]), ); expect(workDiscovery?.actions).not.toContain("poll"); }); + it("hides upload-file when Discord message actions are disabled", () => { + const discovery = discordMessageActions.describeMessageTool?.({ + cfg: { + channels: { + discord: { + token: "Bot token-main", + actions: { + messages: false, + }, + }, + }, + } as OpenClawConfig, + }); + + expect(discovery?.actions).toContain("send"); + expect(discovery?.actions).not.toContain("upload-file"); + expect(discovery?.actions).not.toContain("read"); + expect(discovery?.actions).not.toContain("edit"); + expect(discovery?.actions).not.toContain("delete"); + }); + it("does not expose Discord-native message tool schema", () => { const discovery = discordMessageActions.describeMessageTool?.({ cfg: { @@ -170,7 +200,7 @@ describe("discordMessageActions", () => { ); }); - it.each(["send", "edit", "delete", "react", "pin", "poll"])( + it.each(["send", "upload-file", "edit", "delete", "react", "pin", "poll"])( "routes %s actions through local execution mode", (action) => { expect(discordMessageActions.resolveExecutionMode?.({ action: action as never })).toBe( @@ -210,6 +240,11 @@ describe("discordMessageActions", () => { const toolContext: ChannelMessageActionContext["toolContext"] = { currentChannelProvider: "discord", }; + const mediaReadFile = vi.fn(async () => Buffer.from("image")); + const mediaAccess: NonNullable = { + localRoots: ["/tmp/media"], + readFile: mediaReadFile, + }; const mediaLocalRoots = ["/tmp/media"]; await discordMessageActions.handleAction?.({ @@ -220,7 +255,9 @@ describe("discordMessageActions", () => { accountId: "ops", requesterSenderId: "user-1", toolContext, + mediaAccess, mediaLocalRoots, + mediaReadFile, }); expect(handleDiscordMessageActionMock).toHaveBeenCalledWith({ @@ -230,7 +267,9 @@ describe("discordMessageActions", () => { accountId: "ops", requesterSenderId: "user-1", toolContext, + mediaAccess, mediaLocalRoots, + mediaReadFile, }); }); }); diff --git a/extensions/discord/src/channel-actions.ts b/extensions/discord/src/channel-actions.ts index 6f7f1f76fbe..04484e00a5e 100644 --- a/extensions/discord/src/channel-actions.ts +++ b/extensions/discord/src/channel-actions.ts @@ -86,6 +86,7 @@ function describeDiscordMessageTool({ actions.add("emoji-list"); } if (discovery.isEnabled("messages")) { + actions.add("upload-file"); actions.add("read"); actions.add("edit"); actions.add("delete"); @@ -181,7 +182,9 @@ export const discordMessageActions: ChannelMessageActionAdapter = { accountId, requesterSenderId, toolContext, + mediaAccess, mediaLocalRoots, + mediaReadFile, }) => { return await ( await loadDiscordChannelActionsRuntime() @@ -192,7 +195,9 @@ export const discordMessageActions: ChannelMessageActionAdapter = { accountId, requesterSenderId, toolContext, + mediaAccess, mediaLocalRoots, + mediaReadFile, }); }, }; diff --git a/extensions/discord/src/send.components.ts b/extensions/discord/src/send.components.ts index dbd6a06e890..b85f04b7488 100644 --- a/extensions/discord/src/send.components.ts +++ b/extensions/discord/src/send.components.ts @@ -1,6 +1,7 @@ import { ChannelType } from "discord-api-types/v10"; import { recordChannelActivity } from "openclaw/plugin-sdk/channel-activity-runtime"; import type { MarkdownTableMode, OpenClawConfig } from "openclaw/plugin-sdk/config-types"; +import type { OutboundMediaAccess } from "openclaw/plugin-sdk/media-runtime"; import { requireRuntimeConfig } from "openclaw/plugin-sdk/plugin-config-runtime"; import type { ChunkMode } from "openclaw/plugin-sdk/reply-chunking"; import { resolveDiscordAccount } from "./accounts.js"; @@ -154,10 +155,7 @@ type DiscordComponentSendOpts = { sessionKey?: string; agentId?: string; mediaUrl?: string; - mediaAccess?: { - localRoots?: readonly string[]; - readFile?: (filePath: string) => Promise; - }; + mediaAccess?: OutboundMediaAccess; mediaLocalRoots?: readonly string[]; mediaReadFile?: (filePath: string) => Promise; filename?: string; diff --git a/extensions/discord/src/send.outbound.ts b/extensions/discord/src/send.outbound.ts index 9ed92d9fb23..fabcde6a425 100644 --- a/extensions/discord/src/send.outbound.ts +++ b/extensions/discord/src/send.outbound.ts @@ -2,7 +2,7 @@ import { ChannelType } from "discord-api-types/v10"; import { recordChannelActivity } from "openclaw/plugin-sdk/channel-activity-runtime"; import type { MarkdownTableMode, OpenClawConfig } from "openclaw/plugin-sdk/config-types"; import { resolveMarkdownTableMode } from "openclaw/plugin-sdk/markdown-table-runtime"; -import type { PollInput } from "openclaw/plugin-sdk/media-runtime"; +import type { OutboundMediaAccess, PollInput } from "openclaw/plugin-sdk/media-runtime"; import { requireRuntimeConfig } from "openclaw/plugin-sdk/plugin-config-runtime"; import { resolveChunkMode, type ChunkMode } from "openclaw/plugin-sdk/reply-chunking"; import type { RetryConfig } from "openclaw/plugin-sdk/retry-runtime"; @@ -35,10 +35,7 @@ type DiscordSendOpts = { accountId?: string; mediaUrl?: string; filename?: string; - mediaAccess?: { - localRoots?: readonly string[]; - readFile?: (filePath: string) => Promise; - }; + mediaAccess?: OutboundMediaAccess; mediaLocalRoots?: readonly string[]; mediaReadFile?: (filePath: string) => Promise; verbose?: boolean; @@ -225,6 +222,7 @@ export async function sendMessageDiscord( mediaCaption ?? "", opts.mediaUrl, opts.filename, + opts.mediaAccess, opts.mediaLocalRoots, opts.mediaReadFile, mediaMaxBytes, @@ -292,6 +290,7 @@ export async function sendMessageDiscord( textWithMentions, opts.mediaUrl, opts.filename, + opts.mediaAccess, opts.mediaLocalRoots, opts.mediaReadFile, mediaMaxBytes, diff --git a/extensions/discord/src/send.sends-basic-channel-messages.test.ts b/extensions/discord/src/send.sends-basic-channel-messages.test.ts index e776b8cd586..f82e88e7a35 100644 --- a/extensions/discord/src/send.sends-basic-channel-messages.test.ts +++ b/extensions/discord/src/send.sends-basic-channel-messages.test.ts @@ -444,6 +444,28 @@ describe("sendMessageDiscord", () => { ); }); + it("passes mediaAccess workspaceDir when loading relative media attachments", async () => { + const { rest, postMock } = makeDiscordRest(); + postMock.mockResolvedValue({ id: "msg", channel_id: "789" }); + + await sendMessageDiscord("channel:789", "", { + rest, + token: "t", + cfg: DISCORD_TEST_CFG, + mediaUrl: "chart.png", + mediaAccess: { + workspaceDir: "/tmp/agent-workspace", + }, + }); + + expect(loadWebMedia).toHaveBeenCalledWith( + "chart.png", + expect.objectContaining({ + workspaceDir: "/tmp/agent-workspace", + }), + ); + }); + it("prefers the caller-provided filename for media attachments", async () => { const { rest, postMock } = makeDiscordRest(); postMock.mockResolvedValue({ id: "msg", channel_id: "789" }); diff --git a/extensions/discord/src/send.shared.ts b/extensions/discord/src/send.shared.ts index b5b549c2c4e..a3e5cff2e7e 100644 --- a/extensions/discord/src/send.shared.ts +++ b/extensions/discord/src/send.shared.ts @@ -7,6 +7,7 @@ import { extensionForMime } from "openclaw/plugin-sdk/media-runtime"; import { normalizePollDurationHours, normalizePollInput, + type OutboundMediaAccess, type PollInput, } from "openclaw/plugin-sdk/media-runtime"; import { requireRuntimeConfig } from "openclaw/plugin-sdk/plugin-config-runtime"; @@ -345,6 +346,7 @@ async function sendDiscordMedia( text: string, mediaUrl: string, filename: string | undefined, + mediaAccess: OutboundMediaAccess | undefined, mediaLocalRoots: readonly string[] | undefined, mediaReadFile: ((filePath: string) => Promise) | undefined, maxBytes: number | undefined, @@ -359,7 +361,7 @@ async function sendDiscordMedia( ) { const media = await loadWebMedia( mediaUrl, - buildOutboundMediaLoadOptions({ maxBytes, mediaLocalRoots, mediaReadFile }), + buildOutboundMediaLoadOptions({ maxBytes, mediaAccess, mediaLocalRoots, mediaReadFile }), ); const requestedFileName = filename?.trim(); const resolvedFileName =