From 6603a174bcc6d68213ee4e0a05c833f791dd87f0 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 1 May 2026 03:48:27 +0100 Subject: [PATCH] fix: keep async music delivery agent-mediated (#75335) Keep async music generation completions on the requester-session wake path even when direct-send completion is enabled. Also aligns config help, generated schema text, public docs, and the changelog so tools.media.asyncCompletion.directSend no longer claims to direct-send music completions. Verification: - pnpm test src/agents/tools/music-generate-background.test.ts src/agents/tools/video-generate-background.test.ts - pnpm exec oxfmt --check --threads=1 src/agents/tools/media-generate-background-shared.ts src/agents/tools/music-generate-background.ts src/agents/tools/music-generate-background.test.ts src/config/schema.help.ts src/config/types.tools.ts docs/automation/tasks.md docs/gateway/config-tools.md CHANGELOG.md - pnpm config:schema:check - pnpm config:docs:check - pnpm check:changelog-attributions - git diff --check - OPENCLAW_TESTBOX=1 pnpm check:changed --- CHANGELOG.md | 1 + docs/.generated/config-baseline.sha256 | 4 +-- docs/automation/tasks.md | 4 +-- docs/gateway/config-tools.md | 4 +-- .../tools/media-generate-background-shared.ts | 22 ++++++++++++--- .../tools/music-generate-background.test.ts | 27 ++----------------- src/agents/tools/music-generate-background.ts | 1 + src/config/schema.base.generated.ts | 4 +-- src/config/schema.help.ts | 2 +- src/config/types.tools.ts | 3 ++- 10 files changed, 34 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6a21d64725..efabd1c82b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ Docs: https://docs.openclaw.ai - Agents/commitments: keep inferred follow-ups internal when heartbeat target is none, strip raw source text from stored commitments, disable tools during due-commitment heartbeat turns, bound hidden extraction queue growth, expire stale commitments, and add QA/Docker safety coverage. Thanks @vignesh07. - Agents/commitments: run hidden follow-up extraction on the configured agent/default model instead of falling back to direct OpenAI, so OpenAI Codex OAuth-only gateways no longer spam background API-key failures. Fixes #75334. Thanks @sene1337. +- Agents/media: keep async music generation completions on the requester-session wake path even when direct-send completion is enabled, so finished audio stays agent-mediated while video can still opt into direct channel delivery. (#75335) Thanks @vincentkoc. - Security/config-audit: redact CLI argv and execArgv secrets before persisting config audit records, covering write, observe, and recovery paths. Fixes #60826. Thanks @koshaji. - Gateway/models: keep default and configured model-list views responsive when provider catalog discovery stalls, without hiding real catalog load failures, while `--all` still waits for the exact full catalog. Fixes #75297; refs #74404. Thanks @lisandromachado and @najef1979-code. - Plugins/runtime-deps: accept already materialized package-level runtime-deps supersets as converged, so later lazy plugin activation no longer prunes and relaunches `pnpm install` after gateway startup pre-staging, reducing event-loop pressure from repeated runtime-deps repair on packaged installs. Fixes #75283; refs #75297 and #72338. Thanks @brokemac79, @lisandromachado, and @midhunmonachan. diff --git a/docs/.generated/config-baseline.sha256 b/docs/.generated/config-baseline.sha256 index e96d3eb8f50..3f6a354abc6 100644 --- a/docs/.generated/config-baseline.sha256 +++ b/docs/.generated/config-baseline.sha256 @@ -1,4 +1,4 @@ -f2f5dc47ab9572fa5f80eb01b5a176edb04ca91c7a25bea3b9ea8e19dd21904b config-baseline.json -d81f9cadab9762a4b542795ed1f01f27e374f9811cf176f08cbbb7a20b044c15 config-baseline.core.json +a69e6b650513c2a697ee51087928bf78f63ba998c7c60f8cca61dd65a0184fd0 config-baseline.json +0a259216178a582c567d1fa48c5236bff4bbd27c3e6af838ffcd042459ffce3c config-baseline.core.json 92712871defa92eeda8161b516db85574681f2b70678b940508a808b987aeae2 config-baseline.channel.json 6005cf9f6e8c9f25ef97207b5eee29ae0e506cf910cdeca77fc9894ad1755b1f config-baseline.plugin.json diff --git a/docs/automation/tasks.md b/docs/automation/tasks.md index c7076ae945e..c908659767e 100644 --- a/docs/automation/tasks.md +++ b/docs/automation/tasks.md @@ -96,13 +96,13 @@ Not every agent run creates a task. Heartbeat turns and normal interactive chat | Subagent orchestration | `subagent` | Spawning a subagent via `sessions_spawn` | `done_only` | | Cron jobs (all types) | `cron` | Every cron execution (main-session and isolated) | `silent` | | CLI operations | `cli` | `openclaw agent` commands that run through the gateway | `silent` | -| Agent media jobs | `cli` | Session-backed `video_generate` runs | `silent` | +| Agent media jobs | `cli` | Session-backed `music_generate`/`video_generate` runs | `silent` | Main-session cron tasks use `silent` notify policy by default — they create records for tracking but do not generate notifications. Isolated cron tasks also default to `silent` but are more visible because they run in their own session. - Session-backed `video_generate` runs also use `silent` notify policy. They still create task records, but completion is handed back to the original agent session as an internal wake so the agent can write the follow-up message and attach the finished video itself. If you opt into `tools.media.asyncCompletion.directSend`, async `music_generate` and `video_generate` completions try direct channel delivery first before falling back to the requester-session wake path. + Session-backed `music_generate` and `video_generate` runs also use `silent` notify policy. They still create task records, but completion is handed back to the original agent session as an internal wake so the agent can write the follow-up message and attach the finished media itself. If you opt into `tools.media.asyncCompletion.directSend`, async `video_generate` completions can try direct channel delivery first; async `music_generate` completions stay on the requester-session wake path. diff --git a/docs/gateway/config-tools.md b/docs/gateway/config-tools.md index ebf7969d2c2..82f2a4157b7 100644 --- a/docs/gateway/config-tools.md +++ b/docs/gateway/config-tools.md @@ -201,7 +201,7 @@ Configures inbound media understanding (image/audio/video): media: { concurrency: 2, asyncCompletion: { - directSend: false, // opt-in: send finished async music/video directly to the channel + directSend: false, // opt-in: send finished async video directly to the channel }, audio: { enabled: true, @@ -254,7 +254,7 @@ Configures inbound media understanding (image/audio/video): **Async completion fields:** - - `asyncCompletion.directSend`: when `true`, completed async `music_generate` and `video_generate` tasks try direct channel delivery first. Default: `false` (legacy requester-session wake/model-delivery path). + - `asyncCompletion.directSend`: when `true`, completed async media tasks that support direct completion delivery try direct channel delivery first. Default: `false` (requester-session wake/model-delivery path). Today this applies to async `video_generate`; async `music_generate` completions stay requester-session mediated even when this is enabled. diff --git a/src/agents/tools/media-generate-background-shared.ts b/src/agents/tools/media-generate-background-shared.ts index 34816c0293b..8ffa957fd83 100644 --- a/src/agents/tools/media-generate-background-shared.ts +++ b/src/agents/tools/media-generate-background-shared.ts @@ -65,6 +65,8 @@ type WakeMediaGenerationTaskCompletionParams = { statsLine?: string; }; +type MediaGenerationDirectCompletionDelivery = "config" | "disabled"; + function touchMediaGenerationTaskRunContext(handle: MediaGenerationTaskHandle) { registerAgentRunContext(handle.runId, { sessionKey: handle.requesterSessionKey, @@ -242,8 +244,14 @@ function buildMediaGenerationReplyInstruction(params: { ].join(" "); } -function isAsyncMediaDirectSendEnabled(config: OpenClawConfig | undefined): boolean { - return config?.tools?.media?.asyncCompletion?.directSend === true; +function isAsyncMediaDirectSendEnabled(params: { + config: OpenClawConfig | undefined; + directCompletionDelivery: MediaGenerationDirectCompletionDelivery; +}): boolean { + if (params.directCompletionDelivery === "disabled") { + return false; + } + return params.config?.tools?.media?.asyncCompletion?.directSend === true; } async function maybeDeliverMediaGenerationResultDirectly(params: { @@ -296,12 +304,18 @@ export async function wakeMediaGenerationTaskCompletion(params: { announceType: string; toolName: string; completionLabel: string; + directCompletionDelivery: MediaGenerationDirectCompletionDelivery; }) { if (!params.handle) { return; } const announceId = `${params.toolName}:${params.handle.taskId}:${params.status}`; - if (isAsyncMediaDirectSendEnabled(params.config)) { + if ( + isAsyncMediaDirectSendEnabled({ + config: params.config, + directCompletionDelivery: params.directCompletionDelivery, + }) + ) { try { const deliveredDirect = await maybeDeliverMediaGenerationResultDirectly({ handle: params.handle, @@ -383,6 +397,7 @@ export function createMediaGenerationTaskLifecycle(params: { eventSource: AgentInternalEvent["source"]; announceType: string; completionLabel: string; + directCompletionDelivery?: MediaGenerationDirectCompletionDelivery; }) { return { createTaskRun(runParams: CreateMediaGenerationTaskRunParams): MediaGenerationTaskHandle | null { @@ -420,6 +435,7 @@ export function createMediaGenerationTaskLifecycle(params: { announceType: params.announceType, toolName: params.toolName, completionLabel: params.completionLabel, + directCompletionDelivery: params.directCompletionDelivery ?? "config", }); }, }; diff --git a/src/agents/tools/music-generate-background.test.ts b/src/agents/tools/music-generate-background.test.ts index 36aa909a4dc..260ba3411da 100644 --- a/src/agents/tools/music-generate-background.test.ts +++ b/src/agents/tools/music-generate-background.test.ts @@ -3,7 +3,6 @@ import { MUSIC_GENERATION_TASK_KIND } from "../music-generation-task-status.js"; import { announceDeliveryMocks, createMediaCompletionFixture, - expectDirectMediaSend, expectFallbackMediaAnnouncement, expectQueuedTaskRun, expectRecordedTaskProgress, @@ -96,34 +95,11 @@ describe("music generate background helpers", () => { expect(announceDeliveryMocks.deliverSubagentAnnouncement).toHaveBeenCalled(); }); - it("delivers completed music directly to the requester channel when enabled", async () => { + it("queues a completion event when direct send is enabled globally", async () => { taskDeliveryRuntimeMocks.sendMessage.mockResolvedValue({ channel: "discord", messageId: "msg-1", }); - - await wakeMusicGenerationTaskCompletion({ - ...createMediaCompletionFixture({ - directSend: true, - runId: "tool:music_generate:abc", - taskLabel: "night-drive synthwave", - result: "Generated 1 track.\nMEDIA:/tmp/generated-night-drive.mp3", - }), - }); - - expectDirectMediaSend({ - sendMessageMock: taskDeliveryRuntimeMocks.sendMessage, - channel: "discord", - to: "channel:1", - threadId: "thread-1", - content: "Generated 1 track.", - mediaUrls: ["/tmp/generated-night-drive.mp3"], - }); - expect(announceDeliveryMocks.deliverSubagentAnnouncement).not.toHaveBeenCalled(); - }); - - it("falls back to a music-generation completion event when direct delivery fails", async () => { - taskDeliveryRuntimeMocks.sendMessage.mockRejectedValue(new Error("discord upload failed")); announceDeliveryMocks.deliverSubagentAnnouncement.mockResolvedValue({ delivered: true, path: "direct", @@ -139,6 +115,7 @@ describe("music generate background helpers", () => { }), }); + expect(taskDeliveryRuntimeMocks.sendMessage).not.toHaveBeenCalled(); expectFallbackMediaAnnouncement({ deliverAnnouncementMock: announceDeliveryMocks.deliverSubagentAnnouncement, requesterSessionKey: "agent:main:discord:direct:123", diff --git a/src/agents/tools/music-generate-background.ts b/src/agents/tools/music-generate-background.ts index 9c07bd0c71e..b5ec7cd8d66 100644 --- a/src/agents/tools/music-generate-background.ts +++ b/src/agents/tools/music-generate-background.ts @@ -17,6 +17,7 @@ const musicGenerationTaskLifecycle = createMediaGenerationTaskLifecycle({ eventSource: "music_generation", announceType: "music generation task", completionLabel: "music", + directCompletionDelivery: "disabled", }); export const createMusicGenerationTaskRun = ( diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts index b78cbcac3d3..0e95499a7d1 100644 --- a/src/config/schema.base.generated.ts +++ b/src/config/schema.base.generated.ts @@ -10100,7 +10100,7 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { type: "boolean", title: "Async Media Completion Direct Send", description: - "Enable direct channel sends for completed async music/video generation tasks instead of relying on the requester session wake path. Default off so detached media completion keeps the legacy model-delivery flow unless you opt in.", + "Enable direct channel sends for completed async media generation tasks that support direct completion delivery. Currently this applies to video generation; music generation always stays requester-session mediated. Default off so detached media completion uses the requester session wake path.", }, }, additionalProperties: false, @@ -25135,7 +25135,7 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { }, "tools.media.asyncCompletion.directSend": { label: "Async Media Completion Direct Send", - help: "Enable direct channel sends for completed async music/video generation tasks instead of relying on the requester session wake path. Default off so detached media completion keeps the legacy model-delivery flow unless you opt in.", + help: "Enable direct channel sends for completed async media generation tasks that support direct completion delivery. Currently this applies to video generation; music generation always stays requester-session mediated. Default off so detached media completion uses the requester session wake path.", tags: ["storage", "media", "tools"], }, "tools.media.audio.enabled": { diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index 54f8363bbf4..ca8b1540d04 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -698,7 +698,7 @@ export const FIELD_HELP: Record = { "tools.media.concurrency": "Maximum number of concurrent media understanding operations per turn across image, audio, and video tasks. Lower this in resource-constrained deployments to prevent CPU/network saturation.", "tools.media.asyncCompletion.directSend": - "Enable direct channel sends for completed async music/video generation tasks instead of relying on the requester session wake path. Default off so detached media completion keeps the legacy model-delivery flow unless you opt in.", + "Enable direct channel sends for completed async media generation tasks that support direct completion delivery. Currently this applies to video generation; music generation always stays requester-session mediated. Default off so detached media completion uses the requester session wake path.", "tools.media.image.enabled": "Enable image understanding so attached or referenced images can be interpreted into textual context. Disable if you need text-only operation or want to avoid image-processing cost.", "tools.media.image.maxBytes": diff --git a/src/config/types.tools.ts b/src/config/types.tools.ts index 9aae9945e92..11701b4b5c0 100644 --- a/src/config/types.tools.ts +++ b/src/config/types.tools.ts @@ -145,7 +145,8 @@ export type MediaToolsConfig = { concurrency?: number; asyncCompletion?: { /** - * Enable direct channel sends for completed async media generation tasks. + * Enable direct channel sends for async media generation tasks that support + * direct completion delivery. Music generation stays requester-session mediated. * Default: false. */ directSend?: boolean;