From aeb9ad52fa374b915cd4f740ecd4b284b0034b29 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 6 Apr 2026 01:42:46 +0100 Subject: [PATCH] feat: add comfy workflow media support --- CHANGELOG.md | 1 + docs/docs.json | 5 +- docs/help/testing.md | 9 + docs/providers/comfy.md | 202 +++++ docs/providers/index.md | 2 + docs/providers/models.md | 1 + docs/tools/image-generation.md | 34 +- docs/tools/index.md | 4 + docs/tools/music-generation.md | 59 ++ docs/tools/video-generation.md | 31 +- extensions/comfy/comfy.live.test.ts | 145 ++++ .../comfy/image-generation-provider.test.ts | 355 ++++++++ extensions/comfy/image-generation-provider.ts | 79 ++ extensions/comfy/index.ts | 24 + extensions/comfy/music-generate-tool.test.ts | 108 +++ extensions/comfy/music-generate-tool.ts | 90 ++ extensions/comfy/openclaw.plugin.json | 104 +++ extensions/comfy/package.json | 12 + .../plugin-registration.contract.test.ts | 11 + .../comfy/video-generation-provider.test.ts | 240 ++++++ extensions/comfy/video-generation-provider.ts | 91 ++ extensions/comfy/workflow-runtime.ts | 783 ++++++++++++++++++ .../pi-embedded-subscribe.tools.media.test.ts | 4 + src/agents/pi-embedded-subscribe.tools.ts | 8 +- ...plugin-registration.comfy.contract.test.ts | 4 + .../plugin-registration-contract-cases.ts | 9 + vitest.extension-provider-paths.mjs | 1 + 27 files changed, 2384 insertions(+), 32 deletions(-) create mode 100644 docs/providers/comfy.md create mode 100644 docs/tools/music-generation.md create mode 100644 extensions/comfy/comfy.live.test.ts create mode 100644 extensions/comfy/image-generation-provider.test.ts create mode 100644 extensions/comfy/image-generation-provider.ts create mode 100644 extensions/comfy/index.ts create mode 100644 extensions/comfy/music-generate-tool.test.ts create mode 100644 extensions/comfy/music-generate-tool.ts create mode 100644 extensions/comfy/openclaw.plugin.json create mode 100644 extensions/comfy/package.json create mode 100644 
extensions/comfy/plugin-registration.contract.test.ts create mode 100644 extensions/comfy/video-generation-provider.test.ts create mode 100644 extensions/comfy/video-generation-provider.ts create mode 100644 extensions/comfy/workflow-runtime.ts create mode 100644 src/plugins/contracts/plugin-registration.comfy.contract.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 2fc1bc1b104..4b41b8a38dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai - Plugins: add plugin-config TUI prompts to guided onboarding/setup flows, and add `openclaw plugins install --force` so existing plugin and hook-pack targets can be replaced without using the dangerous-code override flag. (#60590, #60544) - Channels/context visibility: add configurable `contextVisibility` per channel (`all`, `allowlist`, `allowlist_quote`) so supplemental quote, thread, and fetched history context can be filtered by sender allowlists instead of always passing through as received. - Providers: add bundled Qwen, Fireworks AI, and StepFun providers, plus MiniMax TTS, Ollama Web Search, and MiniMax Search integrations for chat, speech, and search workflows. (#60032, #55921, #59318, #54648) +- Providers/ComfyUI: add a bundled `comfy` workflow media plugin for local ComfyUI and Comfy Cloud workflows, including shared `image_generate` and `video_generate` support plus a bundled `music_generate` tool for audio workflows, with prompt injection, optional reference-image upload, live tests, and output download. - Providers/Amazon Bedrock: add bundled Mantle support plus inference-profile discovery and automatic request-region injection so Bedrock-hosted Claude, GPT-OSS, Qwen, Kimi, GLM, and similar routes work with less manual setup. (#61296, #61299) Thanks @wirjo. - Providers/request overrides: add shared model and media request transport overrides across OpenAI-, Anthropic-, Google-, and compatible provider paths, including headers, auth, proxy, and TLS controls. 
(#60200) - Prompt caching: keep prompt prefixes more reusable across transport fallback, deterministic MCP tool ordering, compaction, embedded image history, normalized system-prompt fingerprints, `openclaw status --verbose` cache diagnostics, and the removal of duplicate in-band tool inventories from agent system prompts so follow-up turns hit cache more reliably. (#58036, #58037, #58038, #59054, #60603, #60691) Thanks @bcherny and @vincentkoc. diff --git a/docs/docs.json b/docs/docs.json index 372c92a92a5..3011e475c49 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -1200,12 +1200,14 @@ "tools/exec", "tools/exec-approvals", "tools/image-generation", + "tools/music-generation", "tools/llm-task", "tools/lobster", "tools/loop-detection", "tools/pdf", "tools/reactions", - "tools/thinking" + "tools/thinking", + "tools/video-generation" ] }, { @@ -1238,6 +1240,7 @@ "providers/bedrock", "providers/bedrock-mantle", "providers/chutes", + "providers/comfy", "providers/claude-max-api-proxy", "providers/cloudflare-ai-gateway", "providers/deepgram", diff --git a/docs/help/testing.md b/docs/help/testing.md index 7c143d1dc8f..687c6d7864e 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -389,6 +389,15 @@ If you want to rely on env keys (e.g. exported in your `~/.profile`), run local - Enable: `BYTEPLUS_API_KEY=... 
BYTEPLUS_LIVE_TEST=1 pnpm test:live src/agents/byteplus.live.test.ts` - Optional model override: `BYTEPLUS_CODING_MODEL=ark-code-latest` +## ComfyUI workflow media live + +- Test: `extensions/comfy/comfy.live.test.ts` +- Enable: `OPENCLAW_LIVE_TEST=1 COMFY_LIVE_TEST=1 pnpm test:live -- extensions/comfy/comfy.live.test.ts` +- Scope: + - Exercises the bundled comfy image, video, and `music_generate` paths + - Skips each capability unless the matching `models.providers.comfy.{image,video,music}` section is configured + - Useful after changing comfy workflow submission, polling, downloads, or plugin registration + ## Image generation live - Test: `src/image-generation/runtime.live.test.ts` diff --git a/docs/providers/comfy.md b/docs/providers/comfy.md new file mode 100644 index 00000000000..0294f2f2ec8 --- /dev/null +++ b/docs/providers/comfy.md @@ -0,0 +1,202 @@ +--- +title: "ComfyUI" +summary: "ComfyUI workflow image, video, and music generation setup in OpenClaw" +read_when: + - You want to use local ComfyUI workflows with OpenClaw + - You want to use Comfy Cloud with image, video, or music workflows + - You need the bundled comfy plugin config keys +--- + +# ComfyUI + +OpenClaw ships a bundled `comfy` plugin for workflow-driven ComfyUI runs. 
+ +- Provider: `comfy` +- Models: `comfy/workflow` +- Shared surfaces: `image_generate`, `video_generate` +- Plugin tool: `music_generate` +- Auth: none for local ComfyUI; `COMFY_API_KEY` or `COMFY_CLOUD_API_KEY` for Comfy Cloud +- API: ComfyUI `/prompt` / `/history` / `/view` and Comfy Cloud `/api/*` + +## What it supports + +- Image generation from a workflow JSON +- Image editing with 1 uploaded reference image +- Video generation from a workflow JSON +- Video generation with 1 uploaded reference image +- Music or audio generation through the bundled `music_generate` tool +- Output download from a configured node or all matching output nodes + +The bundled plugin is workflow-driven, so OpenClaw does not try to map generic +`size`, `aspectRatio`, `resolution`, `durationSeconds`, or TTS-style controls +onto your graph. + +## Config layout + +Comfy supports shared top-level connection settings plus per-capability workflow +sections: + +```json5 +{ + models: { + providers: { + comfy: { + mode: "local", + baseUrl: "http://127.0.0.1:8188", + image: { + workflowPath: "./workflows/flux-api.json", + promptNodeId: "6", + outputNodeId: "9", + }, + video: { + workflowPath: "./workflows/video-api.json", + promptNodeId: "12", + outputNodeId: "21", + }, + music: { + workflowPath: "./workflows/music-api.json", + promptNodeId: "3", + outputNodeId: "18", + }, + }, + }, + }, +} +``` + +Shared keys: + +- `mode`: `local` or `cloud` +- `baseUrl`: defaults to `http://127.0.0.1:8188` for local or `https://cloud.comfy.org` for cloud +- `apiKey`: optional inline key alternative to env vars +- `allowPrivateNetwork`: allow a private/LAN `baseUrl` in cloud mode + +Per-capability keys under `image`, `video`, or `music`: + +- `workflow` or `workflowPath`: required +- `promptNodeId`: required +- `promptInputName`: defaults to `text` +- `outputNodeId`: optional +- `pollIntervalMs`: optional +- `timeoutMs`: optional + +Image and video sections also support: + +- `inputImageNodeId`: required when 
you pass a reference image +- `inputImageInputName`: defaults to `image` + +## Backward compatibility + +Existing top-level image config still works: + +```json5 +{ + models: { + providers: { + comfy: { + workflowPath: "./workflows/flux-api.json", + promptNodeId: "6", + outputNodeId: "9", + }, + }, + }, +} +``` + +OpenClaw treats that legacy shape as the image workflow config. + +## Image workflows + +Set the default image model: + +```json5 +{ + agents: { + defaults: { + imageGenerationModel: { + primary: "comfy/workflow", + }, + }, + }, +} +``` + +Reference-image editing example: + +```json5 +{ + models: { + providers: { + comfy: { + image: { + workflowPath: "./workflows/edit-api.json", + promptNodeId: "6", + inputImageNodeId: "7", + inputImageInputName: "image", + outputNodeId: "9", + }, + }, + }, + }, +} +``` + +## Video workflows + +Set the default video model: + +```json5 +{ + agents: { + defaults: { + videoGenerationModel: { + primary: "comfy/workflow", + }, + }, + }, +} +``` + +Comfy video workflows currently support text-to-video and image-to-video through +the configured graph. OpenClaw does not pass input videos into Comfy workflows. + +## Music workflows + +The bundled plugin registers a `music_generate` tool for workflow-defined audio +or music outputs: + +```text +/tool music_generate prompt="Warm ambient synth loop with soft tape texture" +``` + +Use the `music` config section to point at your audio workflow JSON and output +node. + +## Comfy Cloud + +Use `mode: "cloud"` plus one of: + +- `COMFY_API_KEY` +- `COMFY_CLOUD_API_KEY` +- `models.providers.comfy.apiKey` + +Cloud mode still uses the same `image`, `video`, and `music` workflow sections. + +## Live tests + +Opt-in live coverage exists for the bundled plugin: + +```bash +OPENCLAW_LIVE_TEST=1 COMFY_LIVE_TEST=1 pnpm test:live -- extensions/comfy/comfy.live.test.ts +``` + +The live test skips individual image, video, or music cases unless the matching +Comfy workflow section is configured. 
+ +## Related + +- [Image Generation](/tools/image-generation) +- [Video Generation](/tools/video-generation) +- [Music Generation](/tools/music-generation) +- [Provider Directory](/providers/index) +- [Configuration Reference](/gateway/configuration-reference#agent-defaults) diff --git a/docs/providers/index.md b/docs/providers/index.md index 91a8dc02372..c6c3d8d558b 100644 --- a/docs/providers/index.md +++ b/docs/providers/index.md @@ -31,6 +31,7 @@ Looking for chat channel docs (WhatsApp/Telegram/Discord/Slack/Mattermost (plugi - [Anthropic (API + Claude CLI)](/providers/anthropic) - [BytePlus (International)](/concepts/model-providers#byteplus-international) - [Chutes](/providers/chutes) +- [ComfyUI](/providers/comfy) - [Cloudflare AI Gateway](/providers/cloudflare-ai-gateway) - [DeepSeek](/providers/deepseek) - [fal](/providers/fal) @@ -71,6 +72,7 @@ Looking for chat channel docs (WhatsApp/Telegram/Discord/Slack/Mattermost (plugi - [Additional bundled variants](/providers/models#additional-bundled-provider-variants) - Anthropic Vertex, Copilot Proxy, and Gemini CLI OAuth - [Image Generation](/tools/image-generation) - Shared `image_generate` tool, provider selection, and failover +- [Music Generation](/tools/music-generation) - Plugin-provided `music_generate` tool surfaces - [Video Generation](/tools/video-generation) - Shared `video_generate` tool, provider selection, and failover ## Transcription providers diff --git a/docs/providers/models.md b/docs/providers/models.md index dc898a7a424..c18b1e48140 100644 --- a/docs/providers/models.md +++ b/docs/providers/models.md @@ -29,6 +29,7 @@ model as `provider/model`. 
- [Amazon Bedrock](/providers/bedrock) - [BytePlus (International)](/concepts/model-providers#byteplus-international) - [Chutes](/providers/chutes) +- [ComfyUI](/providers/comfy) - [Cloudflare AI Gateway](/providers/cloudflare-ai-gateway) - [fal](/providers/fal) - [Fireworks](/providers/fireworks) diff --git a/docs/tools/image-generation.md b/docs/tools/image-generation.md index 25b9f1bd5c9..4bcce21420f 100644 --- a/docs/tools/image-generation.md +++ b/docs/tools/image-generation.md @@ -1,5 +1,5 @@ --- -summary: "Generate and edit images using configured providers (OpenAI, Google Gemini, fal, MiniMax)" +summary: "Generate and edit images using configured providers (OpenAI, Google Gemini, fal, MiniMax, ComfyUI)" read_when: - Generating images via the agent - Configuring image generation providers and models @@ -38,12 +38,13 @@ The agent calls `image_generate` automatically. No tool allow-listing needed — ## Supported providers -| Provider | Default model | Edit support | API key | -| -------- | -------------------------------- | ----------------------- | ----------------------------------------------------- | -| OpenAI | `gpt-image-1` | Yes (up to 5 images) | `OPENAI_API_KEY` | -| Google | `gemini-3.1-flash-image-preview` | Yes | `GEMINI_API_KEY` or `GOOGLE_API_KEY` | -| fal | `fal-ai/flux/dev` | Yes | `FAL_KEY` | -| MiniMax | `image-01` | Yes (subject reference) | `MINIMAX_API_KEY` or MiniMax OAuth (`minimax-portal`) | +| Provider | Default model | Edit support | API key | +| -------- | -------------------------------- | ---------------------------------- | ----------------------------------------------------- | +| OpenAI | `gpt-image-1` | Yes (up to 5 images) | `OPENAI_API_KEY` | +| Google | `gemini-3.1-flash-image-preview` | Yes | `GEMINI_API_KEY` or `GOOGLE_API_KEY` | +| fal | `fal-ai/flux/dev` | Yes | `FAL_KEY` | +| MiniMax | `image-01` | Yes (subject reference) | `MINIMAX_API_KEY` or MiniMax OAuth (`minimax-portal`) | +| ComfyUI | `workflow` | Yes (1 image, 
workflow-configured) | `COMFY_API_KEY` or `COMFY_CLOUD_API_KEY` for cloud | Use `action: "list"` to inspect available providers and models at runtime: @@ -107,13 +108,13 @@ Notes: ### Image editing -OpenAI, Google, fal, and MiniMax support editing reference images. Pass a reference image path or URL: +OpenAI, Google, fal, MiniMax, and ComfyUI support editing reference images. Pass a reference image path or URL: ``` "Generate a watercolor version of this photo" + image: "/path/to/photo.jpg" ``` -OpenAI and Google support up to 5 reference images via the `images` parameter. fal and MiniMax support 1. +OpenAI and Google support up to 5 reference images via the `images` parameter. fal, MiniMax, and ComfyUI support 1. MiniMax image generation is available through both bundled MiniMax auth paths: @@ -122,18 +123,19 @@ MiniMax image generation is available through both bundled MiniMax auth paths: ## Provider capabilities -| Capability | OpenAI | Google | fal | MiniMax | -| --------------------- | -------------------- | -------------------- | ------------------- | -------------------------- | -| Generate | Yes (up to 4) | Yes (up to 4) | Yes (up to 4) | Yes (up to 9) | -| Edit/reference | Yes (up to 5 images) | Yes (up to 5 images) | Yes (1 image) | Yes (1 image, subject ref) | -| Size control | Yes | Yes | Yes | No | -| Aspect ratio | No | Yes | Yes (generate only) | Yes | -| Resolution (1K/2K/4K) | No | Yes | Yes | No | +| Capability | OpenAI | Google | fal | MiniMax | ComfyUI | +| --------------------- | -------------------- | -------------------- | ------------------- | -------------------------- | ---------------------------------- | +| Generate | Yes (up to 4) | Yes (up to 4) | Yes (up to 4) | Yes (up to 9) | Yes (workflow-defined outputs) | +| Edit/reference | Yes (up to 5 images) | Yes (up to 5 images) | Yes (1 image) | Yes (1 image, subject ref) | Yes (1 image, workflow-configured) | +| Size control | Yes | Yes | Yes | No | No | +| Aspect ratio | No | Yes | Yes 
(generate only) | Yes | No | +| Resolution (1K/2K/4K) | No | Yes | Yes | No | No | ## Related - [Tools Overview](/tools) — all available agent tools - [fal](/providers/fal) — fal image and video provider setup +- [ComfyUI](/providers/comfy) — local ComfyUI and Comfy Cloud workflow setup - [Google (Gemini)](/providers/google) — Gemini image provider setup - [MiniMax](/providers/minimax) — MiniMax image provider setup - [OpenAI](/providers/openai) — OpenAI Images provider setup diff --git a/docs/tools/index.md b/docs/tools/index.md index 1dbb97a0624..4463306ac46 100644 --- a/docs/tools/index.md +++ b/docs/tools/index.md @@ -75,6 +75,9 @@ For image work, use `image` for analysis and `image_generate` for generation or For video work, use `video_generate`. If you target `qwen/*` or another non-default video provider, configure that provider's auth/API key first. +For workflow-driven audio generation, use `music_generate` when a plugin such as +ComfyUI registers it. This is separate from `tts`, which is text-to-speech. + `session_status` is the lightweight status/readback tool in the sessions group. It answers `/status`-style questions about the current session and can optionally set a per-session model override; `model=default` clears that @@ -100,6 +103,7 @@ Plugins can register additional tools. 
Some examples: - [Lobster](/tools/lobster) — typed workflow runtime with resumable approvals - [LLM Task](/tools/llm-task) — JSON-only LLM step for structured output +- [Music Generation](/tools/music-generation) — plugin-provided `music_generate` tool surfaces - [Diffs](/tools/diffs) — diff viewer and renderer - [OpenProse](/prose) — markdown-first workflow orchestration diff --git a/docs/tools/music-generation.md b/docs/tools/music-generation.md new file mode 100644 index 00000000000..b4df88c0958 --- /dev/null +++ b/docs/tools/music-generation.md @@ -0,0 +1,59 @@ +--- +summary: "Generate music or audio with plugin-provided tools such as ComfyUI workflows" +read_when: + - Generating music or audio via the agent + - Configuring plugin-provided music generation tools + - Understanding the music_generate tool parameters +title: "Music Generation" +--- + +# Music Generation + +The `music_generate` tool lets the agent create audio files when a plugin +registers music generation support. + +The bundled `comfy` plugin currently provides `music_generate` using a +workflow-configured ComfyUI graph. + +## Quick start + +1. Configure `models.providers.comfy.music` with a workflow JSON and prompt/output nodes. +2. If you use Comfy Cloud, set `COMFY_API_KEY` or `COMFY_CLOUD_API_KEY`. +3. Ask the agent for music or call the tool directly. + +Example: + +```text +/tool music_generate prompt="Warm ambient synth loop with soft tape texture" +``` + +## Tool parameters + +| Parameter | Type | Description | +| ---------- | ------ | --------------------------------------------------- | +| `prompt` | string | Music or audio generation prompt | +| `action` | string | `"generate"` (default) or `"list"` | +| `model` | string | Provider/model override. 
Currently `comfy/workflow` | +| `filename` | string | Output filename hint for the saved audio file | + +## Current provider support + +| Provider | Model | Notes | +| -------- | ---------- | ------------------------------- | +| ComfyUI | `workflow` | Workflow-defined music or audio | + +## Live test + +Opt-in live coverage for the bundled ComfyUI music path: + +```bash +OPENCLAW_LIVE_TEST=1 COMFY_LIVE_TEST=1 pnpm test:live -- extensions/comfy/comfy.live.test.ts +``` + +The live file also covers comfy image and video workflows when those sections +are configured. + +## Related + +- [ComfyUI](/providers/comfy) +- [Tools Overview](/tools) diff --git a/docs/tools/video-generation.md b/docs/tools/video-generation.md index 6e5ae578b00..518c7290b97 100644 --- a/docs/tools/video-generation.md +++ b/docs/tools/video-generation.md @@ -1,5 +1,5 @@ --- -summary: "Generate videos from text, images, or existing videos using 10 provider backends" +summary: "Generate videos from text, images, or existing videos using 11 provider backends" read_when: - Generating videos via the agent - Configuring video generation providers and models @@ -9,7 +9,7 @@ title: "Video Generation" # Video Generation -OpenClaw agents can generate videos from text prompts, reference images, or existing videos. Ten provider backends are supported, each with different model options, input modes, and feature sets. The agent picks the right provider automatically based on your configuration and available API keys. +OpenClaw agents can generate videos from text prompts, reference images, or existing videos. Eleven provider backends are supported, each with different model options, input modes, and feature sets. The agent picks the right provider automatically based on your configuration and available API keys. The `video_generate` tool only appears when at least one video-generation provider is available. 
If you do not see it in your agent tools, set a provider API key or configure `agents.defaults.videoGenerationModel`. @@ -50,18 +50,19 @@ Outside of session-backed agent runs (for example, direct tool invocations), the ## Supported providers -| Provider | Default model | Text | Image ref | Video ref | API key | -| -------- | ------------------------------- | ---- | ---------------- | ---------------- | --------------------- | -| Alibaba | `wan2.6-t2v` | Yes | Yes (remote URL) | Yes (remote URL) | `MODELSTUDIO_API_KEY` | -| BytePlus | `seedance-1-0-lite-t2v-250428` | Yes | 1 image | No | `BYTEPLUS_API_KEY` | -| fal | `fal-ai/minimax/video-01-live` | Yes | 1 image | No | `FAL_KEY` | -| Google | `veo-3.1-fast-generate-preview` | Yes | 1 image | 1 video | `GEMINI_API_KEY` | -| MiniMax | `MiniMax-Hailuo-2.3` | Yes | 1 image | No | `MINIMAX_API_KEY` | -| OpenAI | `sora-2` | Yes | 1 image | 1 video | `OPENAI_API_KEY` | -| Qwen | `wan2.6-t2v` | Yes | Yes (remote URL) | Yes (remote URL) | `QWEN_API_KEY` | -| Runway | `gen4.5` | Yes | 1 image | 1 video | `RUNWAYML_API_SECRET` | -| Together | `Wan-AI/Wan2.2-T2V-A14B` | Yes | 1 image | No | `TOGETHER_API_KEY` | -| xAI | `grok-imagine-video` | Yes | 1 image | 1 video | `XAI_API_KEY` | +| Provider | Default model | Text | Image ref | Video ref | API key | +| -------- | ------------------------------- | ---- | ---------------- | ---------------- | ---------------------------------------- | +| Alibaba | `wan2.6-t2v` | Yes | Yes (remote URL) | Yes (remote URL) | `MODELSTUDIO_API_KEY` | +| BytePlus | `seedance-1-0-lite-t2v-250428` | Yes | 1 image | No | `BYTEPLUS_API_KEY` | +| ComfyUI | `workflow` | Yes | 1 image | No | `COMFY_API_KEY` or `COMFY_CLOUD_API_KEY` | +| fal | `fal-ai/minimax/video-01-live` | Yes | 1 image | No | `FAL_KEY` | +| Google | `veo-3.1-fast-generate-preview` | Yes | 1 image | 1 video | `GEMINI_API_KEY` | +| MiniMax | `MiniMax-Hailuo-2.3` | Yes | 1 image | No | `MINIMAX_API_KEY` | +| OpenAI | `sora-2` | Yes | 1 
image | 1 video | `OPENAI_API_KEY` | +| Qwen | `wan2.6-t2v` | Yes | Yes (remote URL) | Yes (remote URL) | `QWEN_API_KEY` | +| Runway | `gen4.5` | Yes | 1 image | 1 video | `RUNWAYML_API_SECRET` | +| Together | `Wan-AI/Wan2.2-T2V-A14B` | Yes | 1 image | No | `TOGETHER_API_KEY` | +| xAI | `grok-imagine-video` | Yes | 1 image | 1 video | `XAI_API_KEY` | Some providers accept additional or alternate API key env vars. See individual [provider pages](#related) for details. @@ -141,6 +142,7 @@ If a provider fails, the next candidate is tried automatically. If all candidate | -------- | ---------------------------------------------------------------------------------------------------------------------------------------- | | Alibaba | Uses DashScope/Model Studio async endpoint. Reference images and videos must be remote `http(s)` URLs. | | BytePlus | Single image reference only. | +| ComfyUI | Workflow-driven local or cloud execution. Supports text-to-video and image-to-video through the configured graph. | | fal | Uses queue-backed flow for long-running jobs. Single image reference only. | | Google | Uses Gemini/Veo. Supports one image or one video reference. | | MiniMax | Single image reference only. 
| @@ -179,6 +181,7 @@ openclaw config set agents.defaults.videoGenerationModel.primary "qwen/wan2.6-t2 - [Background Tasks](/automation/tasks) -- task tracking for async video generation - [Alibaba Model Studio](/providers/alibaba) - [BytePlus](/providers/byteplus) +- [ComfyUI](/providers/comfy) - [fal](/providers/fal) - [Google (Gemini)](/providers/google) - [MiniMax](/providers/minimax) diff --git a/extensions/comfy/comfy.live.test.ts b/extensions/comfy/comfy.live.test.ts new file mode 100644 index 00000000000..548fc7b7a78 --- /dev/null +++ b/extensions/comfy/comfy.live.test.ts @@ -0,0 +1,145 @@ +import { beforeAll, describe, expect, it, vi } from "vitest"; +import { resolveOpenClawAgentDir } from "../../src/agents/agent-paths.js"; +import { isLiveTestEnabled } from "../../src/agents/live-test-helpers.js"; +import { loadConfig } from "../../src/config/config.js"; +import { createTestPluginApi } from "../../test/helpers/plugins/plugin-api.js"; +import plugin from "./index.js"; +import { getComfyConfig, isComfyCapabilityConfigured } from "./workflow-runtime.js"; + +const LIVE = + isLiveTestEnabled(["COMFY_LIVE_TEST"]) && (process.env.COMFY_LIVE_TEST ?? "").trim() === "1"; +const describeLive = LIVE ? describe : describe.skip; + +type RegisteredTool = { + name: string; + execute: ( + id: string, + params: Record, + ) => Promise<{ + content: Array<{ type: string; text?: string }>; + details?: unknown; + }>; +}; + +function withPluginsEnabled(cfg: T): T { + if (!cfg || typeof cfg !== "object") { + return cfg; + } + const record = cfg as Record; + return { + ...record, + plugins: { + ...(record.plugins && typeof record.plugins === "object" ? 
(record.plugins as object) : {}), + enabled: true, + }, + } as T; +} + +describeLive("comfy live", () => { + let cfg = {} as ReturnType; + let agentDir = ""; + const imageProviders: Array<{ id: string; generateImage: Function; isConfigured?: Function }> = + []; + const videoProviders: Array<{ id: string; generateVideo: Function; isConfigured?: Function }> = + []; + const tools: RegisteredTool[] = []; + const saveMediaBuffer = vi.fn( + async ( + _buffer: Buffer, + _mimeType: string, + _subdir?: string, + _maxBytes?: number, + originalFilename?: string, + ) => ({ + path: `/tmp/${originalFilename ?? "generated.bin"}`, + id: "saved-1", + mimeType: _mimeType, + bytes: _buffer.byteLength, + }), + ); + + beforeAll(async () => { + cfg = withPluginsEnabled(loadConfig()); + agentDir = resolveOpenClawAgentDir(); + await plugin.register( + createTestPluginApi({ + config: cfg as never, + runtime: { + channel: { + media: { + saveMediaBuffer, + }, + }, + } as never, + registerImageGenerationProvider(provider) { + imageProviders.push(provider as never); + }, + registerVideoGenerationProvider(provider) { + videoProviders.push(provider as never); + }, + registerTool(tool) { + tools.push(tool as RegisteredTool); + }, + }), + ); + }); + + it.skipIf(!isComfyCapabilityConfigured({ cfg: cfg as never, agentDir, capability: "image" }))( + "runs an image workflow", + async () => { + const provider = imageProviders.find((entry) => entry.id === "comfy"); + expect(provider).toBeDefined(); + const result = await provider!.generateImage({ + provider: "comfy", + model: "workflow", + prompt: "A tiny orange lobster icon on a clean background.", + cfg: cfg as never, + agentDir, + }); + expect(result.images.length).toBeGreaterThan(0); + expect(result.images[0]?.mimeType.startsWith("image/")).toBe(true); + expect(result.images[0]?.buffer.byteLength).toBeGreaterThan(128); + }, + 120_000, + ); + + it.skipIf(!isComfyCapabilityConfigured({ cfg: cfg as never, agentDir, capability: "video" }))( + "runs a 
video workflow", + async () => { + const provider = videoProviders.find((entry) => entry.id === "comfy"); + expect(provider).toBeDefined(); + const result = await provider!.generateVideo({ + provider: "comfy", + model: "workflow", + prompt: "A tiny paper lobster gently waving, cinematic motion.", + cfg: cfg as never, + agentDir, + }); + expect(result.videos.length).toBeGreaterThan(0); + expect(result.videos[0]?.mimeType.startsWith("video/")).toBe(true); + expect(result.videos[0]?.buffer.byteLength).toBeGreaterThan(512); + }, + 180_000, + ); + + it.skipIf(!isComfyCapabilityConfigured({ cfg: cfg as never, agentDir, capability: "music" }))( + "runs a music workflow tool", + async () => { + const tool = tools.find((entry) => entry.name === "music_generate"); + expect(tool).toBeDefined(); + const result = await tool!.execute("music-live", { + prompt: "A gentle ambient synth loop with warm analog pads.", + filename: "comfy-live.mp3", + }); + const text = result.content.find((entry) => entry.type === "text")?.text ?? 
""; + expect(text).toContain("MEDIA:/tmp/comfy-live.mp3"); + expect(saveMediaBuffer).toHaveBeenCalled(); + }, + 180_000, + ); + + it("documents the effective comfy config shape for live debugging", () => { + const comfyConfig = getComfyConfig(cfg as never); + expect(typeof comfyConfig).toBe("object"); + }); +}); diff --git a/extensions/comfy/image-generation-provider.test.ts b/extensions/comfy/image-generation-provider.test.ts new file mode 100644 index 00000000000..3fec5f228d0 --- /dev/null +++ b/extensions/comfy/image-generation-provider.test.ts @@ -0,0 +1,355 @@ +import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; +import * as providerAuth from "openclaw/plugin-sdk/provider-auth-runtime"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + _setComfyFetchGuardForTesting, + buildComfyImageGenerationProvider, +} from "./image-generation-provider.js"; + +const { fetchWithSsrFGuardMock } = vi.hoisted(() => ({ + fetchWithSsrFGuardMock: vi.fn(), +})); + +function parseJsonBody(call: number): Record { + const request = fetchWithSsrFGuardMock.mock.calls[call - 1]?.[0]; + expect(request?.init?.body).toBeTruthy(); + return JSON.parse(String(request.init.body)) as Record; +} + +function buildComfyConfig(config: Record): OpenClawConfig { + return { + models: { + providers: { + comfy: config, + }, + }, + } as unknown as OpenClawConfig; +} + +describe("comfy image-generation provider", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + afterEach(() => { + _setComfyFetchGuardForTesting(null); + vi.restoreAllMocks(); + }); + + it("treats local comfy workflows as configured without an API key", () => { + const provider = buildComfyImageGenerationProvider(); + expect( + provider.isConfigured?.({ + cfg: buildComfyConfig({ + workflow: { + "6": { inputs: { text: "" } }, + }, + promptNodeId: "6", + }), + }), + ).toBe(true); + }); + + it("submits a local workflow, waits for history, and downloads images", async () => { 
+ _setComfyFetchGuardForTesting(fetchWithSsrFGuardMock); + fetchWithSsrFGuardMock + .mockResolvedValueOnce({ + response: new Response(JSON.stringify({ prompt_id: "local-prompt-1" }), { + status: 200, + headers: { "content-type": "application/json" }, + }), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response( + JSON.stringify({ + "local-prompt-1": { + outputs: { + "9": { + images: [{ filename: "generated.png", subfolder: "", type: "output" }], + }, + }, + }, + }), + { + status: 200, + headers: { "content-type": "application/json" }, + }, + ), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response(Buffer.from("png-data"), { + status: 200, + headers: { "content-type": "image/png" }, + }), + release: vi.fn(async () => {}), + }); + + const provider = buildComfyImageGenerationProvider(); + const result = await provider.generateImage({ + provider: "comfy", + model: "workflow", + prompt: "draw a lobster", + cfg: buildComfyConfig({ + workflow: { + "6": { inputs: { text: "" } }, + "9": { inputs: {} }, + }, + promptNodeId: "6", + outputNodeId: "9", + }), + }); + + expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ + url: "http://127.0.0.1:8188/prompt", + auditContext: "comfy-image-generate", + }), + ); + expect(parseJsonBody(1)).toEqual({ + prompt: { + "6": { inputs: { text: "draw a lobster" } }, + "9": { inputs: {} }, + }, + }); + expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ + url: "http://127.0.0.1:8188/history/local-prompt-1", + auditContext: "comfy-history", + }), + ); + expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith( + 3, + expect.objectContaining({ + url: "http://127.0.0.1:8188/view?filename=generated.png&subfolder=&type=output", + auditContext: "comfy-image-download", + }), + ); + expect(result).toEqual({ + images: [ + { + buffer: Buffer.from("png-data"), + mimeType: "image/png", + fileName: 
"generated.png", + metadata: { + nodeId: "9", + promptId: "local-prompt-1", + }, + }, + ], + model: "workflow", + metadata: { + promptId: "local-prompt-1", + outputNodeIds: ["9"], + }, + }); + }); + + it("uploads reference images for local edit workflows", async () => { + _setComfyFetchGuardForTesting(fetchWithSsrFGuardMock); + fetchWithSsrFGuardMock + .mockResolvedValueOnce({ + response: new Response(JSON.stringify({ name: "upload.png" }), { + status: 200, + headers: { "content-type": "application/json" }, + }), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response(JSON.stringify({ prompt_id: "local-edit-1" }), { + status: 200, + headers: { "content-type": "application/json" }, + }), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response( + JSON.stringify({ + "local-edit-1": { + outputs: { + "9": { + images: [{ filename: "edited.png", subfolder: "", type: "output" }], + }, + }, + }, + }), + { + status: 200, + headers: { "content-type": "application/json" }, + }, + ), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response(Buffer.from("edited-data"), { + status: 200, + headers: { "content-type": "image/png" }, + }), + release: vi.fn(async () => {}), + }); + + const provider = buildComfyImageGenerationProvider(); + await provider.generateImage({ + provider: "comfy", + model: "workflow", + prompt: "turn this into a poster", + cfg: buildComfyConfig({ + workflow: { + "6": { inputs: { text: "" } }, + "7": { inputs: { image: "" } }, + "9": { inputs: {} }, + }, + promptNodeId: "6", + inputImageNodeId: "7", + outputNodeId: "9", + }), + inputImages: [ + { + buffer: Buffer.from("source"), + mimeType: "image/png", + fileName: "source.png", + }, + ], + }); + + const uploadRequest = fetchWithSsrFGuardMock.mock.calls[0]?.[0]; + expect(uploadRequest?.url).toBe("http://127.0.0.1:8188/upload/image"); + expect(uploadRequest?.auditContext).toBe("comfy-image-upload"); + 
expect(uploadRequest?.init?.method).toBe("POST"); + const uploadForm = uploadRequest?.init?.body; + expect(uploadForm).toBeInstanceOf(FormData); + expect(uploadForm?.get("type")).toBe("input"); + expect(uploadForm?.get("overwrite")).toBe("true"); + + expect(parseJsonBody(2)).toEqual({ + prompt: { + "6": { inputs: { text: "turn this into a poster" } }, + "7": { inputs: { image: "upload.png" } }, + "9": { inputs: {} }, + }, + }); + }); + + it("uses cloud endpoints, auth headers, and partner-node extra_data", async () => { + vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({ + apiKey: "comfy-test-key", + source: "env", + mode: "api-key", + }); + _setComfyFetchGuardForTesting(fetchWithSsrFGuardMock); + fetchWithSsrFGuardMock + .mockResolvedValueOnce({ + response: new Response(JSON.stringify({ prompt_id: "cloud-job-1" }), { + status: 200, + headers: { "content-type": "application/json" }, + }), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response(JSON.stringify({ status: "completed" }), { + status: 200, + headers: { "content-type": "application/json" }, + }), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response( + JSON.stringify({ + "cloud-job-1": { + outputs: { + "9": { + images: [{ filename: "cloud.png", subfolder: "", type: "output" }], + }, + }, + }, + }), + { + status: 200, + headers: { "content-type": "application/json" }, + }, + ), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response(null, { + status: 302, + headers: { location: "https://cdn.example.com/cloud.png" }, + }), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response(Buffer.from("cloud-data"), { + status: 200, + headers: { "content-type": "image/png" }, + }), + release: vi.fn(async () => {}), + }); + + const provider = buildComfyImageGenerationProvider(); + const result = await provider.generateImage({ + provider: "comfy", + model: 
"workflow", + prompt: "cloud workflow prompt", + cfg: buildComfyConfig({ + mode: "cloud", + workflow: { + "6": { inputs: { text: "" } }, + "9": { inputs: {} }, + }, + promptNodeId: "6", + outputNodeId: "9", + }), + }); + + const submitRequest = fetchWithSsrFGuardMock.mock.calls[0]?.[0]; + expect(submitRequest?.url).toBe("https://cloud.comfy.org/api/prompt"); + expect(submitRequest?.auditContext).toBe("comfy-image-generate"); + const submitHeaders = new Headers(submitRequest?.init?.headers); + expect(submitHeaders.get("x-api-key")).toBe("comfy-test-key"); + expect(parseJsonBody(1)).toEqual({ + prompt: { + "6": { inputs: { text: "cloud workflow prompt" } }, + "9": { inputs: {} }, + }, + extra_data: { + api_key_comfy_org: "comfy-test-key", + }, + }); + + expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ + url: "https://cloud.comfy.org/api/job/cloud-job-1/status", + auditContext: "comfy-status", + }), + ); + expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith( + 3, + expect.objectContaining({ + url: "https://cloud.comfy.org/api/history_v2/cloud-job-1", + auditContext: "comfy-history", + }), + ); + expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith( + 4, + expect.objectContaining({ + url: "https://cloud.comfy.org/api/view?filename=cloud.png&subfolder=&type=output", + auditContext: "comfy-image-download", + }), + ); + expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith( + 5, + expect.objectContaining({ + url: "https://cdn.example.com/cloud.png", + auditContext: "comfy-image-download", + }), + ); + expect(result.metadata).toEqual({ + promptId: "cloud-job-1", + outputNodeIds: ["9"], + }); + }); +}); diff --git a/extensions/comfy/image-generation-provider.ts b/extensions/comfy/image-generation-provider.ts new file mode 100644 index 00000000000..91654fc14f3 --- /dev/null +++ b/extensions/comfy/image-generation-provider.ts @@ -0,0 +1,79 @@ +import type { + GeneratedImageAsset, + ImageGenerationProvider, +} from 
"openclaw/plugin-sdk/image-generation"; +import { + DEFAULT_COMFY_MODEL, + _setComfyFetchGuardForTesting, + isComfyCapabilityConfigured, + runComfyWorkflow, +} from "./workflow-runtime.js"; + +export { _setComfyFetchGuardForTesting }; + +export function buildComfyImageGenerationProvider(): ImageGenerationProvider { + return { + id: "comfy", + label: "ComfyUI", + defaultModel: DEFAULT_COMFY_MODEL, + models: [DEFAULT_COMFY_MODEL], + isConfigured: ({ cfg, agentDir }) => + isComfyCapabilityConfigured({ + cfg, + agentDir, + capability: "image", + }), + capabilities: { + generate: { + maxCount: 1, + supportsSize: false, + supportsAspectRatio: false, + supportsResolution: false, + }, + edit: { + enabled: true, + maxCount: 1, + maxInputImages: 1, + supportsSize: false, + supportsAspectRatio: false, + supportsResolution: false, + }, + }, + async generateImage(req) { + if ((req.inputImages?.length ?? 0) > 1) { + throw new Error("Comfy image generation currently supports at most one reference image"); + } + + const result = await runComfyWorkflow({ + cfg: req.cfg, + agentDir: req.agentDir, + authStore: req.authStore, + prompt: req.prompt, + model: req.model, + timeoutMs: req.timeoutMs, + capability: "image", + outputKinds: ["images"], + inputImage: req.inputImages?.[0], + }); + + const images: GeneratedImageAsset[] = result.assets.map((asset) => ({ + buffer: asset.buffer, + mimeType: asset.mimeType, + fileName: asset.fileName, + metadata: { + nodeId: asset.nodeId, + promptId: result.promptId, + }, + })); + + return { + images, + model: result.model, + metadata: { + promptId: result.promptId, + outputNodeIds: result.outputNodeIds, + }, + }; + }, + }; +} diff --git a/extensions/comfy/index.ts b/extensions/comfy/index.ts new file mode 100644 index 00000000000..3c7001e29e8 --- /dev/null +++ b/extensions/comfy/index.ts @@ -0,0 +1,24 @@ +import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry"; +import { buildComfyImageGenerationProvider } from 
"./image-generation-provider.js"; +import { createComfyMusicGenerateTool } from "./music-generate-tool.js"; +import { buildComfyVideoGenerationProvider } from "./video-generation-provider.js"; + +const PROVIDER_ID = "comfy"; + +export default definePluginEntry({ + id: PROVIDER_ID, + name: "ComfyUI Provider", + description: "Bundled ComfyUI workflow media generation provider", + register(api) { + api.registerProvider({ + id: PROVIDER_ID, + label: "ComfyUI", + docsPath: "/providers/comfy", + envVars: ["COMFY_API_KEY", "COMFY_CLOUD_API_KEY"], + auth: [], + }); + api.registerImageGenerationProvider(buildComfyImageGenerationProvider()); + api.registerVideoGenerationProvider(buildComfyVideoGenerationProvider()); + api.registerTool(createComfyMusicGenerateTool(api)); + }, +}); diff --git a/extensions/comfy/music-generate-tool.test.ts b/extensions/comfy/music-generate-tool.test.ts new file mode 100644 index 00000000000..699c1530d9c --- /dev/null +++ b/extensions/comfy/music-generate-tool.test.ts @@ -0,0 +1,108 @@ +import { describe, expect, it, vi } from "vitest"; +import { createTestPluginApi } from "../../test/helpers/plugins/plugin-api.js"; +import { createComfyMusicGenerateTool } from "./music-generate-tool.js"; +import { _setComfyFetchGuardForTesting } from "./workflow-runtime.js"; + +const { fetchWithSsrFGuardMock, saveMediaBufferMock } = vi.hoisted(() => ({ + fetchWithSsrFGuardMock: vi.fn(), + saveMediaBufferMock: vi.fn(async () => ({ + path: "/tmp/generated-song.mp3", + id: "music-1", + mimeType: "audio/mpeg", + bytes: 12, + })), +})); + +describe("comfy music_generate tool", () => { + it("lists the comfy workflow model", async () => { + const tool = createComfyMusicGenerateTool(createTestPluginApi()); + const result = await tool.execute("tool-1", { action: "list" }); + expect((result.content[0] as { text: string }).text).toContain("comfy/workflow"); + }); + + it("runs a music workflow and saves audio outputs", async () => { + 
_setComfyFetchGuardForTesting(fetchWithSsrFGuardMock); + fetchWithSsrFGuardMock + .mockResolvedValueOnce({ + response: new Response(JSON.stringify({ prompt_id: "music-job-1" }), { + status: 200, + headers: { "content-type": "application/json" }, + }), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response( + JSON.stringify({ + "music-job-1": { + outputs: { + "9": { + audio: [{ filename: "song.mp3", subfolder: "", type: "output" }], + }, + }, + }, + }), + { + status: 200, + headers: { "content-type": "application/json" }, + }, + ), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response(Buffer.from("music-bytes"), { + status: 200, + headers: { "content-type": "audio/mpeg" }, + }), + release: vi.fn(async () => {}), + }); + + const api = createTestPluginApi({ + config: { + models: { + providers: { + comfy: { + music: { + workflow: { + "6": { inputs: { text: "" } }, + "9": { inputs: {} }, + }, + promptNodeId: "6", + outputNodeId: "9", + }, + }, + }, + }, + } as never, + runtime: { + channel: { + media: { + saveMediaBuffer: saveMediaBufferMock, + }, + }, + } as never, + }); + + const tool = createComfyMusicGenerateTool(api); + const result = await tool.execute("tool-2", { + prompt: "gentle ambient synth loop", + }); + + expect(saveMediaBufferMock).toHaveBeenCalledWith( + Buffer.from("music-bytes"), + "audio/mpeg", + "tool-music-generation", + undefined, + "song.mp3", + ); + expect((result.content[0] as { text: string }).text).toContain("MEDIA:/tmp/generated-song.mp3"); + expect(result.details).toMatchObject({ + provider: "comfy", + model: "workflow", + count: 1, + paths: ["/tmp/generated-song.mp3"], + media: { + mediaUrls: ["/tmp/generated-song.mp3"], + }, + }); + }); +}); diff --git a/extensions/comfy/music-generate-tool.ts b/extensions/comfy/music-generate-tool.ts new file mode 100644 index 00000000000..89c402df210 --- /dev/null +++ b/extensions/comfy/music-generate-tool.ts @@ -0,0 +1,90 @@ +import 
{ Type } from "@sinclair/typebox"; +import type { AnyAgentTool, OpenClawPluginApi } from "openclaw/plugin-sdk/plugin-entry"; +import { runComfyWorkflow } from "./workflow-runtime.js"; + +function readStringParam(params: Record, key: string): string | undefined { + const value = params[key]; + if (typeof value !== "string") { + return undefined; + } + const trimmed = value.trim(); + return trimmed ? trimmed : undefined; +} + +export function createComfyMusicGenerateTool(api: OpenClawPluginApi): AnyAgentTool { + return { + name: "music_generate", + label: "music_generate", + description: "Generate audio or music with a workflow-configured ComfyUI graph.", + parameters: Type.Object({ + action: Type.Optional(Type.String({ default: "generate", enum: ["generate", "list"] })), + prompt: Type.Optional(Type.String()), + model: Type.Optional(Type.String()), + filename: Type.Optional(Type.String()), + }), + async execute(_id: string, params: Record) { + const action = readStringParam(params, "action") ?? "generate"; + if (action === "list") { + const text = ["Available music generation providers:", "- comfy/workflow"].join("\n"); + return { + content: [{ type: "text", text }], + details: { + providers: [ + { + provider: "comfy", + models: ["workflow"], + }, + ], + }, + }; + } + + const prompt = readStringParam(params, "prompt"); + if (!prompt) { + throw new Error("prompt required"); + } + + const result = await runComfyWorkflow({ + cfg: api.config, + prompt, + capability: "music", + model: readStringParam(params, "model"), + outputKinds: ["audio"], + }); + const filenameHint = readStringParam(params, "filename"); + const saved = await Promise.all( + result.assets.map((asset) => + api.runtime.channel.media.saveMediaBuffer( + asset.buffer, + asset.mimeType, + "tool-music-generation", + undefined, + filenameHint || asset.fileName, + ), + ), + ); + + const lines = [ + `Generated ${saved.length} audio file${saved.length === 1 ? 
"" : "s"} with comfy/${result.model}.`, + ...saved.map((entry) => `MEDIA:${entry.path}`), + ]; + + return { + content: [{ type: "text", text: lines.join("\n") }], + details: { + provider: "comfy", + model: result.model, + count: saved.length, + media: { + mediaUrls: saved.map((entry) => entry.path), + }, + paths: saved.map((entry) => entry.path), + metadata: { + promptId: result.promptId, + outputNodeIds: result.outputNodeIds, + }, + }, + }; + }, + }; +} diff --git a/extensions/comfy/openclaw.plugin.json b/extensions/comfy/openclaw.plugin.json new file mode 100644 index 00000000000..2dbdf89fe08 --- /dev/null +++ b/extensions/comfy/openclaw.plugin.json @@ -0,0 +1,104 @@ +{ + "id": "comfy", + "enabledByDefault": true, + "providers": ["comfy"], + "providerAuthEnvVars": { + "comfy": ["COMFY_API_KEY", "COMFY_CLOUD_API_KEY"] + }, + "contracts": { + "imageGenerationProviders": ["comfy"], + "videoGenerationProviders": ["comfy"], + "tools": ["music_generate"] + }, + "configSchema": { + "type": "object", + "additionalProperties": false, + "properties": { + "mode": { + "type": "string", + "enum": ["local", "cloud"] + }, + "baseUrl": { + "type": "string" + }, + "apiKey": { + "type": ["string", "object"] + }, + "allowPrivateNetwork": { + "type": "boolean" + }, + "workflowPath": { + "type": "string" + }, + "workflow": { + "type": "object" + }, + "promptNodeId": { + "type": "string" + }, + "promptInputName": { + "type": "string" + }, + "inputImageNodeId": { + "type": "string" + }, + "inputImageInputName": { + "type": "string" + }, + "outputNodeId": { + "type": "string" + }, + "pollIntervalMs": { + "type": "integer", + "minimum": 100 + }, + "timeoutMs": { + "type": "integer", + "minimum": 1000 + }, + "image": { + "type": "object", + "additionalProperties": false, + "properties": { + "workflowPath": { "type": "string" }, + "workflow": { "type": "object" }, + "promptNodeId": { "type": "string" }, + "promptInputName": { "type": "string" }, + "inputImageNodeId": { "type": "string" }, 
+ "inputImageInputName": { "type": "string" }, + "outputNodeId": { "type": "string" }, + "pollIntervalMs": { "type": "integer", "minimum": 100 }, + "timeoutMs": { "type": "integer", "minimum": 1000 } + } + }, + "video": { + "type": "object", + "additionalProperties": false, + "properties": { + "workflowPath": { "type": "string" }, + "workflow": { "type": "object" }, + "promptNodeId": { "type": "string" }, + "promptInputName": { "type": "string" }, + "inputImageNodeId": { "type": "string" }, + "inputImageInputName": { "type": "string" }, + "outputNodeId": { "type": "string" }, + "pollIntervalMs": { "type": "integer", "minimum": 100 }, + "timeoutMs": { "type": "integer", "minimum": 1000 } + } + }, + "music": { + "type": "object", + "additionalProperties": false, + "properties": { + "workflowPath": { "type": "string" }, + "workflow": { "type": "object" }, + "promptNodeId": { "type": "string" }, + "promptInputName": { "type": "string" }, + "outputNodeId": { "type": "string" }, + "pollIntervalMs": { "type": "integer", "minimum": 100 }, + "timeoutMs": { "type": "integer", "minimum": 1000 } + } + } + } + } +} diff --git a/extensions/comfy/package.json b/extensions/comfy/package.json new file mode 100644 index 00000000000..55f7db7a205 --- /dev/null +++ b/extensions/comfy/package.json @@ -0,0 +1,12 @@ +{ + "name": "@openclaw/comfy-provider", + "version": "2026.4.5", + "private": true, + "description": "OpenClaw ComfyUI provider plugin", + "type": "module", + "openclaw": { + "extensions": [ + "./index.ts" + ] + } +} diff --git a/extensions/comfy/plugin-registration.contract.test.ts b/extensions/comfy/plugin-registration.contract.test.ts new file mode 100644 index 00000000000..6e0084ce5ff --- /dev/null +++ b/extensions/comfy/plugin-registration.contract.test.ts @@ -0,0 +1,11 @@ +import { describePluginRegistrationContract } from "../../test/helpers/plugins/plugin-registration-contract.js"; + +describePluginRegistrationContract({ + pluginId: "comfy", + providerIds: ["comfy"], 
+ imageGenerationProviderIds: ["comfy"], + videoGenerationProviderIds: ["comfy"], + toolNames: ["music_generate"], + requireGenerateImage: true, + requireGenerateVideo: true, +}); diff --git a/extensions/comfy/video-generation-provider.test.ts b/extensions/comfy/video-generation-provider.test.ts new file mode 100644 index 00000000000..bd6fd6cf6e6 --- /dev/null +++ b/extensions/comfy/video-generation-provider.test.ts @@ -0,0 +1,240 @@ +import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; +import * as providerAuth from "openclaw/plugin-sdk/provider-auth-runtime"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + _setComfyFetchGuardForTesting, + buildComfyVideoGenerationProvider, +} from "./video-generation-provider.js"; + +const { fetchWithSsrFGuardMock } = vi.hoisted(() => ({ + fetchWithSsrFGuardMock: vi.fn(), +})); + +function parseJsonBody(call: number): Record { + const request = fetchWithSsrFGuardMock.mock.calls[call - 1]?.[0]; + expect(request?.init?.body).toBeTruthy(); + return JSON.parse(String(request.init.body)) as Record; +} + +function buildComfyConfig(config: Record): OpenClawConfig { + return { + models: { + providers: { + comfy: config, + }, + }, + } as unknown as OpenClawConfig; +} + +describe("comfy video-generation provider", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + afterEach(() => { + _setComfyFetchGuardForTesting(null); + vi.restoreAllMocks(); + }); + + it("treats local comfy video workflows as configured without an API key", () => { + const provider = buildComfyVideoGenerationProvider(); + expect( + provider.isConfigured?.({ + cfg: buildComfyConfig({ + video: { + workflow: { + "6": { inputs: { text: "" } }, + }, + promptNodeId: "6", + }, + }), + }), + ).toBe(true); + }); + + it("submits a local workflow, waits for history, and downloads videos", async () => { + _setComfyFetchGuardForTesting(fetchWithSsrFGuardMock); + fetchWithSsrFGuardMock + .mockResolvedValueOnce({ + 
response: new Response(JSON.stringify({ prompt_id: "local-video-1" }), { + status: 200, + headers: { "content-type": "application/json" }, + }), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response( + JSON.stringify({ + "local-video-1": { + outputs: { + "9": { + gifs: [{ filename: "generated.mp4", subfolder: "", type: "output" }], + }, + }, + }, + }), + { + status: 200, + headers: { "content-type": "application/json" }, + }, + ), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response(Buffer.from("mp4-data"), { + status: 200, + headers: { "content-type": "video/mp4" }, + }), + release: vi.fn(async () => {}), + }); + + const provider = buildComfyVideoGenerationProvider(); + const result = await provider.generateVideo({ + provider: "comfy", + model: "workflow", + prompt: "animate a lobster", + cfg: buildComfyConfig({ + video: { + workflow: { + "6": { inputs: { text: "" } }, + "9": { inputs: {} }, + }, + promptNodeId: "6", + outputNodeId: "9", + }, + }), + }); + + expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ + url: "http://127.0.0.1:8188/prompt", + auditContext: "comfy-video-generate", + }), + ); + expect(parseJsonBody(1)).toEqual({ + prompt: { + "6": { inputs: { text: "animate a lobster" } }, + "9": { inputs: {} }, + }, + }); + expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ + url: "http://127.0.0.1:8188/history/local-video-1", + auditContext: "comfy-history", + }), + ); + expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith( + 3, + expect.objectContaining({ + url: "http://127.0.0.1:8188/view?filename=generated.mp4&subfolder=&type=output", + auditContext: "comfy-video-download", + }), + ); + expect(result).toEqual({ + videos: [ + { + buffer: Buffer.from("mp4-data"), + mimeType: "video/mp4", + fileName: "generated.mp4", + metadata: { + nodeId: "9", + promptId: "local-video-1", + }, + }, + ], + model: 
"workflow", + metadata: { + promptId: "local-video-1", + outputNodeIds: ["9"], + }, + }); + }); + + it("uses cloud endpoints for video workflows", async () => { + vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({ + apiKey: "comfy-test-key", + source: "env", + mode: "api-key", + }); + _setComfyFetchGuardForTesting(fetchWithSsrFGuardMock); + fetchWithSsrFGuardMock + .mockResolvedValueOnce({ + response: new Response(JSON.stringify({ prompt_id: "cloud-video-1" }), { + status: 200, + headers: { "content-type": "application/json" }, + }), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response(JSON.stringify({ status: "completed" }), { + status: 200, + headers: { "content-type": "application/json" }, + }), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response( + JSON.stringify({ + "cloud-video-1": { + outputs: { + "9": { + gifs: [{ filename: "cloud.mp4", subfolder: "", type: "output" }], + }, + }, + }, + }), + { + status: 200, + headers: { "content-type": "application/json" }, + }, + ), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response(null, { + status: 302, + headers: { location: "https://cdn.example.com/cloud.mp4" }, + }), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response(Buffer.from("cloud-video-data"), { + status: 200, + headers: { "content-type": "video/mp4" }, + }), + release: vi.fn(async () => {}), + }); + + const provider = buildComfyVideoGenerationProvider(); + const result = await provider.generateVideo({ + provider: "comfy", + model: "workflow", + prompt: "cloud video workflow", + cfg: buildComfyConfig({ + mode: "cloud", + video: { + workflow: { + "6": { inputs: { text: "" } }, + "9": { inputs: {} }, + }, + promptNodeId: "6", + outputNodeId: "9", + }, + }), + }); + + expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ + url: 
"https://cloud.comfy.org/api/prompt", + auditContext: "comfy-video-generate", + }), + ); + expect(result.metadata).toEqual({ + promptId: "cloud-video-1", + outputNodeIds: ["9"], + }); + }); +}); diff --git a/extensions/comfy/video-generation-provider.ts b/extensions/comfy/video-generation-provider.ts new file mode 100644 index 00000000000..81fff490051 --- /dev/null +++ b/extensions/comfy/video-generation-provider.ts @@ -0,0 +1,91 @@ +import type { + GeneratedVideoAsset, + VideoGenerationProvider, + VideoGenerationSourceAsset, +} from "openclaw/plugin-sdk/video-generation"; +import { + DEFAULT_COMFY_MODEL, + _setComfyFetchGuardForTesting, + isComfyCapabilityConfigured, + runComfyWorkflow, +} from "./workflow-runtime.js"; + +export { _setComfyFetchGuardForTesting }; + +function toComfyInputImage(inputImage?: VideoGenerationSourceAsset) { + if (!inputImage) { + return undefined; + } + if (!inputImage.buffer || !inputImage.mimeType) { + throw new Error("Comfy video generation requires a local reference image file"); + } + return { + buffer: inputImage.buffer, + mimeType: inputImage.mimeType, + fileName: inputImage.fileName, + }; +} + +export function buildComfyVideoGenerationProvider(): VideoGenerationProvider { + return { + id: "comfy", + label: "ComfyUI", + defaultModel: DEFAULT_COMFY_MODEL, + models: [DEFAULT_COMFY_MODEL], + isConfigured: ({ cfg, agentDir }) => + isComfyCapabilityConfigured({ + cfg, + agentDir, + capability: "video", + }), + capabilities: { + maxVideos: 1, + maxInputImages: 1, + maxInputVideos: 0, + supportsSize: false, + supportsAspectRatio: false, + supportsResolution: false, + supportsAudio: false, + supportsWatermark: false, + }, + async generateVideo(req) { + if ((req.inputImages?.length ?? 0) > 1) { + throw new Error("Comfy video generation currently supports at most one reference image"); + } + if ((req.inputVideos?.length ?? 
0) > 0) { + throw new Error("Comfy video generation does not support input videos"); + } + + const result = await runComfyWorkflow({ + cfg: req.cfg, + agentDir: req.agentDir, + authStore: req.authStore, + prompt: req.prompt, + model: req.model, + timeoutMs: req.timeoutMs, + capability: "video", + outputKinds: ["gifs", "videos"], + inputImage: toComfyInputImage(req.inputImages?.[0]), + }); + + const videos: GeneratedVideoAsset[] = result.assets.map((asset) => ({ + buffer: asset.buffer, + mimeType: asset.mimeType, + fileName: asset.fileName, + metadata: { + nodeId: asset.nodeId, + promptId: result.promptId, + }, + })); + + return { + videos, + model: result.model, + metadata: { + promptId: result.promptId, + outputNodeIds: result.outputNodeIds, + }, + }; + }, + }; +} diff --git a/extensions/comfy/workflow-runtime.ts b/extensions/comfy/workflow-runtime.ts new file mode 100644 index 00000000000..774cb67d652 --- /dev/null +++ b/extensions/comfy/workflow-runtime.ts @@ -0,0 +1,783 @@ +import fs from "node:fs/promises"; +import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; +import { + isProviderApiKeyConfigured, + type AuthProfileStore, +} from "openclaw/plugin-sdk/provider-auth"; +import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime"; +import { + assertOkOrThrowHttpError, + normalizeBaseUrl, + resolveProviderHttpRequestConfig, +} from "openclaw/plugin-sdk/provider-http"; +import { + buildHostnameAllowlistPolicyFromSuffixAllowlist, + fetchWithSsrFGuard, + isPrivateOrLoopbackHost, + ssrfPolicyFromDangerouslyAllowPrivateNetwork, + type SsrFPolicy, +} from "openclaw/plugin-sdk/ssrf-runtime"; +import { resolveUserPath } from "openclaw/plugin-sdk/text-runtime"; + +const DEFAULT_COMFY_LOCAL_BASE_URL = "http://127.0.0.1:8188"; +const DEFAULT_COMFY_CLOUD_BASE_URL = "https://cloud.comfy.org"; +const DEFAULT_PROMPT_INPUT_NAME = "text"; +const DEFAULT_INPUT_IMAGE_INPUT_NAME = "image"; +const DEFAULT_POLL_INTERVAL_MS = 1_500; +const 
DEFAULT_TIMEOUT_MS = 5 * 60_000; + +export const DEFAULT_COMFY_MODEL = "workflow"; + +export type ComfyMode = "local" | "cloud"; +export type ComfyCapability = "image" | "music" | "video"; +export type ComfyOutputKind = "audio" | "gifs" | "images" | "videos"; +export type ComfyWorkflow = Record; +export type ComfyProviderConfig = Record; +type ComfyFetchGuardParams = Parameters[0]; +type ComfyDispatcherPolicy = ComfyFetchGuardParams["dispatcherPolicy"]; +type ComfyPromptResponse = { + prompt_id?: string; +}; +type ComfyOutputFile = { + filename?: string; + name?: string; + subfolder?: string; + type?: string; +}; +type ComfyHistoryOutputEntry = Partial>; +type ComfyHistoryEntry = { + outputs?: Record; +}; +type ComfyUploadResponse = { + name?: string; + filename?: string; +}; +type ComfyStatusResponse = { + status?: string; + message?: string; + error?: string; +}; +type ComfyNetworkPolicy = { + apiPolicy?: SsrFPolicy; +}; + +export type ComfySourceImage = { + buffer: Buffer; + mimeType: string; + fileName?: string; +}; + +export type ComfyGeneratedAsset = { + buffer: Buffer; + mimeType: string; + fileName: string; + nodeId: string; +}; + +export type ComfyWorkflowResult = { + assets: ComfyGeneratedAsset[]; + model: string; + promptId: string; + outputNodeIds: string[]; +}; + +let comfyFetchGuard = fetchWithSsrFGuard; + +export function _setComfyFetchGuardForTesting(impl: typeof fetchWithSsrFGuard | null): void { + comfyFetchGuard = impl ?? fetchWithSsrFGuard; +} + +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null && !Array.isArray(value); +} + +function readConfigString(config: ComfyProviderConfig, key: string): string | undefined { + const value = config[key]; + if (typeof value !== "string") { + return undefined; + } + const trimmed = value.trim(); + return trimmed ? 
trimmed : undefined; +} + +function readConfigBoolean(config: ComfyProviderConfig, key: string): boolean | undefined { + const value = config[key]; + return typeof value === "boolean" ? value : undefined; +} + +function readConfigInteger(config: ComfyProviderConfig, key: string): number | undefined { + const value = config[key]; + return typeof value === "number" && Number.isInteger(value) && value > 0 ? value : undefined; +} + +function mergeSsrFPolicies(...policies: Array): SsrFPolicy | undefined { + const merged: SsrFPolicy = {}; + for (const policy of policies) { + if (!policy) { + continue; + } + if (policy.allowPrivateNetwork) { + merged.allowPrivateNetwork = true; + } + if (policy.dangerouslyAllowPrivateNetwork) { + merged.dangerouslyAllowPrivateNetwork = true; + } + if (policy.allowRfc2544BenchmarkRange) { + merged.allowRfc2544BenchmarkRange = true; + } + if (policy.allowedHostnames?.length) { + merged.allowedHostnames = Array.from( + new Set([...(merged.allowedHostnames ?? []), ...policy.allowedHostnames]), + ); + } + if (policy.hostnameAllowlist?.length) { + merged.hostnameAllowlist = Array.from( + new Set([...(merged.hostnameAllowlist ?? []), ...policy.hostnameAllowlist]), + ); + } + } + return Object.keys(merged).length > 0 ? merged : undefined; +} + +export function getComfyConfig(cfg?: OpenClawConfig): ComfyProviderConfig { + const raw = cfg?.models?.providers?.comfy; + return isRecord(raw) ? 
raw : {}; +} + +function stripNestedCapabilityConfig(config: ComfyProviderConfig): ComfyProviderConfig { + const next = { ...config }; + delete next.image; + delete next.video; + delete next.music; + return next; +} + +export function getComfyCapabilityConfig( + config: ComfyProviderConfig, + capability: ComfyCapability, +): ComfyProviderConfig { + const shared = stripNestedCapabilityConfig(config); + const nested = config[capability]; + if (!isRecord(nested)) { + return shared; + } + return { ...shared, ...nested }; +} + +export function resolveComfyMode(config: ComfyProviderConfig): ComfyMode { + return readConfigString(config, "mode") === "cloud" ? "cloud" : "local"; +} + +function getRequiredConfigString(config: ComfyProviderConfig, key: string): string { + const value = readConfigString(config, key); + if (!value) { + throw new Error(`models.providers.comfy.${key} is required`); + } + return value; +} + +function resolveComfyWorkflowSource(config: ComfyProviderConfig): { + workflow?: ComfyWorkflow; + workflowPath?: string; +} { + const workflow = config.workflow; + if (isRecord(workflow)) { + return { workflow: structuredClone(workflow) }; + } + const workflowPath = readConfigString(config, "workflowPath"); + return { workflowPath }; +} + +async function loadComfyWorkflow(config: ComfyProviderConfig): Promise { + const source = resolveComfyWorkflowSource(config); + if (source.workflow) { + return source.workflow; + } + if (!source.workflowPath) { + throw new Error("models.providers.comfy..workflow or workflowPath is required"); + } + + const resolvedPath = resolveUserPath(source.workflowPath); + const raw = await fs.readFile(resolvedPath, "utf8"); + const parsed = JSON.parse(raw) as unknown; + if (!isRecord(parsed)) { + throw new Error(`Comfy workflow at ${resolvedPath} must be a JSON object`); + } + return parsed; +} + +function setWorkflowInput(params: { + workflow: ComfyWorkflow; + nodeId: string; + inputName: string; + value: unknown; +}): void { + const 
node = params.workflow[params.nodeId]; + if (!isRecord(node)) { + throw new Error(`Comfy workflow missing node "${params.nodeId}"`); + } + const inputs = node.inputs; + if (!isRecord(inputs)) { + throw new Error(`Comfy workflow node "${params.nodeId}" is missing an inputs object`); + } + inputs[params.inputName] = params.value; +} + +function resolveComfyNetworkPolicy(params: { + baseUrl: string; + allowPrivateNetwork: boolean; +}): ComfyNetworkPolicy { + let parsed: URL; + try { + parsed = new URL(params.baseUrl); + } catch { + return {}; + } + + const hostname = parsed.hostname.trim().toLowerCase(); + if (!hostname || !params.allowPrivateNetwork || !isPrivateOrLoopbackHost(hostname)) { + return {}; + } + + const hostnamePolicy = buildHostnameAllowlistPolicyFromSuffixAllowlist([hostname]); + const privateNetworkPolicy = ssrfPolicyFromDangerouslyAllowPrivateNetwork(true); + return { + apiPolicy: mergeSsrFPolicies(hostnamePolicy, privateNetworkPolicy), + }; +} + +async function readJsonResponse(params: { + url: string; + init?: RequestInit; + timeoutMs?: number; + policy?: SsrFPolicy; + dispatcherPolicy?: ComfyDispatcherPolicy; + auditContext: string; + errorPrefix: string; +}): Promise { + const { response, release } = await comfyFetchGuard({ + url: params.url, + init: params.init, + timeoutMs: params.timeoutMs, + policy: params.policy, + dispatcherPolicy: params.dispatcherPolicy, + auditContext: params.auditContext, + }); + try { + await assertOkOrThrowHttpError(response, params.errorPrefix); + return (await response.json()) as T; + } finally { + await release(); + } +} + +function inferFileExtension(params: { fileName?: string; mimeType?: string }): string { + const normalizedMime = params.mimeType?.toLowerCase().trim(); + if (normalizedMime?.includes("jpeg")) { + return "jpg"; + } + if (normalizedMime?.includes("png")) { + return "png"; + } + if (normalizedMime?.includes("webm")) { + return "webm"; + } + if (normalizedMime?.includes("mp4")) { + return "mp4"; + } 
+ if (normalizedMime?.includes("mpeg")) { + return "mp3"; + } + if (normalizedMime?.includes("wav")) { + return "wav"; + } + const fileName = params.fileName?.trim(); + if (!fileName) { + return "bin"; + } + const dotIndex = fileName.lastIndexOf("."); + if (dotIndex < 0 || dotIndex === fileName.length - 1) { + return "bin"; + } + return fileName.slice(dotIndex + 1); +} + +function toBlobBytes(buffer: Buffer): ArrayBuffer { + const arrayBuffer = new ArrayBuffer(buffer.byteLength); + new Uint8Array(arrayBuffer).set(buffer); + return arrayBuffer; +} + +async function uploadInputImage(params: { + baseUrl: string; + headers: Headers; + timeoutMs: number; + policy?: SsrFPolicy; + dispatcherPolicy?: ComfyDispatcherPolicy; + image: ComfySourceImage; + mode: ComfyMode; + capability: ComfyCapability; +}): Promise { + const form = new FormData(); + form.set( + "image", + new Blob([toBlobBytes(params.image.buffer)], { type: params.image.mimeType }), + params.image.fileName?.trim() || + `input.${inferFileExtension({ mimeType: params.image.mimeType })}`, + ); + form.set("type", "input"); + form.set("overwrite", "true"); + + const headers = new Headers(params.headers); + headers.delete("Content-Type"); + + const payload = await readJsonResponse({ + url: `${params.baseUrl}${params.mode === "cloud" ? 
"/api/upload/image" : "/upload/image"}`, + init: { + method: "POST", + headers, + body: form, + }, + timeoutMs: params.timeoutMs, + policy: params.policy, + dispatcherPolicy: params.dispatcherPolicy, + auditContext: `comfy-${params.capability}-upload`, + errorPrefix: "Comfy image upload failed", + }); + + const uploadedName = payload.filename?.trim() || payload.name?.trim(); + if (!uploadedName) { + throw new Error("Comfy image upload response missing filename"); + } + return uploadedName; +} + +function extractHistoryEntry(history: unknown, promptId: string): ComfyHistoryEntry | null { + if (!isRecord(history)) { + return null; + } + const directOutputs = history.outputs; + if (isRecord(directOutputs)) { + return history as ComfyHistoryEntry; + } + const nested = history[promptId]; + if (isRecord(nested)) { + return nested as ComfyHistoryEntry; + } + return null; +} + +async function waitForLocalHistory(params: { + baseUrl: string; + promptId: string; + headers: Headers; + timeoutMs: number; + pollIntervalMs: number; + policy?: SsrFPolicy; + dispatcherPolicy?: ComfyDispatcherPolicy; +}): Promise { + const deadline = Date.now() + params.timeoutMs; + while (Date.now() <= deadline) { + const history = await readJsonResponse({ + url: `${params.baseUrl}/history/${params.promptId}`, + init: { + method: "GET", + headers: params.headers, + }, + timeoutMs: params.timeoutMs, + policy: params.policy, + dispatcherPolicy: params.dispatcherPolicy, + auditContext: "comfy-history", + errorPrefix: "Comfy history lookup failed", + }); + + const entry = extractHistoryEntry(history, params.promptId); + if (entry?.outputs && Object.keys(entry.outputs).length > 0) { + return entry; + } + + await new Promise((resolve) => setTimeout(resolve, params.pollIntervalMs)); + } + + throw new Error(`Comfy workflow did not finish within ${Math.ceil(params.timeoutMs / 1000)}s`); +} + +async function waitForCloudCompletion(params: { + baseUrl: string; + promptId: string; + headers: Headers; + 
timeoutMs: number; + pollIntervalMs: number; + policy?: SsrFPolicy; + dispatcherPolicy?: ComfyDispatcherPolicy; +}): Promise { + const deadline = Date.now() + params.timeoutMs; + while (Date.now() <= deadline) { + const status = await readJsonResponse({ + url: `${params.baseUrl}/api/job/${params.promptId}/status`, + init: { + method: "GET", + headers: params.headers, + }, + timeoutMs: params.timeoutMs, + policy: params.policy, + dispatcherPolicy: params.dispatcherPolicy, + auditContext: "comfy-status", + errorPrefix: "Comfy status lookup failed", + }); + + if (status.status === "completed") { + return; + } + if (status.status === "failed" || status.status === "cancelled") { + throw new Error( + `Comfy workflow ${status.status}: ${status.error ?? status.message ?? params.promptId}`, + ); + } + + await new Promise((resolve) => setTimeout(resolve, params.pollIntervalMs)); + } + + throw new Error(`Comfy workflow did not finish within ${Math.ceil(params.timeoutMs / 1000)}s`); +} + +function collectOutputFiles(params: { + history: ComfyHistoryEntry; + outputNodeId?: string; + outputKinds: readonly ComfyOutputKind[]; +}): Array<{ nodeId: string; file: ComfyOutputFile }> { + const outputs = params.history.outputs; + if (!outputs) { + return []; + } + + const nodeIds = params.outputNodeId ? 
[params.outputNodeId] : Object.keys(outputs); + const files: Array<{ nodeId: string; file: ComfyOutputFile }> = []; + for (const nodeId of nodeIds) { + const entry = outputs[nodeId]; + if (!entry) { + continue; + } + for (const kind of params.outputKinds) { + const bucket = entry[kind]; + if (!Array.isArray(bucket)) { + continue; + } + for (const file of bucket) { + files.push({ nodeId, file }); + } + } + } + return files; +} + +async function downloadOutputFile(params: { + baseUrl: string; + headers: Headers; + timeoutMs: number; + policy?: SsrFPolicy; + dispatcherPolicy?: ComfyDispatcherPolicy; + file: ComfyOutputFile; + mode: ComfyMode; + capability: ComfyCapability; +}): Promise<{ buffer: Buffer; mimeType: string }> { + const fileName = params.file.filename?.trim() || params.file.name?.trim(); + if (!fileName) { + throw new Error("Comfy output entry missing filename"); + } + + const query = new URLSearchParams({ + filename: fileName, + subfolder: params.file.subfolder?.trim() ?? "", + type: params.file.type?.trim() ?? "output", + }); + const viewPath = params.mode === "cloud" ? "/api/view" : "/view"; + const auditContext = `comfy-${params.capability}-download`; + + const firstResponse = await comfyFetchGuard({ + url: `${params.baseUrl}${viewPath}?${query.toString()}`, + init: { + method: "GET", + headers: params.headers, + ...(params.mode === "cloud" ? 
{ redirect: "manual" } : {}), + }, + timeoutMs: params.timeoutMs, + policy: params.policy, + dispatcherPolicy: params.dispatcherPolicy, + auditContext, + }); + + try { + if ( + params.mode === "cloud" && + [301, 302, 303, 307, 308].includes(firstResponse.response.status) + ) { + const redirectUrl = firstResponse.response.headers.get("location")?.trim(); + if (!redirectUrl) { + throw new Error("Comfy cloud output redirect missing location header"); + } + const redirected = await comfyFetchGuard({ + url: redirectUrl, + init: { + method: "GET", + }, + timeoutMs: params.timeoutMs, + dispatcherPolicy: params.dispatcherPolicy, + auditContext, + }); + try { + await assertOkOrThrowHttpError(redirected.response, "Comfy output download failed"); + const mimeType = + redirected.response.headers.get("content-type")?.trim() || "application/octet-stream"; + return { + buffer: Buffer.from(await redirected.response.arrayBuffer()), + mimeType, + }; + } finally { + await redirected.release(); + } + } + + await assertOkOrThrowHttpError(firstResponse.response, "Comfy output download failed"); + const mimeType = + firstResponse.response.headers.get("content-type")?.trim() || "application/octet-stream"; + return { + buffer: Buffer.from(await firstResponse.response.arrayBuffer()), + mimeType, + }; + } finally { + await firstResponse.release(); + } +} + +export function isComfyCapabilityConfigured(params: { + cfg?: OpenClawConfig; + agentDir?: string; + capability: ComfyCapability; +}): boolean { + const config = getComfyConfig(params.cfg); + const capabilityConfig = getComfyCapabilityConfig(config, params.capability); + const hasWorkflow = Boolean( + resolveComfyWorkflowSource(capabilityConfig).workflow || + readConfigString(capabilityConfig, "workflowPath"), + ); + const hasPromptNode = Boolean(readConfigString(capabilityConfig, "promptNodeId")); + if (!hasWorkflow || !hasPromptNode) { + return false; + } + if (resolveComfyMode(capabilityConfig) === "local") { + return true; + } + 
return isProviderApiKeyConfigured({ + provider: "comfy", + agentDir: params.agentDir, + }); +} + +export async function runComfyWorkflow(params: { + cfg: OpenClawConfig; + agentDir?: string; + authStore?: AuthProfileStore; + prompt: string; + model?: string; + timeoutMs?: number; + capability: ComfyCapability; + outputKinds: readonly ComfyOutputKind[]; + inputImage?: ComfySourceImage; +}): Promise { + const config = getComfyConfig(params.cfg); + const capabilityConfig = getComfyCapabilityConfig(config, params.capability); + const mode = resolveComfyMode(capabilityConfig); + const workflow = await loadComfyWorkflow(capabilityConfig); + const promptNodeId = getRequiredConfigString(capabilityConfig, "promptNodeId"); + const promptInputName = + readConfigString(capabilityConfig, "promptInputName") ?? DEFAULT_PROMPT_INPUT_NAME; + const inputImageNodeId = readConfigString(capabilityConfig, "inputImageNodeId"); + const inputImageInputName = + readConfigString(capabilityConfig, "inputImageInputName") ?? DEFAULT_INPUT_IMAGE_INPUT_NAME; + const outputNodeId = readConfigString(capabilityConfig, "outputNodeId"); + const pollIntervalMs = + readConfigInteger(capabilityConfig, "pollIntervalMs") ?? DEFAULT_POLL_INTERVAL_MS; + const timeoutMs = + readConfigInteger(capabilityConfig, "timeoutMs") ?? params.timeoutMs ?? DEFAULT_TIMEOUT_MS; + const providerModel = params.model?.trim() || DEFAULT_COMFY_MODEL; + + setWorkflowInput({ + workflow, + nodeId: promptNodeId, + inputName: promptInputName, + value: params.prompt, + }); + + const resolvedAuth = + mode === "cloud" + ? 
await resolveApiKeyForProvider({ + provider: "comfy", + cfg: params.cfg, + agentDir: params.agentDir, + store: params.authStore, + }) + : null; + if (mode === "cloud" && !resolvedAuth?.apiKey) { + throw new Error("Comfy Cloud API key missing"); + } + + const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } = + resolveProviderHttpRequestConfig({ + baseUrl: readConfigString(capabilityConfig, "baseUrl"), + defaultBaseUrl: + mode === "cloud" ? DEFAULT_COMFY_CLOUD_BASE_URL : DEFAULT_COMFY_LOCAL_BASE_URL, + allowPrivateNetwork: + mode === "local" || readConfigBoolean(capabilityConfig, "allowPrivateNetwork") === true, + defaultHeaders: + mode === "cloud" + ? { + "X-API-Key": resolvedAuth?.apiKey ?? "", + "Content-Type": "application/json", + } + : { + "Content-Type": "application/json", + }, + provider: "comfy", + capability: params.capability === "music" ? "audio" : params.capability, + transport: "http", + }); + const normalizedBaseUrl = + normalizeBaseUrl(baseUrl) || + (mode === "cloud" ? DEFAULT_COMFY_CLOUD_BASE_URL : DEFAULT_COMFY_LOCAL_BASE_URL); + const networkPolicy = resolveComfyNetworkPolicy({ + baseUrl: normalizedBaseUrl, + allowPrivateNetwork, + }); + + if (params.inputImage) { + if (!inputImageNodeId) { + throw new Error( + "Comfy edit requests require models.providers.comfy..inputImageNodeId to be configured", + ); + } + const uploadedName = await uploadInputImage({ + baseUrl: normalizedBaseUrl, + headers: new Headers(headers), + timeoutMs, + policy: networkPolicy.apiPolicy, + dispatcherPolicy, + image: params.inputImage, + mode, + capability: params.capability, + }); + setWorkflowInput({ + workflow, + nodeId: inputImageNodeId, + inputName: inputImageInputName, + value: uploadedName, + }); + } + + const submitPayload = { + prompt: workflow, + ...(mode === "cloud" && resolvedAuth?.apiKey + ? 
{ extra_data: { api_key_comfy_org: resolvedAuth.apiKey } } + : {}), + }; + + const promptResponse = await readJsonResponse({ + url: `${normalizedBaseUrl}${mode === "cloud" ? "/api/prompt" : "/prompt"}`, + init: { + method: "POST", + headers, + body: JSON.stringify(submitPayload), + }, + timeoutMs, + policy: networkPolicy.apiPolicy, + dispatcherPolicy, + auditContext: `comfy-${params.capability}-generate`, + errorPrefix: "Comfy workflow submit failed", + }); + + const promptId = promptResponse.prompt_id?.trim(); + if (!promptId) { + throw new Error("Comfy workflow submit response missing prompt_id"); + } + + const history = + mode === "cloud" + ? await (async () => { + await waitForCloudCompletion({ + baseUrl: normalizedBaseUrl, + promptId, + headers: new Headers(headers), + timeoutMs, + pollIntervalMs, + policy: networkPolicy.apiPolicy, + dispatcherPolicy, + }); + return await readJsonResponse({ + url: `${normalizedBaseUrl}/api/history_v2/${promptId}`, + init: { + method: "GET", + headers: new Headers(headers), + }, + timeoutMs, + policy: networkPolicy.apiPolicy, + dispatcherPolicy, + auditContext: "comfy-history", + errorPrefix: "Comfy history lookup failed", + }); + })() + : await waitForLocalHistory({ + baseUrl: normalizedBaseUrl, + promptId, + headers: new Headers(headers), + timeoutMs, + pollIntervalMs, + policy: networkPolicy.apiPolicy, + dispatcherPolicy, + }); + + const historyEntry = extractHistoryEntry(history, promptId); + if (!historyEntry) { + throw new Error(`Comfy history response missing outputs for prompt ${promptId}`); + } + + const outputFiles = collectOutputFiles({ + history: historyEntry, + outputNodeId, + outputKinds: params.outputKinds, + }); + if (outputFiles.length === 0) { + throw new Error(`Comfy workflow ${promptId} completed without ${params.capability} outputs`); + } + + const assets: ComfyGeneratedAsset[] = []; + let assetIndex = 0; + for (const output of outputFiles) { + const downloaded = await downloadOutputFile({ + baseUrl: 
normalizedBaseUrl, + headers: new Headers(headers), + timeoutMs, + policy: networkPolicy.apiPolicy, + dispatcherPolicy, + file: output.file, + mode, + capability: params.capability, + }); + assetIndex += 1; + const originalName = output.file.filename?.trim() || output.file.name?.trim(); + assets.push({ + buffer: downloaded.buffer, + mimeType: downloaded.mimeType, + fileName: + originalName || + `${params.capability}-${assetIndex}.${inferFileExtension({ mimeType: downloaded.mimeType })}`, + nodeId: output.nodeId, + }); + } + + return { + assets, + model: providerModel, + promptId, + outputNodeIds: Array.from(new Set(outputFiles.map((entry) => entry.nodeId))), + }; +} diff --git a/src/agents/pi-embedded-subscribe.tools.media.test.ts b/src/agents/pi-embedded-subscribe.tools.media.test.ts index 545c2d3b770..55ac9d7fc13 100644 --- a/src/agents/pi-embedded-subscribe.tools.media.test.ts +++ b/src/agents/pi-embedded-subscribe.tools.media.test.ts @@ -269,6 +269,10 @@ describe("extractToolResultMediaPaths", () => { expect(isToolResultMediaTrusted("video_generate")).toBe(true); }); + it("trusts bundled plugin tool local MEDIA paths", () => { + expect(isToolResultMediaTrusted("music_generate")).toBe(true); + }); + it("does not trust local MEDIA paths for MCP-provenance results", () => { expect( filterToolResultMediaUrls("browser", ["/tmp/screenshot.png"], { diff --git a/src/agents/pi-embedded-subscribe.tools.ts b/src/agents/pi-embedded-subscribe.tools.ts index f792c5cc6f5..0349d17c179 100644 --- a/src/agents/pi-embedded-subscribe.tools.ts +++ b/src/agents/pi-embedded-subscribe.tools.ts @@ -1,6 +1,7 @@ import { getChannelPlugin, normalizeChannelId } from "../channels/plugins/index.js"; import { normalizeTargetForProvider } from "../infra/outbound/target-normalization.js"; import { splitMediaFromOutput } from "../media/parse.js"; +import { pluginRegistrationContractRegistry } from "../plugins/contracts/registry.js"; import { truncateUtf16Safe } from "../utils.js"; import { 
collectTextContentBlocks } from "./content-blocks.js"; import { type MessagingToolSend } from "./pi-embedded-messaging.js"; @@ -162,6 +163,9 @@ const TRUSTED_TOOL_RESULT_MEDIA = new Set([ "x_search", "write", ]); +const TRUSTED_BUNDLED_PLUGIN_MEDIA_TOOLS = new Set( + pluginRegistrationContractRegistry.flatMap((entry) => entry.toolNames), +); const HTTP_URL_RE = /^https?:\/\//i; function readToolResultDetails(result: unknown): Record | undefined { @@ -192,7 +196,9 @@ export function isToolResultMediaTrusted(toolName?: string, result?: unknown): b return false; } const normalized = normalizeToolName(toolName); - return TRUSTED_TOOL_RESULT_MEDIA.has(normalized); + return ( + TRUSTED_TOOL_RESULT_MEDIA.has(normalized) || TRUSTED_BUNDLED_PLUGIN_MEDIA_TOOLS.has(normalized) + ); } export function filterToolResultMediaUrls( diff --git a/src/plugins/contracts/plugin-registration.comfy.contract.test.ts b/src/plugins/contracts/plugin-registration.comfy.contract.test.ts new file mode 100644 index 00000000000..160683e8703 --- /dev/null +++ b/src/plugins/contracts/plugin-registration.comfy.contract.test.ts @@ -0,0 +1,4 @@ +import { pluginRegistrationContractCases } from "../../../test/helpers/plugins/plugin-registration-contract-cases.js"; +import { describePluginRegistrationContract } from "../../../test/helpers/plugins/plugin-registration-contract.js"; + +describePluginRegistrationContract(pluginRegistrationContractCases.comfy); diff --git a/test/helpers/plugins/plugin-registration-contract-cases.ts b/test/helpers/plugins/plugin-registration-contract-cases.ts index ee001ccd74c..d09ab9195fe 100644 --- a/test/helpers/plugins/plugin-registration-contract-cases.ts +++ b/test/helpers/plugins/plugin-registration-contract-cases.ts @@ -13,6 +13,15 @@ export const pluginRegistrationContractCases = { pluginId: "brave", webSearchProviderIds: ["brave"], }, + comfy: { + pluginId: "comfy", + providerIds: ["comfy"], + imageGenerationProviderIds: ["comfy"], + videoGenerationProviderIds: 
["comfy"], + toolNames: ["music_generate"], + requireGenerateImage: true, + requireGenerateVideo: true, + }, deepgram: { pluginId: "deepgram", mediaUnderstandingProviderIds: ["deepgram"], diff --git a/vitest.extension-provider-paths.mjs b/vitest.extension-provider-paths.mjs index 25011f1dbc7..4b0ea5839d6 100644 --- a/vitest.extension-provider-paths.mjs +++ b/vitest.extension-provider-paths.mjs @@ -7,6 +7,7 @@ export const providerExtensionIds = [ "anthropic-vertex", "byteplus", "chutes", + "comfy", "deepseek", "github-copilot", "google",