Skills/nano-banana-pro: support hosted input images (#37247)

* skills(nano-banana-pro): support remote edit image URLs * test(nano-banana-pro): cover remote input image validation * docs(nano-banana-pro): document remote input images * docs(changelog): note nano-banana remote image inputs * chore(nano-banana-pro): normalize script imports * test(nano-banana-pro): normalize test imports * ci: use published bun release tag * ci: skip prod audit on PRs without dependency changes * test(nano-banana-pro): remove pillow dependency from skill tests * docs(changelog): credit nano-banana input image follow-up
2026-03-12 07:20:45 +00:00 · 2026-03-06 01:02:23 -05:00
parent 7187bfd84b
commit 74e0729631
6 changed files with 291 additions and 9 deletions
--- a/.github/actions/setup-node-env/action.yml
+++ b/.github/actions/setup-node-env/action.yml
@@ -61,7 +61,7 @@ runs:
      if: inputs.install-bun == 'true'
      uses: oven-sh/setup-bun@v2
      with:
-        bun-version: "1.3.9+cf6cdbbba"
+        bun-version: "1.3.9"

    - name: Runtime versions
      shell: bash
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -327,7 +327,26 @@ jobs:
          pre-commit run zizmor --files "${workflow_files[@]}"

      - name: Audit production dependencies
-        run: pre-commit run --all-files pnpm-audit-prod
+        run: |
+          set -euo pipefail
+
+          if [ "${{ github.event_name }}" = "push" ]; then
+            pre-commit run --all-files pnpm-audit-prod
+            exit 0
+          fi
+
+          if [ "${{ github.event_name }}" != "pull_request" ]; then
+            pre-commit run --all-files pnpm-audit-prod
+            exit 0
+          fi
+
+          BASE="${{ github.event.pull_request.base.sha }}"
+          if ! git diff --name-only "$BASE" HEAD | grep -Eq '(^|/)package\.json$|^pnpm-lock\.yaml$|^pnpm-workspace\.yaml$'; then
+            echo "No dependency manifest changes detected; skipping pnpm audit on this PR."
+            exit 0
+          fi
+
+          pre-commit run --all-files pnpm-audit-prod

  checks-windows:
    needs: [docs-scope, changed-scope]
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
 - Telegram/topic agent routing: support per-topic `agentId` overrides in forum groups and DM topics so topics can route to dedicated agents with isolated sessions. (#33647; based on #31513) Thanks @kesor and @Sid-Qin.
 - Slack/DM typing feedback: add `channels.slack.typingReaction` so Socket Mode DMs can show reaction-based processing status even when Slack native assistant typing is unavailable. (#19816) Thanks @dalefrieswthat.
 - Exec/process interactive recovery: add `process attach` plus input-wait metadata/hints (`waitingForInput`, `idleMs`, `stdinWritable`) so long-running interactive sessions can be observed and resumed without losing context. Fixes #33957. Thanks @westoque.
+- Skills/nano-banana-pro: accept public `http(s)` input images for edit/composition while keeping local path support, and return explicit errors for redirects, `file://`, and private-network URLs. Fixes #33960. Thanks @westoque and @vincentkoc.

 ### Fixes

--- a/skills/nano-banana-pro/SKILL.md
+++ b/skills/nano-banana-pro/SKILL.md
@@ -39,6 +39,12 @@ Edit (single image)
 uv run {baseDir}/scripts/generate_image.py --prompt "edit instructions" --filename "output.png" -i "/path/in.png" --resolution 2K
 ```

+Edit from a hosted image URL
+
+```bash
+uv run {baseDir}/scripts/generate_image.py --prompt "turn this into a watercolor poster" --filename "output.png" -i "https://images.example.com/source.png" --resolution 2K
+```
+
 Multi-image composition (up to 14 images)

 ```bash
@@ -53,6 +59,9 @@ API key
 Notes

 - Resolutions: `1K` (default), `2K`, `4K`.
+- Input images can be local paths or public `http(s)` URLs.
+- `file://` URLs are rejected; use a normal local path instead.
+- Remote input URLs reject redirects plus private/loopback/special-use hosts for safety.
 - Use timestamps in filenames: `yyyy-mm-dd-hh-mm-ss-name.png`.
 - The script prints a `MEDIA:` line for OpenClaw to auto-attach on supported chat providers.
 - Do not read the image back; report the saved path only.
--- a/skills/nano-banana-pro/scripts/generate_image.py
+++ b/skills/nano-banana-pro/scripts/generate_image.py
@@ -17,9 +17,22 @@ Multi-image editing (up to 14 images):
 """

 import argparse
+import ipaddress
 import os
+import re
+import socket
 import sys
+from io import BytesIO
 from pathlib import Path
+from urllib import error, parse, request
+
+# Largest remote input image accepted, in bytes (20 * 1024 * 1024).
+MAX_REMOTE_IMAGE_BYTES = 20 * 1024 * 1024
+# Timeout, in seconds, passed to the opener when fetching a remote input image.
+REMOTE_IMAGE_TIMEOUT_SEC = 20
+
+
+class NoRedirectHandler(request.HTTPRedirectHandler):
+    """Redirect handler that never builds a follow-up request for a redirect."""
+
+    def redirect_request(self, req, fp, code, msg, headers, newurl):
+        # Returning None means no replacement request is produced, so the
+        # redirect target (newurl) is never fetched by this handler.
+        return None


 def get_api_key(provided_key: str | None) -> str | None:
@@ -29,6 +42,127 @@ def get_api_key(provided_key: str | None) -> str | None:
    return os.environ.get("GEMINI_API_KEY")


+def is_remote_image_url(image_source: str) -> bool:
+    """Return True when *image_source* parses with an ``http`` or ``https`` scheme."""
+    scheme = parse.urlparse(image_source).scheme.lower()
+    return scheme == "http" or scheme == "https"
+
+
+def _looks_like_windows_drive_path(image_source: str) -> bool:
+    """Return True when *image_source* starts with ``<letter>:`` followed by ``\\`` or ``/``."""
+    drive_prefix = re.match(r"^[a-zA-Z]:[\\/]", image_source)
+    return drive_prefix is not None
+
+
+def _is_blocked_remote_ip(address: str) -> bool:
+    """Return True when *address* must not be used as a remote image host.
+
+    An address is blocked when it is private, loopback, link-local,
+    multicast, reserved, unspecified, or otherwise not globally reachable.
+
+    Args:
+        address: Textual IPv4 or IPv6 address.
+
+    Returns:
+        True if the address falls in any blocked category, otherwise False.
+
+    Raises:
+        ValueError: If *address* is not a valid IP address.
+    """
+    ip = ipaddress.ip_address(address)
+    return (
+        ip.is_private
+        or ip.is_loopback
+        or ip.is_link_local
+        or ip.is_multicast
+        or ip.is_reserved
+        or ip.is_unspecified
+        # ``is_private`` is False for the shared address space
+        # (100.64.0.0/10, carrier-grade NAT), yet that range is not publicly
+        # routable. ``is_global`` is False for it, so check that as well.
+        or not ip.is_global
+    )
+
+
+def validate_remote_image_url(image_url: str) -> parse.ParseResult:
+    """Validate that *image_url* is an http(s) URL resolving only to allowed addresses.
+
+    The URL must use the ``http`` or ``https`` scheme, carry a hostname, and
+    contain no embedded username or password. The hostname is resolved with
+    ``socket.getaddrinfo`` and the URL is rejected if any resolved address is
+    flagged by ``_is_blocked_remote_ip``.
+
+    Args:
+        image_url: Candidate remote image URL.
+
+    Returns:
+        The parsed URL.
+
+    Raises:
+        ValueError: If the scheme is unsupported, the hostname is missing,
+            credentials are embedded, resolution fails, or a resolved address
+            is blocked.
+    """
+    parsed = parse.urlparse(image_url)
+    scheme = parsed.scheme.lower()
+
+    # Scheme guards: file:// gets its own hint, anything else non-http(s) is generic.
+    if scheme == "file":
+        raise ValueError(
+            f"Unsupported input image URL '{image_url}'. "
+            "Use a local path instead of file:// URLs."
+        )
+    if scheme not in {"http", "https"}:
+        raise ValueError(
+            f"Unsupported input image URL '{image_url}'. Only public http(s) URLs are supported."
+        )
+
+    if not parsed.hostname:
+        raise ValueError(f"Invalid input image URL '{image_url}': hostname is required.")
+    if parsed.username or parsed.password:
+        raise ValueError(
+            f"Unsupported input image URL '{image_url}': embedded credentials are not allowed."
+        )
+
+    # Fall back to the scheme's default port when the URL does not name one.
+    port = parsed.port or (443 if scheme == "https" else 80)
+    try:
+        resolved = socket.getaddrinfo(parsed.hostname, port, type=socket.SOCK_STREAM)
+    except socket.gaierror as exc:
+        raise ValueError(f"Could not resolve input image URL '{image_url}': {exc}.") from exc
+
+    # Collect each distinct resolved address that is not allowed.
+    blocked_addresses = set()
+    for entry in resolved:
+        sockaddr = entry[4]
+        if sockaddr and sockaddr[0] and _is_blocked_remote_ip(sockaddr[0]):
+            blocked_addresses.add(sockaddr[0])
+
+    blocked = sorted(blocked_addresses)
+    if blocked:
+        raise ValueError(
+            f"Unsafe input image URL '{image_url}': private, loopback, or "
+            f"special-use hosts are not allowed ({', '.join(blocked)})."
+        )
+    return parsed
+
+
+def load_input_image(image_source: str, pil_image_module):
+    """Load an input image from a local path or an http(s) URL and return a copy.
+
+    Args:
+        image_source: Local filesystem path or http(s) URL of the image.
+        pil_image_module: Object exposing ``open(...)`` that returns a context
+            manager whose value has a ``copy()`` method (the PIL ``Image``
+            module in ``main``).
+
+    Returns:
+        The result of ``copy()`` on the opened image.
+
+    Raises:
+        ValueError: If the source uses an unsupported scheme, fails remote URL
+            validation, is redirected, cannot be downloaded, or exceeds
+            ``MAX_REMOTE_IMAGE_BYTES``.
+    """
+    if not is_remote_image_url(image_source):
+        parsed = parse.urlparse(image_source)
+        # A "scheme" that is really a Windows drive letter is a plain local path.
+        if parsed.scheme and not _looks_like_windows_drive_path(image_source):
+            if parsed.scheme.lower() != "file":
+                raise ValueError(
+                    f"Unsupported input image source '{image_source}'. "
+                    "Use a local path or a public http(s) URL."
+                )
+            raise ValueError(
+                f"Unsupported input image URL '{image_source}'. "
+                "Use a local path instead of file:// URLs."
+            )
+
+        with pil_image_module.open(Path(image_source).expanduser()) as img:
+            return img.copy()
+
+    validate_remote_image_url(image_source)
+    opener = request.build_opener(NoRedirectHandler())
+    req = request.Request(
+        image_source,
+        headers={"User-Agent": "OpenClaw nano-banana-pro/1.0"},
+    )
+    try:
+        with opener.open(req, timeout=REMOTE_IMAGE_TIMEOUT_SEC) as response:
+            redirected_to = response.geturl()
+            if redirected_to != image_source:
+                raise ValueError(
+                    "Redirected input image URLs are not supported for safety. "
+                    f"Re-run with the final asset URL: {redirected_to}"
+                )
+            # Read one byte past the cap so an oversized body can be detected below.
+            image_bytes = response.read(MAX_REMOTE_IMAGE_BYTES + 1)
+    except error.HTTPError as exc:
+        if not 300 <= exc.code < 400:
+            raise ValueError(
+                f"Error downloading input image '{image_source}': HTTP {exc.code}."
+            ) from exc
+        location = exc.headers.get("Location")
+        detail = f" Redirect target: {location}" if location else ""
+        raise ValueError(
+            f"Redirected input image URLs are not supported for safety.{detail}"
+        ) from exc
+    except error.URLError as exc:
+        raise ValueError(
+            f"Error downloading input image '{image_source}': {exc.reason}."
+        ) from exc
+
+    if len(image_bytes) > MAX_REMOTE_IMAGE_BYTES:
+        raise ValueError(
+            f"Input image URL '{image_source}' exceeded the "
+            f"{MAX_REMOTE_IMAGE_BYTES // (1024 * 1024)} MB download limit."
+        )
+
+    with pil_image_module.open(BytesIO(image_bytes)) as img:
+        return img.copy()
+
+
 def main():
    parser = argparse.ArgumentParser(
        description="Generate images using Nano Banana Pro (Gemini 3 Pro Image)"
@@ -48,7 +182,10 @@ def main():
        action="append",
        dest="input_images",
        metavar="IMAGE",
-        help="Input image path(s) for editing/composition. Can be specified multiple times (up to 14 images)."
+        help=(
+            "Input image path(s) for editing/composition. "
+            "Can be specified multiple times (up to 14 images)."
+        ),
    )
    parser.add_argument(
        "--resolution", "-r",
@@ -89,15 +226,17 @@ def main():
    output_resolution = args.resolution
    if args.input_images:
        if len(args.input_images) > 14:
-            print(f"Error: Too many input images ({len(args.input_images)}). Maximum is 14.", file=sys.stderr)
+            print(
+                f"Error: Too many input images ({len(args.input_images)}). Maximum is 14.",
+                file=sys.stderr,
+            )
            sys.exit(1)

        max_input_dim = 0
        for img_path in args.input_images:
            try:
-                with PILImage.open(img_path) as img:
-                    copied = img.copy()
-                    width, height = copied.size
+                copied = load_input_image(img_path, PILImage)
+                width, height = copied.size
                input_images.append(copied)
                print(f"Loaded input image: {img_path}")

@@ -115,13 +254,19 @@ def main():
                output_resolution = "2K"
            else:
                output_resolution = "1K"
-            print(f"Auto-detected resolution: {output_resolution} (from max input dimension {max_input_dim})")
+            print(
+                f"Auto-detected resolution: {output_resolution} "
+                f"(from max input dimension {max_input_dim})"
+            )

    # Build contents (images first if editing, prompt only if generating)
    if input_images:
        contents = [*input_images, args.prompt]
        img_count = len(input_images)
-        print(f"Processing {img_count} image{'s' if img_count > 1 else ''} with resolution {output_resolution}...")
+        print(
+            f"Processing {img_count} image{'s' if img_count > 1 else ''} "
+            f"with resolution {output_resolution}..."
+        )
    else:
        contents = args.prompt
        print(f"Generating image with resolution {output_resolution}...")
--- a/skills/nano-banana-pro/scripts/test_generate_image.py
+++ b/skills/nano-banana-pro/scripts/test_generate_image.py
@@ -0,0 +1,108 @@
+import tempfile
+import unittest
+from pathlib import Path
+from unittest.mock import patch
+
+import generate_image
+
+
+class FakeResponse:
+    """Minimal stand-in for the response object returned by an opener's ``open``."""
+
+    def __init__(self, payload: bytes, url: str):
+        self._url = url
+        self._payload = payload
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc, tb):
+        # Falsy return: exceptions raised inside the ``with`` body propagate.
+        return False
+
+    def geturl(self):
+        """Return the URL this response was constructed with."""
+        return self._url
+
+    def read(self, _limit: int):
+        """Return the whole payload; the requested limit is ignored."""
+        return self._payload
+
+
+class FakeImage:
+    """Image stand-in exposing only ``size``, ``copy()`` and context-manager support."""
+
+    def __init__(self, size):
+        self.size = size
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc, tb):
+        # Falsy return: exceptions raised inside the ``with`` body propagate.
+        return False
+
+    def copy(self):
+        """Return a new ``FakeImage`` carrying the same size."""
+        duplicate = FakeImage(self.size)
+        return duplicate
+
+
+class FakePILImageModule:
+    """Stand-in for the PIL ``Image`` module backed by a source-to-size mapping."""
+
+    def __init__(self, sizes_by_source):
+        self._sizes_by_source = sizes_by_source
+
+    def open(self, source):
+        """Return a ``FakeImage`` sized according to *source*.
+
+        ``str`` and ``Path`` sources are looked up directly; any other object
+        is looked up by its type name (for example ``"BytesIO"``).
+        """
+        key = source if isinstance(source, (str, Path)) else type(source).__name__
+        return FakeImage(self._sizes_by_source[key])
+
+
+class LoadInputImageTests(unittest.TestCase):
+    """Tests for ``generate_image.load_input_image`` using fake PIL and network objects."""
+
+    def test_load_input_image_accepts_local_path(self):
+        """A plain local path is opened through the PIL module and copied."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            image_path = Path(tmpdir) / "input.png"
+            # The bytes are never decoded; the fake PIL module only looks up the path.
+            image_path.write_bytes(b"not-a-real-image")
+            fake_pil = FakePILImageModule({image_path: (16, 12)})
+
+            loaded = generate_image.load_input_image(str(image_path), fake_pil)
+
+            self.assertEqual(loaded.size, (16, 12))
+
+    def test_load_input_image_accepts_public_https_url(self):
+        """An https URL resolving to a public address is downloaded and opened."""
+        # Ad-hoc opener whose ``open`` echoes the request URL back, so the
+        # response URL matches the requested URL.
+        fake_opener = type(
+            "FakeOpener",
+            (),
+            {
+                "open": lambda self, req, timeout=0: FakeResponse(
+                    b"fake-image-bytes",
+                    req.full_url,
+                )
+            },
+        )()
+        # Downloaded bytes reach ``open`` wrapped in BytesIO, hence the type-name key.
+        fake_pil = FakePILImageModule({"BytesIO": (20, 10)})
+
+        with patch.object(
+            generate_image.socket,
+            "getaddrinfo",
+            return_value=[(None, None, None, None, ("93.184.216.34", 443))],
+        ), patch.object(generate_image.request, "build_opener", return_value=fake_opener):
+            loaded = generate_image.load_input_image("https://example.com/input.png", fake_pil)
+
+        self.assertEqual(loaded.size, (20, 10))
+
+    def test_load_input_image_rejects_private_network_url(self):
+        """A URL whose host resolves to a loopback address raises ValueError."""
+        with patch.object(
+            generate_image.socket,
+            "getaddrinfo",
+            return_value=[(None, None, None, None, ("127.0.0.1", 443))],
+        ):
+            with self.assertRaisesRegex(ValueError, "private, loopback, or special-use hosts"):
+                generate_image.load_input_image(
+                    "https://localhost/input.png",
+                    FakePILImageModule({}),
+                )
+
+    def test_load_input_image_rejects_file_url(self):
+        """A ``file://`` URL raises ValueError pointing the user to local paths."""
+        with self.assertRaisesRegex(ValueError, "Use a local path instead of file:// URLs"):
+            generate_image.load_input_image(
+                "file:///tmp/input.png",
+                FakePILImageModule({}),
+            )
+
+
+# Run the test suite when this file is executed directly.
+if __name__ == "__main__":
+    unittest.main()