From 74e07296313e627e42c5a06227f9dbeec96bcf78 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Fri, 6 Mar 2026 01:02:23 -0500 Subject: [PATCH] Skills/nano-banana-pro: support hosted input images (#37247) * skills(nano-banana-pro): support remote edit image URLs * test(nano-banana-pro): cover remote input image validation * docs(nano-banana-pro): document remote input images * docs(changelog): note nano-banana remote image inputs * chore(nano-banana-pro): normalize script imports * test(nano-banana-pro): normalize test imports * ci: use published bun release tag * ci: skip prod audit on PRs without dependency changes * test(nano-banana-pro): remove pillow dependency from skill tests * docs(changelog): credit nano-banana input image follow-up --- .github/actions/setup-node-env/action.yml | 2 +- .github/workflows/ci.yml | 21 ++- CHANGELOG.md | 1 + skills/nano-banana-pro/SKILL.md | 9 + .../nano-banana-pro/scripts/generate_image.py | 159 +++++++++++++++++- .../scripts/test_generate_image.py | 108 ++++++++++++ 6 files changed, 291 insertions(+), 9 deletions(-) create mode 100644 skills/nano-banana-pro/scripts/test_generate_image.py diff --git a/.github/actions/setup-node-env/action.yml b/.github/actions/setup-node-env/action.yml index 1b70385ca54..c46387517e4 100644 --- a/.github/actions/setup-node-env/action.yml +++ b/.github/actions/setup-node-env/action.yml @@ -61,7 +61,7 @@ runs: if: inputs.install-bun == 'true' uses: oven-sh/setup-bun@v2 with: - bun-version: "1.3.9+cf6cdbbba" + bun-version: "1.3.9" - name: Runtime versions shell: bash diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a30087d6ec9..34b0e9f9349 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -327,7 +327,26 @@ jobs: pre-commit run zizmor --files "${workflow_files[@]}" - name: Audit production dependencies - run: pre-commit run --all-files pnpm-audit-prod + run: | + set -euo pipefail + + if [ "${{ github.event_name }}" = "push" ]; then + pre-commit run --all-files pnpm-audit-prod + exit 0 + fi + + if [ "${{ github.event_name }}" != "pull_request" ]; then + pre-commit run --all-files pnpm-audit-prod + exit 0 + fi + + BASE="${{ github.event.pull_request.base.sha }}" + if ! git diff --name-only "$BASE" HEAD | grep -Eq '(^|/)package\.json$|^pnpm-lock\.yaml$|^pnpm-workspace\.yaml$'; then + echo "No dependency manifest changes detected; skipping pnpm audit on this PR." + exit 0 + fi + + pre-commit run --all-files pnpm-audit-prod checks-windows: needs: [docs-scope, changed-scope] diff --git a/CHANGELOG.md b/CHANGELOG.md index 4016dcc4df8..870a403b9df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai - Telegram/topic agent routing: support per-topic `agentId` overrides in forum groups and DM topics so topics can route to dedicated agents with isolated sessions. (#33647; based on #31513) Thanks @kesor and @Sid-Qin. - Slack/DM typing feedback: add `channels.slack.typingReaction` so Socket Mode DMs can show reaction-based processing status even when Slack native assistant typing is unavailable. (#19816) Thanks @dalefrieswthat. - Exec/process interactive recovery: add `process attach` plus input-wait metadata/hints (`waitingForInput`, `idleMs`, `stdinWritable`) so long-running interactive sessions can be observed and resumed without losing context. Fixes #33957. Thanks @westoque. +- Skills/nano-banana-pro: accept public `http(s)` input images for edit/composition while keeping local path support, and return explicit errors for redirects, `file://`, and private-network URLs. Fixes #33960. Thanks @westoque and @vincentkoc. ### Fixes diff --git a/skills/nano-banana-pro/SKILL.md b/skills/nano-banana-pro/SKILL.md index 20bf59a2e92..b891f0c83ac 100644 --- a/skills/nano-banana-pro/SKILL.md +++ b/skills/nano-banana-pro/SKILL.md @@ -39,6 +39,12 @@ Edit (single image) uv run {baseDir}/scripts/generate_image.py --prompt "edit instructions" --filename "output.png" -i "/path/in.png" --resolution 2K ``` +Edit from a hosted image URL + +```bash +uv run {baseDir}/scripts/generate_image.py --prompt "turn this into a watercolor poster" --filename "output.png" -i "https://images.example.com/source.png" --resolution 2K +``` + Multi-image composition (up to 14 images) ```bash @@ -53,6 +59,9 @@ API key Notes - Resolutions: `1K` (default), `2K`, `4K`. +- Input images can be local paths or public `http(s)` URLs. +- `file://` URLs are rejected; use a normal local path instead. +- Remote input URLs reject redirects plus private/loopback/special-use hosts for safety. - Use timestamps in filenames: `yyyy-mm-dd-hh-mm-ss-name.png`. - The script prints a `MEDIA:` line for OpenClaw to auto-attach on supported chat providers. - Do not read the image back; report the saved path only. diff --git a/skills/nano-banana-pro/scripts/generate_image.py b/skills/nano-banana-pro/scripts/generate_image.py index 8d60882c456..b99655f464b 100755 --- a/skills/nano-banana-pro/scripts/generate_image.py +++ b/skills/nano-banana-pro/scripts/generate_image.py @@ -17,9 +17,22 @@ Multi-image editing (up to 14 images): """ import argparse +import ipaddress import os +import re +import socket import sys +from io import BytesIO from pathlib import Path +from urllib import error, parse, request + +MAX_REMOTE_IMAGE_BYTES = 20 * 1024 * 1024 +REMOTE_IMAGE_TIMEOUT_SEC = 20 + + +class NoRedirectHandler(request.HTTPRedirectHandler): + def redirect_request(self, req, fp, code, msg, headers, newurl): + return None def get_api_key(provided_key: str | None) -> str | None: @@ -29,6 +42,127 @@ def get_api_key(provided_key: str | None) -> str | None: return os.environ.get("GEMINI_API_KEY") +def is_remote_image_url(image_source: str) -> bool: + parsed = parse.urlparse(image_source) + return parsed.scheme.lower() in {"http", "https"} + + +def _looks_like_windows_drive_path(image_source: str) -> bool: + return bool(re.match(r"^[a-zA-Z]:[\\/]", image_source)) + + +def _is_blocked_remote_ip(address: str) -> bool: + ip = ipaddress.ip_address(address) + return ( + ip.is_private + or ip.is_loopback + or ip.is_link_local + or ip.is_multicast + or ip.is_reserved + or ip.is_unspecified + ) + + +def validate_remote_image_url(image_url: str) -> parse.ParseResult: + parsed = parse.urlparse(image_url) + scheme = parsed.scheme.lower() + if scheme not in {"http", "https"}: + if scheme == "file": + raise ValueError( + f"Unsupported input image URL '{image_url}'. " + "Use a local path instead of file:// URLs." + ) + raise ValueError( + f"Unsupported input image URL '{image_url}'. Only public http(s) URLs are supported." + ) + if not parsed.hostname: + raise ValueError(f"Invalid input image URL '{image_url}': hostname is required.") + if parsed.username or parsed.password: + raise ValueError( + f"Unsupported input image URL '{image_url}': embedded credentials are not allowed." + ) + + try: + resolved = socket.getaddrinfo( + parsed.hostname, + parsed.port or (443 if scheme == "https" else 80), + type=socket.SOCK_STREAM, + ) + except socket.gaierror as exc: + raise ValueError(f"Could not resolve input image URL '{image_url}': {exc}.") from exc + + blocked = sorted( + { + entry[4][0] + for entry in resolved + if entry[4] and entry[4][0] and _is_blocked_remote_ip(entry[4][0]) + } + ) + if blocked: + raise ValueError( + f"Unsafe input image URL '{image_url}': private, loopback, or " + f"special-use hosts are not allowed ({', '.join(blocked)})." + ) + return parsed + + +def load_input_image(image_source: str, pil_image_module): + if is_remote_image_url(image_source): + validate_remote_image_url(image_source) + opener = request.build_opener(NoRedirectHandler()) + req = request.Request( + image_source, + headers={"User-Agent": "OpenClaw nano-banana-pro/1.0"}, + ) + try: + with opener.open(req, timeout=REMOTE_IMAGE_TIMEOUT_SEC) as response: + redirected_to = response.geturl() + if redirected_to != image_source: + raise ValueError( + "Redirected input image URLs are not supported for safety. " + f"Re-run with the final asset URL: {redirected_to}" + ) + image_bytes = response.read(MAX_REMOTE_IMAGE_BYTES + 1) + except error.HTTPError as exc: + if 300 <= exc.code < 400: + location = exc.headers.get("Location") + detail = f" Redirect target: {location}" if location else "" + raise ValueError( + f"Redirected input image URLs are not supported for safety.{detail}" + ) from exc + raise ValueError( + f"Error downloading input image '{image_source}': HTTP {exc.code}." + ) from exc + except error.URLError as exc: + raise ValueError( + f"Error downloading input image '{image_source}': {exc.reason}." + ) from exc + + if len(image_bytes) > MAX_REMOTE_IMAGE_BYTES: + raise ValueError( + f"Input image URL '{image_source}' exceeded the " + f"{MAX_REMOTE_IMAGE_BYTES // (1024 * 1024)} MB download limit." + ) + with pil_image_module.open(BytesIO(image_bytes)) as img: + return img.copy() + + parsed = parse.urlparse(image_source) + if parsed.scheme and not _looks_like_windows_drive_path(image_source): + if parsed.scheme.lower() == "file": + raise ValueError( + f"Unsupported input image URL '{image_source}'. " + "Use a local path instead of file:// URLs." + ) + raise ValueError( + f"Unsupported input image source '{image_source}'. " + "Use a local path or a public http(s) URL." + ) + + local_path = Path(image_source).expanduser() + with pil_image_module.open(local_path) as img: + return img.copy() + + def main(): parser = argparse.ArgumentParser( description="Generate images using Nano Banana Pro (Gemini 3 Pro Image)" @@ -48,7 +182,10 @@ def main(): action="append", dest="input_images", metavar="IMAGE", - help="Input image path(s) for editing/composition. Can be specified multiple times (up to 14 images)." + help=( + "Input image path(s) for editing/composition. " + "Can be specified multiple times (up to 14 images)." + ), ) parser.add_argument( "--resolution", "-r", @@ -89,15 +226,17 @@ def main(): output_resolution = args.resolution if args.input_images: if len(args.input_images) > 14: - print(f"Error: Too many input images ({len(args.input_images)}). Maximum is 14.", file=sys.stderr) + print( + f"Error: Too many input images ({len(args.input_images)}). Maximum is 14.", + file=sys.stderr, + ) sys.exit(1) max_input_dim = 0 for img_path in args.input_images: try: - with PILImage.open(img_path) as img: - copied = img.copy() - width, height = copied.size + copied = load_input_image(img_path, PILImage) + width, height = copied.size input_images.append(copied) print(f"Loaded input image: {img_path}") @@ -115,13 +254,19 @@ def main(): output_resolution = "2K" else: output_resolution = "1K" - print(f"Auto-detected resolution: {output_resolution} (from max input dimension {max_input_dim})") + print( + f"Auto-detected resolution: {output_resolution} " + f"(from max input dimension {max_input_dim})" + ) # Build contents (images first if editing, prompt only if generating) if input_images: contents = [*input_images, args.prompt] img_count = len(input_images) - print(f"Processing {img_count} image{'s' if img_count > 1 else ''} with resolution {output_resolution}...") + print( + f"Processing {img_count} image{'s' if img_count > 1 else ''} " + f"with resolution {output_resolution}..." + ) else: contents = args.prompt print(f"Generating image with resolution {output_resolution}...") diff --git a/skills/nano-banana-pro/scripts/test_generate_image.py b/skills/nano-banana-pro/scripts/test_generate_image.py new file mode 100644 index 00000000000..db5dda79baf --- /dev/null +++ b/skills/nano-banana-pro/scripts/test_generate_image.py @@ -0,0 +1,108 @@ +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +import generate_image + + +class FakeResponse: + def __init__(self, payload: bytes, url: str): + self._payload = payload + self._url = url + + def geturl(self): + return self._url + + def read(self, _limit: int): + return self._payload + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + +class FakeImage: + def __init__(self, size): + self.size = size + + def copy(self): + return FakeImage(self.size) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + +class FakePILImageModule: + def __init__(self, sizes_by_source): + self._sizes_by_source = sizes_by_source + + def open(self, source): + if isinstance(source, (str, Path)): + key = source + else: + key = type(source).__name__ + size = self._sizes_by_source[key] + return FakeImage(size) + + +class LoadInputImageTests(unittest.TestCase): + def test_load_input_image_accepts_local_path(self): + with tempfile.TemporaryDirectory() as tmpdir: + image_path = Path(tmpdir) / "input.png" + image_path.write_bytes(b"not-a-real-image") + fake_pil = FakePILImageModule({image_path: (16, 12)}) + + loaded = generate_image.load_input_image(str(image_path), fake_pil) + + self.assertEqual(loaded.size, (16, 12)) + + def test_load_input_image_accepts_public_https_url(self): + fake_opener = type( + "FakeOpener", + (), + { + "open": lambda self, req, timeout=0: FakeResponse( + b"fake-image-bytes", + req.full_url, + ) + }, + )() + fake_pil = FakePILImageModule({"BytesIO": (20, 10)}) + + with patch.object( + generate_image.socket, + "getaddrinfo", + return_value=[(None, None, None, None, ("93.184.216.34", 443))], + ), patch.object(generate_image.request, "build_opener", return_value=fake_opener): + loaded = generate_image.load_input_image("https://example.com/input.png", fake_pil) + + self.assertEqual(loaded.size, (20, 10)) + + def test_load_input_image_rejects_private_network_url(self): + with patch.object( + generate_image.socket, + "getaddrinfo", + return_value=[(None, None, None, None, ("127.0.0.1", 443))], + ): + with self.assertRaisesRegex(ValueError, "private, loopback, or special-use hosts"): + generate_image.load_input_image( + "https://localhost/input.png", + FakePILImageModule({}), + ) + + def test_load_input_image_rejects_file_url(self): + with self.assertRaisesRegex(ValueError, "Use a local path instead of file:// URLs"): + generate_image.load_input_image( + "file:///tmp/input.png", + FakePILImageModule({}), + ) + + +if __name__ == "__main__": + unittest.main()