Skills/nano-banana-pro: support hosted input images (#37247)

* skills(nano-banana-pro): support remote edit image URLs

* test(nano-banana-pro): cover remote input image validation

* docs(nano-banana-pro): document remote input images

* docs(changelog): note nano-banana remote image inputs

* chore(nano-banana-pro): normalize script imports

* test(nano-banana-pro): normalize test imports

* ci: use published bun release tag

* ci: skip prod audit on PRs without dependency changes

* test(nano-banana-pro): remove pillow dependency from skill tests

* docs(changelog): credit nano-banana input image follow-up
This commit is contained in:
Vincent Koc
2026-03-06 01:02:23 -05:00
committed by GitHub
parent 7187bfd84b
commit 74e0729631
6 changed files with 291 additions and 9 deletions

View File

@@ -61,7 +61,7 @@ runs:
if: inputs.install-bun == 'true' if: inputs.install-bun == 'true'
uses: oven-sh/setup-bun@v2 uses: oven-sh/setup-bun@v2
with: with:
bun-version: "1.3.9+cf6cdbbba" bun-version: "1.3.9"
- name: Runtime versions - name: Runtime versions
shell: bash shell: bash

View File

@@ -327,7 +327,26 @@ jobs:
pre-commit run zizmor --files "${workflow_files[@]}" pre-commit run zizmor --files "${workflow_files[@]}"
- name: Audit production dependencies - name: Audit production dependencies
run: pre-commit run --all-files pnpm-audit-prod run: |
set -euo pipefail
# Any event other than a pull request (for example a push) always gets the
# full production audit.
if [ "${{ github.event_name }}" != "pull_request" ]; then
  pre-commit run --all-files pnpm-audit-prod
  exit 0
fi
BASE="${{ github.event.pull_request.base.sha }}"
# Capture the changed-file list before matching. Piping `git diff` straight
# into `grep -q` under `pipefail` fails open: a git error (or SIGPIPE when grep
# exits on its first match) makes the negated pipeline true and the audit is
# skipped. With the assignment, `set -e` aborts the step if git fails.
changed_files="$(git diff --name-only "$BASE" HEAD)"
if ! grep -Eq '(^|/)package\.json$|^pnpm-lock\.yaml$|^pnpm-workspace\.yaml$' <<<"$changed_files"; then
  echo "No dependency manifest changes detected; skipping pnpm audit on this PR."
  exit 0
fi
pre-commit run --all-files pnpm-audit-prod
checks-windows: checks-windows:
needs: [docs-scope, changed-scope] needs: [docs-scope, changed-scope]

View File

@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
- Telegram/topic agent routing: support per-topic `agentId` overrides in forum groups and DM topics so topics can route to dedicated agents with isolated sessions. (#33647; based on #31513) Thanks @kesor and @Sid-Qin. - Telegram/topic agent routing: support per-topic `agentId` overrides in forum groups and DM topics so topics can route to dedicated agents with isolated sessions. (#33647; based on #31513) Thanks @kesor and @Sid-Qin.
- Slack/DM typing feedback: add `channels.slack.typingReaction` so Socket Mode DMs can show reaction-based processing status even when Slack native assistant typing is unavailable. (#19816) Thanks @dalefrieswthat. - Slack/DM typing feedback: add `channels.slack.typingReaction` so Socket Mode DMs can show reaction-based processing status even when Slack native assistant typing is unavailable. (#19816) Thanks @dalefrieswthat.
- Exec/process interactive recovery: add `process attach` plus input-wait metadata/hints (`waitingForInput`, `idleMs`, `stdinWritable`) so long-running interactive sessions can be observed and resumed without losing context. Fixes #33957. Thanks @westoque. - Exec/process interactive recovery: add `process attach` plus input-wait metadata/hints (`waitingForInput`, `idleMs`, `stdinWritable`) so long-running interactive sessions can be observed and resumed without losing context. Fixes #33957. Thanks @westoque.
- Skills/nano-banana-pro: accept public `http(s)` input images for edit/composition while keeping local path support, and return explicit errors for redirects, `file://`, and private-network URLs. Fixes #33960. Thanks @westoque and @vincentkoc.
### Fixes ### Fixes

View File

@@ -39,6 +39,12 @@ Edit (single image)
uv run {baseDir}/scripts/generate_image.py --prompt "edit instructions" --filename "output.png" -i "/path/in.png" --resolution 2K uv run {baseDir}/scripts/generate_image.py --prompt "edit instructions" --filename "output.png" -i "/path/in.png" --resolution 2K
``` ```
Edit from a hosted image URL
```bash
uv run {baseDir}/scripts/generate_image.py --prompt "turn this into a watercolor poster" --filename "output.png" -i "https://images.example.com/source.png" --resolution 2K
```
Multi-image composition (up to 14 images) Multi-image composition (up to 14 images)
```bash ```bash
@@ -53,6 +59,9 @@ API key
Notes Notes
- Resolutions: `1K` (default), `2K`, `4K`. - Resolutions: `1K` (default), `2K`, `4K`.
- Input images can be local paths or public `http(s)` URLs.
- `file://` URLs are rejected; use a normal local path instead.
- Remote input URLs that redirect, or that resolve to private/loopback/special-use hosts, are rejected for safety.
- Use timestamps in filenames: `yyyy-mm-dd-hh-mm-ss-name.png`. - Use timestamps in filenames: `yyyy-mm-dd-hh-mm-ss-name.png`.
- The script prints a `MEDIA:` line for OpenClaw to auto-attach on supported chat providers. - The script prints a `MEDIA:` line for OpenClaw to auto-attach on supported chat providers.
- Do not read the image back; report the saved path only. - Do not read the image back; report the saved path only.

View File

@@ -17,9 +17,22 @@ Multi-image editing (up to 14 images):
""" """
import argparse import argparse
import ipaddress
import os import os
import re
import socket
import sys import sys
from io import BytesIO
from pathlib import Path from pathlib import Path
from urllib import error, parse, request
MAX_REMOTE_IMAGE_BYTES = 20 * 1024 * 1024
REMOTE_IMAGE_TIMEOUT_SEC = 20
class NoRedirectHandler(request.HTTPRedirectHandler):
    """Redirect handler that never follows an HTTP redirect."""

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Decline to build a follow-up request for a redirect response.

        All arguments are accepted for signature compatibility with
        ``HTTPRedirectHandler.redirect_request`` and are ignored.

        Returns:
            Always ``None``, which tells urllib that this handler will not
            follow the redirect.
        """
        return None
def get_api_key(provided_key: str | None) -> str | None: def get_api_key(provided_key: str | None) -> str | None:
@@ -29,6 +42,127 @@ def get_api_key(provided_key: str | None) -> str | None:
return os.environ.get("GEMINI_API_KEY") return os.environ.get("GEMINI_API_KEY")
def is_remote_image_url(image_source: str) -> bool:
    """Return True when *image_source* parses with an ``http`` or ``https`` scheme.

    The comparison is case-insensitive. Anything else (plain paths,
    ``file://`` URLs, other schemes) yields False.
    """
    scheme = parse.urlparse(image_source).scheme
    return scheme.lower() in ("http", "https")
def _looks_like_windows_drive_path(image_source: str) -> bool:
    """Return True when *image_source* starts with a drive prefix such as ``C:\\`` or ``C:/``.

    Such paths parse with a one-letter URL scheme, so callers use this check
    to tell them apart from real URLs.
    """
    drive_prefix = re.match(r"^[a-zA-Z]:[\\/]", image_source)
    return drive_prefix is not None
def _is_blocked_remote_ip(address: str) -> bool:
    """Return True when *address* must not be used as a remote image host.

    Args:
        address: A textual IPv4 or IPv6 address.

    Returns:
        True for private, loopback, link-local, multicast, reserved and
        unspecified addresses, and for any other address that is not
        globally reachable. False for public addresses.

    Raises:
        ValueError: If *address* is not a valid IP address.
    """
    ip = ipaddress.ip_address(address)
    return (
        ip.is_private
        or ip.is_loopback
        or ip.is_link_local
        or ip.is_multicast
        or ip.is_reserved
        or ip.is_unspecified
        # ``is_private`` is False for the 100.64.0.0/10 shared address space
        # (carrier-grade NAT), which is still not a public destination;
        # ``is_global`` is False for it, so this closes the gap.
        or not ip.is_global
    )
def validate_remote_image_url(image_url: str) -> parse.ParseResult:
    """Check that *image_url* is an acceptable remote image source.

    The URL must use ``http``/``https``, name a host, carry no embedded
    credentials, and resolve only to addresses that
    ``_is_blocked_remote_ip`` does not flag.

    Args:
        image_url: The URL supplied by the user.

    Returns:
        The parsed URL.

    Raises:
        ValueError: If any of the checks above fails or the host cannot be
            resolved.
    """
    parsed = parse.urlparse(image_url)
    scheme = parsed.scheme.lower()

    # file:// gets its own hint because the fix (use a plain path) is different.
    if scheme == "file":
        raise ValueError(
            f"Unsupported input image URL '{image_url}'. "
            "Use a local path instead of file:// URLs."
        )
    if scheme not in ("http", "https"):
        raise ValueError(
            f"Unsupported input image URL '{image_url}'. Only public http(s) URLs are supported."
        )

    if not parsed.hostname:
        raise ValueError(f"Invalid input image URL '{image_url}': hostname is required.")

    if parsed.username or parsed.password:
        raise ValueError(
            f"Unsupported input image URL '{image_url}': embedded credentials are not allowed."
        )

    try:
        addrinfo = socket.getaddrinfo(
            parsed.hostname,
            parsed.port or (443 if scheme == "https" else 80),
            type=socket.SOCK_STREAM,
        )
    except socket.gaierror as exc:
        raise ValueError(f"Could not resolve input image URL '{image_url}': {exc}.") from exc

    # Every resolved address is checked; one blocked answer rejects the URL.
    blocked_ips = set()
    for entry in addrinfo:
        sockaddr = entry[4]
        if sockaddr and sockaddr[0] and _is_blocked_remote_ip(sockaddr[0]):
            blocked_ips.add(sockaddr[0])

    if blocked_ips:
        raise ValueError(
            f"Unsafe input image URL '{image_url}': private, loopback, or "
            f"special-use hosts are not allowed ({', '.join(sorted(blocked_ips))})."
        )

    return parsed
def _download_remote_image_bytes(image_url: str) -> bytes:
    """Download *image_url* without following redirects and return the raw bytes.

    Raises:
        ValueError: On a redirect, an HTTP error status, a URL/connection
            error, or a body larger than ``MAX_REMOTE_IMAGE_BYTES``.
    """
    # NOTE(review): the caller validates the hostname with its own DNS lookup,
    # and urllib resolves it again here, so an answer that changes between the
    # two lookups is not re-checked.
    opener = request.build_opener(NoRedirectHandler())
    req = request.Request(
        image_url,
        headers={"User-Agent": "OpenClaw nano-banana-pro/1.0"},
    )
    try:
        with opener.open(req, timeout=REMOTE_IMAGE_TIMEOUT_SEC) as response:
            redirected_to = response.geturl()
            if redirected_to != image_url:
                raise ValueError(
                    "Redirected input image URLs are not supported for safety. "
                    f"Re-run with the final asset URL: {redirected_to}"
                )
            # Read one byte past the cap so an oversized body is detectable
            # without downloading all of it.
            image_bytes = response.read(MAX_REMOTE_IMAGE_BYTES + 1)
    except error.HTTPError as exc:
        if 300 <= exc.code < 400:
            location = exc.headers.get("Location")
            detail = f" Redirect target: {location}" if location else ""
            raise ValueError(
                f"Redirected input image URLs are not supported for safety.{detail}"
            ) from exc
        raise ValueError(
            f"Error downloading input image '{image_url}': HTTP {exc.code}."
        ) from exc
    except error.URLError as exc:
        raise ValueError(
            f"Error downloading input image '{image_url}': {exc.reason}."
        ) from exc

    if len(image_bytes) > MAX_REMOTE_IMAGE_BYTES:
        raise ValueError(
            f"Input image URL '{image_url}' exceeded the "
            f"{MAX_REMOTE_IMAGE_BYTES // (1024 * 1024)} MB download limit."
        )
    return image_bytes


def load_input_image(image_source: str, pil_image_module):
    """Load an input image from a local path or a public http(s) URL.

    Args:
        image_source: A filesystem path (``~`` is expanded) or an
            ``http``/``https`` URL.
        pil_image_module: Object exposing ``open(...)`` that returns a context
            manager whose value has ``copy()``. The caller in this file passes
            the PIL ``Image`` module.

    Returns:
        A copy of the opened image, detached from the underlying file or
        buffer.

    Raises:
        ValueError: If a remote URL fails validation or download, or if the
            source uses an unsupported scheme such as ``file://``.
    """
    if is_remote_image_url(image_source):
        validate_remote_image_url(image_source)
        image_bytes = _download_remote_image_bytes(image_source)
        with pil_image_module.open(BytesIO(image_bytes)) as img:
            return img.copy()

    parsed = parse.urlparse(image_source)
    local_path = Path(image_source).expanduser()
    if parsed.scheme and not _looks_like_windows_drive_path(image_source):
        if parsed.scheme.lower() == "file":
            raise ValueError(
                f"Unsupported input image URL '{image_source}'. "
                "Use a local path instead of file:// URLs."
            )
        # A plain filename containing a colon (e.g. "shot:1.png") also parses
        # with a scheme. Reject only when no such file exists, so real local
        # files keep working. os.path.isfile returns False instead of raising
        # on strings that are not usable paths.
        if not os.path.isfile(local_path):
            raise ValueError(
                f"Unsupported input image source '{image_source}'. "
                "Use a local path or a public http(s) URL."
            )

    with pil_image_module.open(local_path) as img:
        return img.copy()
def main(): def main():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Generate images using Nano Banana Pro (Gemini 3 Pro Image)" description="Generate images using Nano Banana Pro (Gemini 3 Pro Image)"
@@ -48,7 +182,10 @@ def main():
action="append", action="append",
dest="input_images", dest="input_images",
metavar="IMAGE", metavar="IMAGE",
help="Input image path(s) for editing/composition. Can be specified multiple times (up to 14 images)." help=(
"Input image path(s) for editing/composition. "
"Can be specified multiple times (up to 14 images)."
),
) )
parser.add_argument( parser.add_argument(
"--resolution", "-r", "--resolution", "-r",
@@ -89,15 +226,17 @@ def main():
output_resolution = args.resolution output_resolution = args.resolution
if args.input_images: if args.input_images:
if len(args.input_images) > 14: if len(args.input_images) > 14:
print(f"Error: Too many input images ({len(args.input_images)}). Maximum is 14.", file=sys.stderr) print(
f"Error: Too many input images ({len(args.input_images)}). Maximum is 14.",
file=sys.stderr,
)
sys.exit(1) sys.exit(1)
max_input_dim = 0 max_input_dim = 0
for img_path in args.input_images: for img_path in args.input_images:
try: try:
with PILImage.open(img_path) as img: copied = load_input_image(img_path, PILImage)
copied = img.copy() width, height = copied.size
width, height = copied.size
input_images.append(copied) input_images.append(copied)
print(f"Loaded input image: {img_path}") print(f"Loaded input image: {img_path}")
@@ -115,13 +254,19 @@ def main():
output_resolution = "2K" output_resolution = "2K"
else: else:
output_resolution = "1K" output_resolution = "1K"
print(f"Auto-detected resolution: {output_resolution} (from max input dimension {max_input_dim})") print(
f"Auto-detected resolution: {output_resolution} "
f"(from max input dimension {max_input_dim})"
)
# Build contents (images first if editing, prompt only if generating) # Build contents (images first if editing, prompt only if generating)
if input_images: if input_images:
contents = [*input_images, args.prompt] contents = [*input_images, args.prompt]
img_count = len(input_images) img_count = len(input_images)
print(f"Processing {img_count} image{'s' if img_count > 1 else ''} with resolution {output_resolution}...") print(
f"Processing {img_count} image{'s' if img_count > 1 else ''} "
f"with resolution {output_resolution}..."
)
else: else:
contents = args.prompt contents = args.prompt
print(f"Generating image with resolution {output_resolution}...") print(f"Generating image with resolution {output_resolution}...")

View File

@@ -0,0 +1,108 @@
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
import generate_image
class FakeResponse:
    """Minimal stand-in for the response object returned by an opener."""

    def __init__(self, payload: bytes, url: str):
        self._payload = payload
        self._url = url

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        # Never suppress exceptions raised inside the ``with`` body.
        return False

    def geturl(self):
        """Return the URL this response claims to have been fetched from."""
        return self._url

    def read(self, _limit: int):
        """Return the whole canned payload; the size limit is ignored."""
        return self._payload
class FakeImage:
    """Minimal stand-in for an opened image: a ``size`` plus ``copy()``."""

    def __init__(self, size):
        self.size = size

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        # Never suppress exceptions raised inside the ``with`` body.
        return False

    def copy(self):
        """Return a new ``FakeImage`` with the same size."""
        return FakeImage(self.size)
class FakePILImageModule:
    """Stand-in for the PIL image module that serves canned image sizes.

    ``sizes_by_source`` maps a path (``str`` or ``Path``) to a size. Any other
    source object is keyed by its class name, e.g. ``"BytesIO"``.
    """

    def __init__(self, sizes_by_source):
        self._sizes_by_source = sizes_by_source

    def open(self, source):
        """Return a ``FakeImage`` sized per the mapping; ``KeyError`` if unknown."""
        key = source if isinstance(source, (str, Path)) else type(source).__name__
        size = self._sizes_by_source[key]
        return FakeImage(size)
class LoadInputImageTests(unittest.TestCase):
    """Exercise ``generate_image.load_input_image`` with fake PIL and network layers."""

    def test_load_input_image_accepts_local_path(self):
        """A plain filesystem path is opened and its size reported."""
        with tempfile.TemporaryDirectory() as workdir:
            source_file = Path(workdir) / "input.png"
            source_file.write_bytes(b"not-a-real-image")
            pil_stub = FakePILImageModule({source_file: (16, 12)})

            result = generate_image.load_input_image(str(source_file), pil_stub)

            self.assertEqual(result.size, (16, 12))

    def test_load_input_image_accepts_public_https_url(self):
        """A URL resolving to a public address is downloaded and decoded."""

        class FakeOpener:
            def open(self, req, timeout=0):
                # Echo the requested URL back so no redirect is detected.
                return FakeResponse(b"fake-image-bytes", req.full_url)

        pil_stub = FakePILImageModule({"BytesIO": (20, 10)})
        public_addrinfo = [(None, None, None, None, ("93.184.216.34", 443))]

        with patch.object(
            generate_image.socket, "getaddrinfo", return_value=public_addrinfo
        ), patch.object(
            generate_image.request, "build_opener", return_value=FakeOpener()
        ):
            result = generate_image.load_input_image(
                "https://example.com/input.png", pil_stub
            )

        self.assertEqual(result.size, (20, 10))

    def test_load_input_image_rejects_private_network_url(self):
        """A host resolving to loopback is refused before any download."""
        loopback_addrinfo = [(None, None, None, None, ("127.0.0.1", 443))]

        with patch.object(
            generate_image.socket, "getaddrinfo", return_value=loopback_addrinfo
        ), self.assertRaisesRegex(ValueError, "private, loopback, or special-use hosts"):
            generate_image.load_input_image(
                "https://localhost/input.png",
                FakePILImageModule({}),
            )

    def test_load_input_image_rejects_file_url(self):
        """``file://`` URLs are refused with a hint to use a local path."""
        with self.assertRaisesRegex(ValueError, "Use a local path instead of file:// URLs"):
            generate_image.load_input_image(
                "file:///tmp/input.png",
                FakePILImageModule({}),
            )
if __name__ == "__main__":
unittest.main()