Files
openclaw/skills/openai-image-gen/scripts/gen.py
Mark Zhang 81f22ae109 openai-image-gen: validate and normalize --output-format (#36648)
* openai-image-gen: validate and normalize output format

* Skills/openai-image-gen: cover output-format edge cases

* Changelog: note openai image output format validation

---------

Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
2026-03-06 11:14:30 -05:00

329 lines
11 KiB
Python

#!/usr/bin/env python3
import argparse
import base64
import datetime as dt
import json
import os
import random
import re
import sys
import urllib.error
import urllib.request
from collections.abc import Callable
from html import escape as html_escape
from pathlib import Path
def slugify(text: str) -> str:
    """Turn arbitrary text into a lowercase, dash-separated filename fragment.

    Every run of characters outside ``a-z0-9`` becomes a single dash, and
    leading/trailing dashes are removed. Falls back to ``"image"`` when no
    usable characters remain.
    """
    lowered = text.lower().strip()
    dashed = re.sub(r"[^a-z0-9]+", "-", lowered)
    slug = re.sub(r"-{2,}", "-", dashed).strip("-")
    if not slug:
        return "image"
    return slug
def default_out_dir() -> Path:
    """Return a timestamped output path under the preferred scratch directory.

    ``~/Projects/tmp`` is used when it already exists as a directory;
    otherwise ``./tmp`` is used and created on demand. The returned
    ``openai-image-gen-<timestamp>`` path itself is not created here.
    """
    home_scratch = Path.home() / "Projects" / "tmp"
    if home_scratch.is_dir():
        parent = home_scratch
    else:
        parent = Path("./tmp")
    # No-op for an existing directory; creates ./tmp when falling back.
    parent.mkdir(parents=True, exist_ok=True)
    stamp = dt.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    return parent / f"openai-image-gen-{stamp}"
def pick_prompts(count: int) -> list[str]:
    """Build ``count`` random prompts shaped "<style> of <subject>, <lighting>".

    Each part is drawn independently with ``random.choice`` from a fixed
    pool, so the result depends on the state of the global ``random`` module.
    """
    subject_pool = [
        "a lobster astronaut",
        "a brutalist lighthouse",
        "a cozy reading nook",
        "a cyberpunk noodle shop",
        "a Vienna street at dusk",
        "a minimalist product photo",
        "a surreal underwater library",
    ]
    style_pool = [
        "ultra-detailed studio photo",
        "35mm film still",
        "isometric illustration",
        "editorial photography",
        "soft watercolor",
        "architectural render",
        "high-contrast monochrome",
    ]
    lighting_pool = [
        "golden hour",
        "overcast soft light",
        "neon lighting",
        "dramatic rim light",
        "candlelight",
        "foggy atmosphere",
    ]

    def compose() -> str:
        # Draw in the order the parts are rendered: style, subject, lighting.
        chosen_style = random.choice(style_pool)
        chosen_subject = random.choice(subject_pool)
        chosen_lighting = random.choice(lighting_pool)
        return f"{chosen_style} of {chosen_subject}, {chosen_lighting}"

    return [compose() for _ in range(count)]
def get_model_defaults(model: str) -> tuple[str, str]:
    """Return ``(default_size, default_quality)`` for the given model id."""
    size = "1024x1024"
    # Both dall-e models default to "standard" (dall-e-2 ignores quality
    # anyway); GPT image and any future models default to "high".
    quality = "standard" if model in ("dall-e-2", "dall-e-3") else "high"
    return (size, quality)
def normalize_optional_flag(
*,
model: str,
raw_value: str,
flag_name: str,
supported: Callable[[str], bool],
allowed: set[str],
allowed_text: str,
unsupported_message: str,
aliases: dict[str, str] | None = None,
) -> str:
"""Normalize a string flag, warn when unsupported, and reject invalid values."""
value = raw_value.strip().lower()
if not value:
return ""
if not supported(model):
print(unsupported_message.format(model=model), file=sys.stderr)
return ""
if aliases:
value = aliases.get(value, value)
if value not in allowed:
raise ValueError(
f"Invalid --{flag_name} '{raw_value}'. Allowed values: {allowed_text}."
)
return value
def normalize_background(model: str, background: str) -> str:
    """Validate ``--background``, which only applies to gpt-image models.

    Delegates to ``normalize_optional_flag`` with the accepted values
    ``transparent``, ``opaque`` and ``auto``.
    """

    def is_gpt_image(candidate: str) -> bool:
        return candidate.startswith("gpt-image")

    warning = (
        "Warning: --background is only supported for gpt-image models; "
        "ignoring for '{model}'."
    )
    return normalize_optional_flag(
        model=model,
        raw_value=background,
        flag_name="background",
        supported=is_gpt_image,
        allowed={"transparent", "opaque", "auto"},
        allowed_text="transparent, opaque, auto",
        unsupported_message=warning,
    )
def normalize_style(model: str, style: str) -> str:
    """Validate ``--style``, which only applies to dall-e-3.

    Delegates to ``normalize_optional_flag`` with the accepted values
    ``vivid`` and ``natural``.
    """

    def is_dalle3(candidate: str) -> bool:
        return candidate == "dall-e-3"

    warning = "Warning: --style is only supported for dall-e-3; ignoring for '{model}'."
    return normalize_optional_flag(
        model=model,
        raw_value=style,
        flag_name="style",
        supported=is_dalle3,
        allowed={"vivid", "natural"},
        allowed_text="vivid, natural",
        unsupported_message=warning,
    )
def normalize_output_format(model: str, output_format: str) -> str:
    """Validate ``--output-format``, which only applies to gpt-image models.

    Delegates to ``normalize_optional_flag`` with the accepted values
    ``png``, ``jpeg`` and ``webp``; ``jpg`` is accepted as an alias for
    ``jpeg``.
    """

    def is_gpt_image(candidate: str) -> bool:
        return candidate.startswith("gpt-image")

    warning = (
        "Warning: --output-format is only supported for gpt-image models; "
        "ignoring for '{model}'."
    )
    return normalize_optional_flag(
        model=model,
        raw_value=output_format,
        flag_name="output-format",
        supported=is_gpt_image,
        allowed={"png", "jpeg", "webp"},
        allowed_text="png, jpeg, webp",
        unsupported_message=warning,
        aliases={"jpg": "jpeg"},
    )
def request_images(
    api_key: str,
    prompt: str,
    model: str,
    size: str,
    quality: str,
    background: str = "",
    output_format: str = "",
    style: str = "",
) -> dict:
    """POST a single-image generation request and return the decoded JSON.

    Optional fields are only sent to models that take them: ``quality`` is
    omitted for dall-e-2, ``background``/``output_format`` are sent only for
    gpt-image models (and only when non-empty), and ``style`` only for
    dall-e-3.

    Raises:
        RuntimeError: if the server answers with an HTTP error status; the
            message carries the status code and response body. Other urllib
            errors are not handled here and propagate unchanged.
    """
    endpoint = "https://api.openai.com/v1/images/generations"
    is_gpt_image = model.startswith("gpt-image")

    payload = {"model": model, "prompt": prompt, "size": size, "n": 1}
    # dall-e-2 doesn't accept the quality parameter.
    if model != "dall-e-2":
        payload["quality"] = quality
    # Note: response_format no longer supported by OpenAI Images API;
    # dall-e models now return URLs by default, so it is never sent.
    if is_gpt_image and background:
        payload["background"] = background
    if is_gpt_image and output_format:
        payload["output_format"] = output_format
    if style and model == "dall-e-3":
        payload["style"] = style

    request = urllib.request.Request(
        endpoint,
        data=json.dumps(payload).encode("utf-8"),
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=300) as response:
            raw = response.read()
            return json.loads(raw.decode("utf-8"))
    except urllib.error.HTTPError as err:
        # Include the error body in the message so the failure is visible.
        detail = err.read().decode("utf-8", errors="replace")
        raise RuntimeError(f"OpenAI Images API failed ({err.code}): {detail}") from err
def write_gallery(out_dir: Path, items: list[dict]) -> None:
    """Write ``index.html`` in ``out_dir`` showing every generated image.

    Each entry of ``items`` must provide ``"file"`` (used as the link and
    image source) and ``"prompt"`` (used as the caption); both are
    HTML-escaped before being embedded in the page.
    """

    def render_figure(entry: dict) -> str:
        src = html_escape(entry["file"], quote=True)
        caption = html_escape(entry["prompt"])
        return "\n".join(
            [
                "<figure>",
                f'<a href="{src}"><img src="{src}" loading="lazy" /></a>',
                f"<figcaption>{caption}</figcaption>",
                "</figure>",
            ]
        )

    figures = "\n".join(render_figure(entry) for entry in items)

    # Static part of the page: a plain string, so CSS braces need no escaping.
    page_head = """<!doctype html>
<meta charset="utf-8" />
<title>openai-image-gen</title>
<style>
:root { color-scheme: dark; }
body { margin: 24px; font: 14px/1.4 ui-sans-serif, system-ui; background: #0b0f14; color: #e8edf2; }
h1 { font-size: 18px; margin: 0 0 16px; }
.grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(240px, 1fr)); gap: 16px; }
figure { margin: 0; padding: 12px; border: 1px solid #1e2a36; border-radius: 14px; background: #0f1620; }
img { width: 100%; height: auto; border-radius: 10px; display: block; }
figcaption { margin-top: 10px; color: #b7c2cc; }
code { color: #9cd1ff; }
</style>
<h1>openai-image-gen</h1>
"""
    page_tail = (
        f"<p>Output: <code>{html_escape(out_dir.as_posix())}</code></p>\n"
        '<div class="grid">\n'
        f"{figures}\n"
        "</div>\n"
    )
    (out_dir / "index.html").write_text(page_head + page_tail, encoding="utf-8")
def main() -> int:
    """CLI entry point: generate images, save them, and write an HTML gallery.

    Reads the API key from the ``OPENAI_API_KEY`` environment variable,
    resolves model-specific defaults, validates the optional flags, then
    issues one request per prompt. Images are written to the output
    directory along with ``prompts.json`` and ``index.html``.

    Returns:
        0 on success; 2 when ``OPENAI_API_KEY`` is missing/blank or when
        ``--background``, ``--style`` or ``--output-format`` has an invalid
        value.

    Raises:
        RuntimeError: if a response carries neither ``b64_json`` nor ``url``
            for its first image, or if downloading an image URL fails.
            Errors raised by ``request_images`` propagate unchanged.
    """
    ap = argparse.ArgumentParser(description="Generate images via OpenAI Images API.")
    ap.add_argument("--prompt", help="Single prompt. If omitted, random prompts are generated.")
    ap.add_argument("--count", type=int, default=8, help="How many images to generate.")
    ap.add_argument("--model", default="gpt-image-1", help="Image model id.")
    ap.add_argument("--size", default="", help="Image size (e.g. 1024x1024, 1536x1024). Defaults based on model if not specified.")
    ap.add_argument("--quality", default="", help="Image quality (e.g. high, standard). Defaults based on model if not specified.")
    ap.add_argument("--background", default="", help="Background transparency (GPT models only): transparent, opaque, or auto.")
    ap.add_argument("--output-format", default="", help="Output format (GPT models only): png, jpeg, or webp.")
    ap.add_argument("--style", default="", help="Image style (dall-e-3 only): vivid or natural.")
    ap.add_argument("--out-dir", default="", help="Output directory (default: ./tmp/openai-image-gen-<ts>).")
    args = ap.parse_args()

    api_key = (os.environ.get("OPENAI_API_KEY") or "").strip()
    if not api_key:
        print("Missing OPENAI_API_KEY", file=sys.stderr)
        return 2

    # Validate flag values before touching the filesystem, so a mistyped
    # flag does not leave an empty output directory behind.
    try:
        normalized_background = normalize_background(args.model, args.background)
        normalized_style = normalize_style(args.model, args.style)
        normalized_output_format = normalize_output_format(args.model, args.output_format)
    except ValueError as e:
        print(str(e), file=sys.stderr)
        return 2

    # Apply model-specific defaults if not specified
    default_size, default_quality = get_model_defaults(args.model)
    size = args.size or default_size
    quality = args.quality or default_quality

    count = args.count
    if args.model == "dall-e-3" and count > 1:
        print(f"Warning: dall-e-3 only supports generating 1 image at a time. Reducing count from {count} to 1.", file=sys.stderr)
        count = 1

    out_dir = Path(args.out_dir).expanduser() if args.out_dir else default_out_dir()
    out_dir.mkdir(parents=True, exist_ok=True)

    prompts = [args.prompt] * count if args.prompt else pick_prompts(count)

    # Determine file extension based on output format
    if args.model.startswith("gpt-image") and normalized_output_format:
        file_ext = normalized_output_format
    else:
        file_ext = "png"

    items: list[dict] = []
    for idx, prompt in enumerate(prompts, start=1):
        print(f"[{idx}/{len(prompts)}] {prompt}")
        res = request_images(
            api_key,
            prompt,
            args.model,
            size,
            quality,
            normalized_background,
            normalized_output_format,
            normalized_style,
        )
        # "data" may be absent, null, or an empty list; route all of those to
        # the "Unexpected response" error below instead of an IndexError or
        # TypeError from indexing.
        entries = res.get("data") or [{}]
        data = entries[0]
        image_b64 = data.get("b64_json")
        image_url = data.get("url")
        if not image_b64 and not image_url:
            raise RuntimeError(f"Unexpected response: {json.dumps(res)[:400]}")

        filename = f"{idx:03d}-{slugify(prompt)[:40]}.{file_ext}"
        filepath = out_dir / filename
        if image_b64:
            filepath.write_bytes(base64.b64decode(image_b64))
        else:
            try:
                urllib.request.urlretrieve(image_url, filepath)
            except urllib.error.URLError as e:
                raise RuntimeError(f"Failed to download image from {image_url}: {e}") from e

        items.append({"prompt": prompt, "file": filename})

    (out_dir / "prompts.json").write_text(json.dumps(items, indent=2), encoding="utf-8")
    write_gallery(out_dir, items)
    print(f"\nWrote: {(out_dir / 'index.html').as_posix()}")
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())