fix(media): surface vision pipeline diagnostics

* fix: improve error message in optimizeImageToJpeg to include actual error details

* fix: improve the "Model does not support images" error message to include the resolved model's input capabilities

* fix(media): surface vision pipeline diagnostics

---------

Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
辉哥
2026-04-30 03:13:19 +08:00
committed by GitHub
parent 945c910f20
commit 4eb30fc13a
5 changed files with 57 additions and 4 deletions

View File

@@ -23,6 +23,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Media: include redacted per-attempt resize failures and resolved model input capabilities in vision-pipeline errors so ARM64 image failures are diagnosable without closing the remaining routing investigation. Refs #74552. Thanks @1yihui.
- Auto-reply: honor explicit `silentReply.direct: "allow"` for clean empty or reasoning-only direct chat turns while keeping the default direct-chat empty-response guard conservative. Fixes #74409. Thanks @jesuskannolis.
- OpenAI Codex: send a non-empty Responses input item when a Codex turn only has systemPrompt-backed instructions, avoiding ChatGPT backend 400s from `input: []`. Fixes #73820. Thanks @woodhouse-bot.
- Ollama: normalize provider-prefixed tool-call names at the native stream boundary so Kimi/Ollama calls such as `functions.exec` dispatch as `exec` instead of missing configured tools. Fixes #74487. Thanks @afurm and @carreipeia.

View File

@@ -296,6 +296,35 @@ describe("describeImageWithModel", () => {
expect(completeMock).toHaveBeenCalledOnce();
});
// A model that resolves to text-only input must be rejected before any
// completion call, and the error must name both the requested ref and the
// resolved provider/id plus its input capabilities for diagnosability.
it("reports the resolved model input when an image model is text-only", async () => {
  const textOnlyModel = {
    provider: "lmstudio",
    id: "text-only",
    api: "openai-completions",
    input: ["text"],
    baseUrl: "http://127.0.0.1:1234",
  };
  discoverModelsMock.mockReturnValue({ find: vi.fn(() => textOnlyModel) });
  const attempt = describeImageWithModel({
    cfg: {},
    agentDir: "/tmp/openclaw-agent",
    provider: "lmstudio",
    model: "text-only",
    buffer: Buffer.from("png-bytes"),
    fileName: "image.png",
    mime: "image/png",
    prompt: "Describe the image.",
    timeoutMs: 1000,
  });
  await expect(attempt).rejects.toThrow(
    "Model does not support images: lmstudio/text-only (resolved lmstudio/text-only input: text)",
  );
  // No completion request should have been attempted for a text-only model.
  expect(completeMock).not.toHaveBeenCalled();
});
it("passes image prompt as system instructions for codex image requests", async () => {
discoverModelsMock.mockReturnValue({
find: vi.fn(() => ({

View File

@@ -64,6 +64,10 @@ function isNativeResponsesReasoningPayload(model: Model<Api>): boolean {
}).usesKnownNativeOpenAIRoute;
}
/**
 * Render a model's declared input capabilities for diagnostic messages.
 *
 * @param input - The model's `input` capability list, possibly undefined.
 * @returns A comma-separated list such as `"text, image"`, or `"none"` when
 *   the list is missing or empty.
 */
function formatModelInputCapabilities(input: Model<Api>["input"] | undefined): string {
  if (!input || input.length === 0) {
    return "none";
  }
  return input.join(", ");
}
function removeReasoningInclude(value: unknown): unknown {
if (!Array.isArray(value)) {
return value;
@@ -192,7 +196,10 @@ async function resolveImageRuntime(params: {
if (isMinimaxVlmModel(resolvedRef.provider, resolvedRef.model)) {
throw new Error(`Unknown model: ${resolvedRef.provider}/${resolvedRef.model}`);
}
throw new Error(`Model does not support images: ${params.provider}/${params.model}`);
throw new Error(
`Model does not support images: ${params.provider}/${params.model} ` +
`(resolved ${model.provider}/${model.id} input: ${formatModelInputCapabilities(model.input)})`,
);
}
const apiKeyInfo = await getApiKeyForModel({
model,

View File

@@ -7,6 +7,7 @@ import { resolveStateDir } from "../config/paths.js";
import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
let loadWebMedia: typeof import("./web-media.js").loadWebMedia;
let optimizeImageToJpeg: typeof import("./web-media.js").optimizeImageToJpeg;
const TINY_PNG_BASE64 =
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=";
@@ -19,7 +20,7 @@ let workspaceDir = "";
let workspacePngFile = "";
beforeAll(async () => {
({ loadWebMedia } = await import("./web-media.js"));
({ loadWebMedia, optimizeImageToJpeg } = await import("./web-media.js"));
fixtureRoot = await fs.mkdtemp(path.join(resolvePreferredOpenClawTmpDir(), "web-media-core-"));
tinyPngFile = path.join(fixtureRoot, "tiny.png");
await fs.writeFile(tinyPngFile, Buffer.from(TINY_PNG_BASE64, "base64"));
@@ -156,6 +157,12 @@ describe("loadWebMedia", () => {
expect(result.buffer.length).toBeGreaterThan(0);
});
// An undecodable buffer must reject with the underlying resize failure detail
// appended after the generic "Failed to optimize image" prefix.
it("includes resize failure details when image optimization cannot produce a JPEG", async () => {
  const bogusImage = Buffer.from("not an image");
  await expect(optimizeImageToJpeg(bogusImage, 8)).rejects.toThrow(/Failed to optimize image: .+/);
});
it("resolves relative local media paths against the provided workspace directory", async () => {
const result = await loadWebMedia("chart.png", {
maxBytes: 1024 * 1024,

View File

@@ -1,6 +1,7 @@
import path from "node:path";
import { resolveCanvasHttpPathToLocalPath } from "../gateway/canvas-documents.js";
import { logVerbose, shouldLogVerbose } from "../globals.js";
import { formatErrorMessage } from "../infra/errors.js";
import { SafeOpenError, readLocalFileSafely } from "../infra/fs-safe.js";
import { assertNoWindowsNetworkPath, safeFileURLToPath } from "../infra/local-file-access.js";
import type { PinnedDispatcherPolicy, SsrFPolicy } from "../infra/net/ssrf.js";
@@ -616,6 +617,8 @@ export async function optimizeImageToJpeg(
resizeSide: number;
quality: number;
} | null = null;
let firstResizeError: unknown;
const errors: string[] = [];
for (const side of sides) {
for (const quality of qualities) {
@@ -638,7 +641,12 @@ export async function optimizeImageToJpeg(
quality,
};
}
} catch {
} catch (err) {
firstResizeError ??= err;
const message = formatErrorMessage(err).trim();
if (message && !errors.includes(message)) {
errors.push(message);
}
// Continue trying other size/quality combinations
}
}
@@ -653,7 +661,8 @@ export async function optimizeImageToJpeg(
};
}
throw new Error("Failed to optimize image");
const detail = errors.length > 0 ? `: ${errors.slice(0, 3).join("; ")}` : "";
throw new Error(`Failed to optimize image${detail}`, { cause: firstResizeError });
}
export { optimizeImageToPng };