fix(media): surface vision pipeline diagnostics

* fix: improve error message in optimizeImageToJpeg to include actual error details

* fix: improve the "Model does not support images" error message to include the resolved model's input capabilities

* fix(media): surface vision pipeline diagnostics

---------

Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
辉哥
2026-04-30 03:13:19 +08:00
committed by GitHub
parent 945c910f20
commit 4eb30fc13a
5 changed files with 57 additions and 4 deletions

View File

@@ -23,6 +23,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Media: include redacted per-attempt resize failures and resolved model input capabilities in vision-pipeline errors so ARM64 image failures are diagnosable without closing the remaining routing investigation. Refs #74552. Thanks @1yihui.
- Auto-reply: honor explicit `silentReply.direct: "allow"` for clean empty or reasoning-only direct chat turns while keeping the default direct-chat empty-response guard conservative. Fixes #74409. Thanks @jesuskannolis.
- OpenAI Codex: send a non-empty Responses input item when a Codex turn only has systemPrompt-backed instructions, avoiding ChatGPT backend 400s from `input: []`. Fixes #73820. Thanks @woodhouse-bot.
- Ollama: normalize provider-prefixed tool-call names at the native stream boundary so Kimi/Ollama calls such as `functions.exec` dispatch as `exec` instead of missing configured tools. Fixes #74487. Thanks @afurm and @carreipeia.

View File

@@ -296,6 +296,35 @@ describe("describeImageWithModel", () => {
expect(completeMock).toHaveBeenCalledOnce();
});
// A model that resolves to text-only input must be rejected before any
// completion call, and the error must name both the requested ref and the
// resolved provider/id plus its input capabilities for diagnosability.
it("reports the resolved model input when an image model is text-only", async () => {
  const textOnlyModel = {
    provider: "lmstudio",
    id: "text-only",
    api: "openai-completions",
    input: ["text"],
    baseUrl: "http://127.0.0.1:1234",
  };
  discoverModelsMock.mockReturnValue({ find: vi.fn(() => textOnlyModel) });
  const attempt = describeImageWithModel({
    cfg: {},
    agentDir: "/tmp/openclaw-agent",
    provider: "lmstudio",
    model: "text-only",
    buffer: Buffer.from("png-bytes"),
    fileName: "image.png",
    mime: "image/png",
    prompt: "Describe the image.",
    timeoutMs: 1000,
  });
  await expect(attempt).rejects.toThrow(
    "Model does not support images: lmstudio/text-only (resolved lmstudio/text-only input: text)",
  );
  // No completion request should have been attempted for a text-only model.
  expect(completeMock).not.toHaveBeenCalled();
});
it("passes image prompt as system instructions for codex image requests", async () => {
discoverModelsMock.mockReturnValue({
find: vi.fn(() => ({

View File

@@ -64,6 +64,10 @@ function isNativeResponsesReasoningPayload(model: Model<Api>): boolean {
}).usesKnownNativeOpenAIRoute;
}
/**
 * Render a model's declared input capabilities for diagnostic messages.
 *
 * @param input - The model's `input` capability list, possibly undefined.
 * @returns A comma-separated list such as `"text, image"`, or `"none"` when
 *   the list is missing or empty.
 */
function formatModelInputCapabilities(input: Model<Api>["input"] | undefined): string {
  if (!input || input.length === 0) {
    return "none";
  }
  return input.join(", ");
}
function removeReasoningInclude(value: unknown): unknown {
if (!Array.isArray(value)) {
return value;
@@ -192,7 +196,10 @@ async function resolveImageRuntime(params: {
if (isMinimaxVlmModel(resolvedRef.provider, resolvedRef.model)) {
throw new Error(`Unknown model: ${resolvedRef.provider}/${resolvedRef.model}`);
}
throw new Error(`Model does not support images: ${params.provider}/${params.model}`);
throw new Error(
`Model does not support images: ${params.provider}/${params.model} ` +
`(resolved ${model.provider}/${model.id} input: ${formatModelInputCapabilities(model.input)})`,
);
}
const apiKeyInfo = await getApiKeyForModel({
model,

View File

@@ -7,6 +7,7 @@ import { resolveStateDir } from "../config/paths.js";
import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
let loadWebMedia: typeof import("./web-media.js").loadWebMedia;
let optimizeImageToJpeg: typeof import("./web-media.js").optimizeImageToJpeg;
const TINY_PNG_BASE64 =
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=";
@@ -19,7 +20,7 @@ let workspaceDir = "";
let workspacePngFile = "";
beforeAll(async () => {
({ loadWebMedia } = await import("./web-media.js"));
({ loadWebMedia, optimizeImageToJpeg } = await import("./web-media.js"));
fixtureRoot = await fs.mkdtemp(path.join(resolvePreferredOpenClawTmpDir(), "web-media-core-"));
tinyPngFile = path.join(fixtureRoot, "tiny.png");
await fs.writeFile(tinyPngFile, Buffer.from(TINY_PNG_BASE64, "base64"));
@@ -156,6 +157,12 @@ describe("loadWebMedia", () => {
expect(result.buffer.length).toBeGreaterThan(0);
});
// An undecodable buffer must reject with the underlying resize failure detail
// appended after the generic "Failed to optimize image" prefix.
it("includes resize failure details when image optimization cannot produce a JPEG", async () => {
  const bogusImage = Buffer.from("not an image");
  await expect(optimizeImageToJpeg(bogusImage, 8)).rejects.toThrow(/Failed to optimize image: .+/);
});
it("resolves relative local media paths against the provided workspace directory", async () => {
const result = await loadWebMedia("chart.png", {
maxBytes: 1024 * 1024,

View File

@@ -1,6 +1,7 @@
import path from "node:path";
import { resolveCanvasHttpPathToLocalPath } from "../gateway/canvas-documents.js";
import { logVerbose, shouldLogVerbose } from "../globals.js";
import { formatErrorMessage } from "../infra/errors.js";
import { SafeOpenError, readLocalFileSafely } from "../infra/fs-safe.js";
import { assertNoWindowsNetworkPath, safeFileURLToPath } from "../infra/local-file-access.js";
import type { PinnedDispatcherPolicy, SsrFPolicy } from "../infra/net/ssrf.js";
@@ -616,6 +617,8 @@ export async function optimizeImageToJpeg(
resizeSide: number;
quality: number;
} | null = null;
let firstResizeError: unknown;
const errors: string[] = [];
for (const side of sides) {
for (const quality of qualities) {
@@ -638,7 +641,12 @@ export async function optimizeImageToJpeg(
quality,
};
}
} catch {
} catch (err) {
firstResizeError ??= err;
const message = formatErrorMessage(err).trim();
if (message && !errors.includes(message)) {
errors.push(message);
}
// Continue trying other size/quality combinations
}
}
@@ -653,7 +661,8 @@ export async function optimizeImageToJpeg(
};
}
throw new Error("Failed to optimize image");
const detail = errors.length > 0 ? `: ${errors.slice(0, 3).join("; ")}` : "";
throw new Error(`Failed to optimize image${detail}`, { cause: firstResizeError });
}
export { optimizeImageToPng };