fix: restore OpenRouter vision prompts

This commit is contained in:
Peter Steinberger
2026-04-24 00:42:23 +01:00
parent d16b879334
commit 178a314a4c
3 changed files with 77 additions and 7 deletions

View File

@@ -16,6 +16,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Agents/transport: stop embedded runs from lowering the process-wide undici stream timeouts, so slow Gemini image generation and other long-running provider requests no longer inherit short run-attempt header timeouts. Fixes #70423. Thanks @giangthb.
- Providers/OpenRouter: send image-understanding prompts as user text before image parts, restoring non-empty vision responses for OpenRouter multimodal models. Fixes #70410.
- Memory/QMD: recreate stale managed QMD collections when startup repair finds the collection name already exists, so root memory narrows back to `MEMORY.md` instead of staying on broad workspace markdown indexing.
- Agents/OpenAI: surface selected-model capacity failures from PI, Codex, and auto-reply harness paths with a model-switch hint instead of the generic empty-response error. Thanks @vincentkoc.
- Providers/OpenAI: route `openai/gpt-image-2` through configured Codex OAuth directly when an `openai-codex` profile is active, instead of probing `OPENAI_API_KEY` first.

View File

@@ -249,6 +249,53 @@ describe("describeImageWithModel", () => {
expect(context?.messages?.[0]?.content).toHaveLength(1);
});
// Regression test for #70410: OpenRouter multimodal models returned empty
// responses when the prompt was delivered only as a system prompt. The fix
// places the prompt as a user text part ahead of the image parts.
it("places OpenRouter image prompts in user content before images", async () => {
// Model discovery resolves to an OpenRouter-hosted multimodal model that
// accepts both text and image input.
discoverModelsMock.mockReturnValue({
find: vi.fn(() => ({
api: "openai-completions",
provider: "openrouter",
id: "google/gemini-2.5-flash",
input: ["text", "image"],
baseUrl: "https://openrouter.ai/api/v1",
})),
});
// Stub the completion call so we can inspect the context it was given.
completeMock.mockResolvedValue({
role: "assistant",
api: "openai-completions",
provider: "openrouter",
model: "google/gemini-2.5-flash",
stopReason: "stop",
timestamp: Date.now(),
content: [{ type: "text", text: "openrouter ok" }],
});
const result = await describeImageWithModel({
cfg: {},
agentDir: "/tmp/openclaw-agent",
provider: "openrouter",
model: "google/gemini-2.5-flash",
buffer: Buffer.from("png-bytes"),
fileName: "image.png",
mime: "image/png",
prompt: "Describe the image.",
timeoutMs: 1000,
});
// The assistant text and model id are surfaced unchanged.
expect(result).toEqual({
text: "openrouter ok",
model: "google/gemini-2.5-flash",
});
// Second positional argument of complete() is the request context.
const [, context] = completeMock.mock.calls[0] ?? [];
// For OpenRouter the prompt must NOT be a system prompt...
expect(context?.systemPrompt).toBeUndefined();
// ...and must appear as a user text part BEFORE the image part.
expect(context?.messages?.[0]?.content).toEqual([
{ type: "text", text: "Describe the image." },
expect.objectContaining({
type: "image",
mimeType: "image/png",
}),
]);
});
it.each([
{
name: "direct OpenAI Responses baseUrl",

View File

@@ -132,23 +132,43 @@ async function resolveImageRuntime(params: {
/**
 * Build the LLM request context for an image-description call.
 *
 * @param prompt  Instruction describing what the model should do with the images.
 * @param images  Raw image bytes plus an optional MIME type (defaults to image/jpeg).
 * @param opts.promptInUserContent  When true, the prompt is sent as a user text
 *   part placed BEFORE the image parts instead of as a system prompt. Needed for
 *   OpenRouter, which returns empty vision responses for system-prompt-only
 *   requests (see shouldPlaceImagePromptInUserContent). Defaults to the legacy
 *   system-prompt behavior, so existing callers are unaffected.
 */
function buildImageContext(
  prompt: string,
  images: Array<{ buffer: Buffer; mime?: string }>,
  opts?: { promptInUserContent?: boolean },
): Context {
  const imageContent = images.map((image) => ({
    type: "image" as const,
    data: image.buffer.toString("base64"),
    mimeType: image.mime ?? "image/jpeg",
  }));
  // Prompt-first ordering matters: some providers ignore text that follows images.
  const content = opts?.promptInUserContent
    ? [{ type: "text" as const, text: prompt }, ...imageContent]
    : imageContent;
  return {
    // Only attach a system prompt in the legacy mode; otherwise the prompt
    // already travels inside the user message content.
    ...(opts?.promptInUserContent ? {} : { systemPrompt: prompt }),
    messages: [
      {
        role: "user",
        content,
        timestamp: Date.now(),
      },
    ],
  };
}
/**
 * Decide whether the image prompt should travel inside the user message
 * content instead of as a system prompt.
 *
 * NOTE(review): OpenRouter appears to return empty vision responses when the
 * prompt is only a system prompt (see #70410 in the changelog) — hence the
 * special-casing below. Returns true when the resolved endpoint class is
 * "openrouter", or when the provider itself is OpenRouter and the endpoint
 * class fell through to "default".
 */
function shouldPlaceImagePromptInUserContent(model: Model<Api>): boolean {
  const caps = resolveProviderRequestCapabilities({
    provider: model.provider,
    api: model.api,
    baseUrl: model.baseUrl,
    capability: "image",
    transport: "media-understanding",
  });
  if (caps.endpointClass === "openrouter") {
    return true;
  }
  const isOpenRouterProvider = model.provider.toLowerCase() === "openrouter";
  return isOpenRouterProvider && caps.endpointClass === "default";
}
async function describeImagesWithMinimax(params: {
apiKey: string;
modelId: string;
@@ -252,7 +272,9 @@ export async function describeImagesWithModel(
agentDir: params.agentDir,
});
const context = buildImageContext(prompt, params.images);
const context = buildImageContext(prompt, params.images, {
promptInUserContent: shouldPlaceImagePromptInUserContent(model),
});
const controller = new AbortController();
const timeout =
typeof params.timeoutMs === "number" &&