fix: restore OpenRouter vision prompts
@@ -16,6 +16,7 @@ Docs: https://docs.openclaw.ai
 ### Fixes
 
 - Agents/transport: stop embedded runs from lowering the process-wide undici stream timeouts, so slow Gemini image generation and other long-running provider requests no longer inherit short run-attempt header timeouts. Fixes #70423. Thanks @giangthb.
+- Providers/OpenRouter: send image-understanding prompts as user text before image parts, restoring non-empty vision responses for OpenRouter multimodal models. Fixes #70410.
 - Memory/QMD: recreate stale managed QMD collections when startup repair finds the collection name already exists, so root memory narrows back to `MEMORY.md` instead of staying on broad workspace markdown indexing.
 - Agents/OpenAI: surface selected-model capacity failures from PI, Codex, and auto-reply harness paths with a model-switch hint instead of the generic empty-response error. Thanks @vincentkoc.
 - Providers/OpenAI: route `openai/gpt-image-2` through configured Codex OAuth directly when an `openai-codex` profile is active, instead of probing `OPENAI_API_KEY` first.
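The OpenRouter bullet is the subject of this commit: per the changelog, some OpenRouter multimodal models returned empty vision responses when the user turn carried only image parts and the instruction arrived solely as a system prompt. A minimal sketch of the request shape the fix produces, assuming OpenRouter's OpenAI-compatible `/chat/completions` payload — the `fetch` wiring, env var, and placeholder bytes here are illustrative, not OpenClaw's actual transport:

```ts
// Illustrative request only: the prompt goes first as a user text part,
// followed by the image part. Not OpenClaw's actual transport code.
const imageBase64 = Buffer.from("png-bytes").toString("base64"); // placeholder bytes

const res = await fetch("https://openrouter.ai/api/v1/chat/completions", {
  method: "POST",
  headers: {
    Authorization: `Bearer ${process.env.OPENROUTER_API_KEY}`, // assumed env var
    "Content-Type": "application/json",
  },
  body: JSON.stringify({
    model: "google/gemini-2.5-flash",
    messages: [
      {
        role: "user",
        content: [
          { type: "text", text: "Describe the image." }, // prompt before the image
          {
            type: "image_url",
            image_url: { url: `data:image/png;base64,${imageBase64}` },
          },
        ],
      },
    ],
  }),
});
console.log((await res.json()).choices?.[0]?.message?.content);
```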
@@ -249,6 +249,53 @@ describe("describeImageWithModel", () => {
     expect(context?.messages?.[0]?.content).toHaveLength(1);
   });
 
+  it("places OpenRouter image prompts in user content before images", async () => {
+    discoverModelsMock.mockReturnValue({
+      find: vi.fn(() => ({
+        api: "openai-completions",
+        provider: "openrouter",
+        id: "google/gemini-2.5-flash",
+        input: ["text", "image"],
+        baseUrl: "https://openrouter.ai/api/v1",
+      })),
+    });
+    completeMock.mockResolvedValue({
+      role: "assistant",
+      api: "openai-completions",
+      provider: "openrouter",
+      model: "google/gemini-2.5-flash",
+      stopReason: "stop",
+      timestamp: Date.now(),
+      content: [{ type: "text", text: "openrouter ok" }],
+    });
+
+    const result = await describeImageWithModel({
+      cfg: {},
+      agentDir: "/tmp/openclaw-agent",
+      provider: "openrouter",
+      model: "google/gemini-2.5-flash",
+      buffer: Buffer.from("png-bytes"),
+      fileName: "image.png",
+      mime: "image/png",
+      prompt: "Describe the image.",
+      timeoutMs: 1000,
+    });
+
+    expect(result).toEqual({
+      text: "openrouter ok",
+      model: "google/gemini-2.5-flash",
+    });
+    const [, context] = completeMock.mock.calls[0] ?? [];
+    expect(context?.systemPrompt).toBeUndefined();
+    expect(context?.messages?.[0]?.content).toEqual([
+      { type: "text", text: "Describe the image." },
+      expect.objectContaining({
+        type: "image",
+        mimeType: "image/png",
+      }),
+    ]);
+  });
+
   it.each([
     {
       name: "direct OpenAI Responses baseUrl",
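The excerpt assumes `discoverModelsMock` and `completeMock` are hoisted vitest mocks declared earlier in the file, which the hunk does not show. A hypothetical version of that setup — module paths are invented for illustration and are not the repo's real layout:

```ts
import { vi } from "vitest";

// vi.hoisted lets the mock fns exist before vi.mock factories run.
const { discoverModelsMock, completeMock } = vi.hoisted(() => ({
  discoverModelsMock: vi.fn(),
  completeMock: vi.fn(),
}));

// Hypothetical module paths; only the mock *names* come from the test.
vi.mock("../models.js", () => ({ discoverModels: discoverModelsMock }));
vi.mock("../complete.js", () => ({ complete: completeMock }));
```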
@@ -132,23 +132,43 @@ async function resolveImageRuntime(params: {
 function buildImageContext(
   prompt: string,
   images: Array<{ buffer: Buffer; mime?: string }>,
+  opts?: { promptInUserContent?: boolean },
 ): Context {
+  const imageContent = images.map((image) => ({
+    type: "image" as const,
+    data: image.buffer.toString("base64"),
+    mimeType: image.mime ?? "image/jpeg",
+  }));
+  const content = opts?.promptInUserContent
+    ? [{ type: "text" as const, text: prompt }, ...imageContent]
+    : imageContent;
+
   return {
-    systemPrompt: prompt,
+    ...(opts?.promptInUserContent ? {} : { systemPrompt: prompt }),
     messages: [
       {
         role: "user",
-        content: images.map((image) => ({
-          type: "image" as const,
-          data: image.buffer.toString("base64"),
-          mimeType: image.mime ?? "image/jpeg",
-        })),
+        content,
         timestamp: Date.now(),
       },
     ],
   };
 }
 
+function shouldPlaceImagePromptInUserContent(model: Model<Api>): boolean {
+  const capabilities = resolveProviderRequestCapabilities({
+    provider: model.provider,
+    api: model.api,
+    baseUrl: model.baseUrl,
+    capability: "image",
+    transport: "media-understanding",
+  });
+  return (
+    capabilities.endpointClass === "openrouter" ||
+    (model.provider.toLowerCase() === "openrouter" && capabilities.endpointClass === "default")
+  );
+}
+
 async function describeImagesWithMinimax(params: {
   apiKey: string;
   modelId: string;
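The net effect, shown as the two context shapes `buildImageContext` now returns — a simplified sketch, since the real `Context` type surely has more fields than the diff reveals:

```ts
// Default providers (promptInUserContent falsy): prompt stays in systemPrompt,
// and the user turn carries only the image part.
const defaultShape = {
  systemPrompt: "Describe the image.",
  messages: [
    {
      role: "user",
      content: [{ type: "image", data: "<base64>", mimeType: "image/png" }],
      timestamp: Date.now(),
    },
  ],
};

// OpenRouter-classed endpoints: no systemPrompt; the prompt leads the user
// content, followed by the image -- exactly what the new test asserts.
const openRouterShape = {
  messages: [
    {
      role: "user",
      content: [
        { type: "text", text: "Describe the image." },
        { type: "image", data: "<base64>", mimeType: "image/png" },
      ],
      timestamp: Date.now(),
    },
  ],
};
```

Gating on `endpointClass === "openrouter"` (or an OpenRouter provider left on the default class) keeps every other provider on the existing system-prompt path, so only OpenRouter traffic changes shape.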
@@ -252,7 +272,9 @@ export async function describeImagesWithModel(
     agentDir: params.agentDir,
   });
 
-  const context = buildImageContext(prompt, params.images);
+  const context = buildImageContext(prompt, params.images, {
+    promptInUserContent: shouldPlaceImagePromptInUserContent(model),
+  });
   const controller = new AbortController();
   const timeout =
     typeof params.timeoutMs === "number" &&
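The page truncates this hunk inside the `timeout` initializer. The visible lines match the common AbortController timeout idiom; a hedged sketch of that idiom follows — the variable names come from the diff, while the completion call and cleanup are assumptions, not OpenClaw's actual code:

```ts
// Common idiom the truncated lines appear to implement (sketch only).
const controller = new AbortController();
const timeout =
  typeof params.timeoutMs === "number" && params.timeoutMs > 0
    ? setTimeout(() => controller.abort(), params.timeoutMs)
    : undefined;
try {
  // The provider request would receive controller.signal so that hitting
  // the timeout aborts the in-flight completion (assumed call shape):
  // const response = await complete(model, context, { signal: controller.signal });
} finally {
  if (timeout !== undefined) clearTimeout(timeout); // always release the timer
}
```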