From 0889223a07df09bad03a4028bde459ad75ba20d2 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 9 May 2026 06:11:28 +0100 Subject: [PATCH] fix: validate inline images against session agent model (#79416) --- CHANGELOG.md | 1 + src/gateway/server-methods/agent.ts | 3 +- ...erver.agent.gateway-server-agent-a.test.ts | 65 +++++++++++++++++++ src/gateway/test-helpers.runtime-state.ts | 1 + 4 files changed, 69 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a22731ff7fd..fb98895d1d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -205,6 +205,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Gateway/agent: pass the session-key agent id into inline image attachment validation so the first image in a fresh per-agent session uses the agent's vision-capable model override instead of the text-only system default. Fixes #79407. Thanks @pandadev66. - Gateway/maintenance: prune dedupe overflow against a stable excess count and keep active agent retries from starting duplicate runs after cache eviction. (#73841) Thanks @thesomewhatyou. - Control UI/subagents: suppress internal `subagent_announce` handoff prompts from requester transcripts and hide legacy inter-session wrapper rows so completed subagent results no longer surface runtime context in WebChat history. (#79618) Thanks @joshavant. - Discord: preserve username target resolution for Discord outbound sends. (#79076) Thanks @vincentkoc. diff --git a/src/gateway/server-methods/agent.ts b/src/gateway/server-methods/agent.ts index cd95b83b038..e910c143606 100644 --- a/src/gateway/server-methods/agent.ts +++ b/src/gateway/server-methods/agent.ts @@ -649,7 +649,8 @@ export const agentHandlers: GatewayRequestHandlers = { let baseModel: string | undefined; if (requestedSessionKeyRaw) { const { cfg: sessCfg, entry: sessEntry } = loadSessionEntry(requestedSessionKeyRaw); - const modelRef = resolveSessionModelRef(sessCfg, sessEntry, undefined); + const sessionAgentId = resolveAgentIdFromSessionKey(requestedSessionKeyRaw); + const modelRef = resolveSessionModelRef(sessCfg, sessEntry, sessionAgentId); baseProvider = modelRef.provider; baseModel = modelRef.model; } diff --git a/src/gateway/server.agent.gateway-server-agent-a.test.ts b/src/gateway/server.agent.gateway-server-agent-a.test.ts index ce311de6c34..676f486d79f 100644 --- a/src/gateway/server.agent.gateway-server-agent-a.test.ts +++ b/src/gateway/server.agent.gateway-server-agent-a.test.ts @@ -5,12 +5,14 @@ import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, test, vi import type { ChannelPlugin } from "../channels/plugins/types.js"; import { createChannelTestPluginBase } from "../test-utils/channel-plugins.js"; import { waitForAgentCommandCall } from "./agent-command.test-helpers.js"; +import { __resetModelCatalogCacheForTest as resetGatewayModelCatalogCacheForTest } from "./server-model-catalog.js"; import { setRegistry } from "./server.agent.gateway-server-agent.mocks.js"; import { createRegistry } from "./server.e2e-registry-helpers.js"; import { agentCommand, connectOk, installGatewayTestHooks, + piSdkMock, rpcReq, startServerWithClient, testState, @@ -440,6 +442,69 @@ describe("gateway server agent", () => { }); }); + test("agent validates first image attachment against per-agent model for fresh sessions", async () => { + testState.agentConfig = { model: { primary: "ollama-cloud/deepseek-v4-flash" } }; + testState.agentsConfig = { + list: [ + { id: "main", default: true }, + { id: "vision", model: "ollama-cloud/gemma4:31b" }, + ], + }; + piSdkMock.enabled = true; + piSdkMock.models = [ + { + id: "deepseek-v4-flash", + name: "DeepSeek V4 Flash", + provider: "ollama-cloud", + input: ["text"], + }, + { + id: "gemma4:31b", + name: "Gemma 4 31B", + provider: "ollama-cloud", + input: ["text", "image"], + }, + ]; + await resetGatewayModelCatalogCacheForTest(); + + await setTestSessionStore({ + agentId: "vision", + entries: { + main: { + sessionId: "sess-vision-fresh-image", + updatedAt: Date.now(), + }, + }, + }); + + const res = await rpcReq(ws, "agent", { + message: "what is in the image?", + sessionKey: "agent:vision:main", + attachments: [ + { + mimeType: "image/png", + fileName: "tiny.png", + content: BASE_IMAGE_PNG, + }, + ], + idempotencyKey: "idem-agent-vision-first-image", + }); + expect( + res, + `agent RPC should accept image using per-agent vision model: ${JSON.stringify(res)}`, + ).toMatchObject({ ok: true }); + + const call = await waitForAgentCommandCall("idem-agent-vision-first-image"); + expect(call.sessionKey).toBe("agent:vision:main"); + expect(call.images).toEqual([ + expect.objectContaining({ + type: "image", + mimeType: "image/png", + data: BASE_IMAGE_PNG, + }), + ]); + }); + test("agent errors when delivery requested and no last channel exists", async () => { testState.allowFrom = ["+1555"]; try { diff --git a/src/gateway/test-helpers.runtime-state.ts b/src/gateway/test-helpers.runtime-state.ts index be2aa4ab085..a0fb5eb4132 100644 --- a/src/gateway/test-helpers.runtime-state.ts +++ b/src/gateway/test-helpers.runtime-state.ts @@ -37,6 +37,7 @@ type GatewayTestHoistedState = { provider: string; contextWindow?: number; reasoning?: boolean; + input?: string[]; }>; }; cronIsolatedRun: Mock;