fix: validate inline images against session agent model (#79416)

This commit is contained in:
Peter Steinberger
2026-05-09 06:11:28 +01:00
parent 3938328aa4
commit 0889223a07
4 changed files with 69 additions and 1 deletions

View File

@@ -205,6 +205,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Gateway/agent: pass the session-key agent id into inline image attachment validation so the first image in a fresh per-agent session uses the agent's vision-capable model override instead of the text-only system default. Fixes #79407. Thanks @pandadev66.
- Gateway/maintenance: prune dedupe overflow against a stable excess count and keep active agent retries from starting duplicate runs after cache eviction. (#73841) Thanks @thesomewhatyou.
- Control UI/subagents: suppress internal `subagent_announce` handoff prompts from requester transcripts and hide legacy inter-session wrapper rows so completed subagent results no longer surface runtime context in WebChat history. (#79618) Thanks @joshavant.
- Discord: preserve username target resolution for Discord outbound sends. (#79076) Thanks @vincentkoc.

View File

@@ -649,7 +649,8 @@ export const agentHandlers: GatewayRequestHandlers = {
let baseModel: string | undefined;
if (requestedSessionKeyRaw) {
const { cfg: sessCfg, entry: sessEntry } = loadSessionEntry(requestedSessionKeyRaw);
const modelRef = resolveSessionModelRef(sessCfg, sessEntry, undefined);
const sessionAgentId = resolveAgentIdFromSessionKey(requestedSessionKeyRaw);
const modelRef = resolveSessionModelRef(sessCfg, sessEntry, sessionAgentId);
baseProvider = modelRef.provider;
baseModel = modelRef.model;
}

View File

@@ -5,12 +5,14 @@ import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, test, vi
import type { ChannelPlugin } from "../channels/plugins/types.js";
import { createChannelTestPluginBase } from "../test-utils/channel-plugins.js";
import { waitForAgentCommandCall } from "./agent-command.test-helpers.js";
import { __resetModelCatalogCacheForTest as resetGatewayModelCatalogCacheForTest } from "./server-model-catalog.js";
import { setRegistry } from "./server.agent.gateway-server-agent.mocks.js";
import { createRegistry } from "./server.e2e-registry-helpers.js";
import {
agentCommand,
connectOk,
installGatewayTestHooks,
piSdkMock,
rpcReq,
startServerWithClient,
testState,
@@ -440,6 +442,69 @@ describe("gateway server agent", () => {
});
});
// Regression coverage for #79407: an image sent as the first message of a
// per-agent session must be validated against that agent's model override,
// not against the text-only default model.
test("agent validates first image attachment against per-agent model for fresh sessions", async () => {
  const visionSessionKey = "agent:vision:main";
  const runKey = "idem-agent-vision-first-image";
  const pngAttachment = {
    mimeType: "image/png",
    fileName: "tiny.png",
    content: BASE_IMAGE_PNG,
  };

  // The default primary model accepts text only; only the "vision" agent
  // overrides it with a model that also accepts images.
  testState.agentConfig = { model: { primary: "ollama-cloud/deepseek-v4-flash" } };
  testState.agentsConfig = {
    list: [
      { id: "main", default: true },
      { id: "vision", model: "ollama-cloud/gemma4:31b" },
    ],
  };

  // Catalog entries declare which input kinds each model supports.
  piSdkMock.enabled = true;
  piSdkMock.models = [
    {
      id: "deepseek-v4-flash",
      name: "DeepSeek V4 Flash",
      provider: "ollama-cloud",
      input: ["text"],
    },
    {
      id: "gemma4:31b",
      name: "Gemma 4 31B",
      provider: "ollama-cloud",
      input: ["text", "image"],
    },
  ];
  // Reset the catalog cache after swapping in the mocked model list above.
  await resetGatewayModelCatalogCacheForTest();

  // Fresh session: the stored entry carries only an id and a timestamp.
  await setTestSessionStore({
    agentId: "vision",
    entries: {
      main: {
        sessionId: "sess-vision-fresh-image",
        updatedAt: Date.now(),
      },
    },
  });

  const res = await rpcReq(ws, "agent", {
    message: "what is in the image?",
    sessionKey: visionSessionKey,
    attachments: [pngAttachment],
    idempotencyKey: runKey,
  });
  expect(
    res,
    `agent RPC should accept image using per-agent vision model: ${JSON.stringify(res)}`,
  ).toMatchObject({ ok: true });

  // The accepted image must be forwarded to the agent command unchanged.
  const call = await waitForAgentCommandCall(runKey);
  const forwardedImage = expect.objectContaining({
    type: "image",
    mimeType: "image/png",
    data: BASE_IMAGE_PNG,
  });
  expect(call.sessionKey).toBe(visionSessionKey);
  expect(call.images).toEqual([forwardedImage]);
});
test("agent errors when delivery requested and no last channel exists", async () => {
testState.allowFrom = ["+1555"];
try {

View File

@@ -37,6 +37,7 @@ type GatewayTestHoistedState = {
provider: string;
contextWindow?: number;
reasoning?: boolean;
input?: string[];
}>;
};
cronIsolatedRun: Mock<CronIsolatedRunFn>;