mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 18:30:44 +00:00
fix(models): support minimax-portal coding plan vlm routing for image tool (openclaw#33953)
Verified:
- pnpm install --frozen-lockfile
- pnpm build
- pnpm check
- pnpm test:macmini

Co-authored-by: tars90percent <252094836+tars90percent@users.noreply.github.com>
This commit is contained in:
@@ -134,6 +134,7 @@ Docs: https://docs.openclaw.ai
|
|||||||
- Routing/legacy route guard tightening: require legacy session-key channel hints to match the saved delivery channel before inheriting external routing metadata, preventing custom namespaced keys like `agent:<agent>:work:<ticket>` from inheriting stale non-webchat routes.
|
- Routing/legacy route guard tightening: require legacy session-key channel hints to match the saved delivery channel before inheriting external routing metadata, preventing custom namespaced keys like `agent:<agent>:work:<ticket>` from inheriting stale non-webchat routes.
|
||||||
- Gateway/internal client routing continuity: prevent webchat/TUI/UI turns from inheriting stale external reply routes by requiring explicit `deliver: true` for external delivery, keeping main-session external inheritance scoped to non-Webchat/UI clients, and honoring configured `session.mainKey` when identifying main-session continuity. (from #35321, #34635, #35356) Thanks @alexyyyander and @Octane0411.
|
- Gateway/internal client routing continuity: prevent webchat/TUI/UI turns from inheriting stale external reply routes by requiring explicit `deliver: true` for external delivery, keeping main-session external inheritance scoped to non-Webchat/UI clients, and honoring configured `session.mainKey` when identifying main-session continuity. (from #35321, #34635, #35356) Thanks @alexyyyander and @Octane0411.
|
||||||
- Security/auth labels: remove token and API-key snippets from user-facing auth status labels so `/status` and `/models` do not expose credential fragments. (#33262) thanks @cu1ch3n.
|
- Security/auth labels: remove token and API-key snippets from user-facing auth status labels so `/status` and `/models` do not expose credential fragments. (#33262) thanks @cu1ch3n.
|
||||||
|
- Models/MiniMax portal vision routing: add `MiniMax-VL-01` to the `minimax-portal` provider, route portal image understanding through the MiniMax VLM endpoint, and align media auto-selection plus Telegram sticker description with the shared portal image provider path. (#33953) Thanks @tars90percent.
|
||||||
- Auth/credential semantics: align profile eligibility + probe diagnostics with SecretRef/expiry rules and harden browser download atomic writes. (#33733) thanks @joshavant.
|
- Auth/credential semantics: align profile eligibility + probe diagnostics with SecretRef/expiry rules and harden browser download atomic writes. (#33733) thanks @joshavant.
|
||||||
- Security/audit denyCommands guidance: suggest likely exact node command IDs for unknown `gateway.nodes.denyCommands` entries so ineffective denylist entries are easier to correct. (#29713) thanks @liquidhorizon88-bot.
|
- Security/audit denyCommands guidance: suggest likely exact node command IDs for unknown `gateway.nodes.denyCommands` entries so ineffective denylist entries are easier to correct. (#29713) thanks @liquidhorizon88-bot.
|
||||||
- Agents/overload failover handling: classify overloaded provider failures separately from rate limits/status timeouts, add short overload backoff before retry/failover, record overloaded prompt/assistant failures as transient auth-profile cooldowns (with probeable same-provider fallback) instead of treating them like persistent auth/billing failures, and keep one-shot cron retry classification aligned so overloaded fallback summaries still count as transient retries.
|
- Agents/overload failover handling: classify overloaded provider failures separately from rate limits/status timeouts, add short overload backoff before retry/failover, record overloaded prompt/assistant failures as transient auth-profile cooldowns (with probeable same-provider fallback) instead of treating them like persistent auth/billing failures, and keep one-shot cron retry classification aligned so overloaded fallback summaries still count as transient retries.
|
||||||
|
|||||||
@@ -45,3 +45,14 @@ describe("minimaxUnderstandImage apiKey normalization", () => {
|
|||||||
await runNormalizationCase("minimax-\u0417\u2502test-key");
|
await runNormalizationCase("minimax-\u0417\u2502test-key");
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("isMinimaxVlmModel", () => {
|
||||||
|
it("only matches the canonical MiniMax VLM model id", async () => {
|
||||||
|
const { isMinimaxVlmModel } = await import("./minimax-vlm.js");
|
||||||
|
|
||||||
|
expect(isMinimaxVlmModel("minimax", "MiniMax-VL-01")).toBe(true);
|
||||||
|
expect(isMinimaxVlmModel("minimax-portal", "MiniMax-VL-01")).toBe(true);
|
||||||
|
expect(isMinimaxVlmModel("minimax-portal", "custom-vision")).toBe(false);
|
||||||
|
expect(isMinimaxVlmModel("openai", "MiniMax-VL-01")).toBe(false);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|||||||
@@ -6,6 +6,14 @@ type MinimaxBaseResp = {
|
|||||||
status_msg?: string;
|
status_msg?: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
export function isMinimaxVlmProvider(provider: string): boolean {
|
||||||
|
return provider === "minimax" || provider === "minimax-portal";
|
||||||
|
}
|
||||||
|
|
||||||
|
export function isMinimaxVlmModel(provider: string, modelId: string): boolean {
|
||||||
|
return isMinimaxVlmProvider(provider) && modelId.trim() === "MiniMax-VL-01";
|
||||||
|
}
|
||||||
|
|
||||||
function coerceApiHost(params: {
|
function coerceApiHost(params: {
|
||||||
apiHost?: string;
|
apiHost?: string;
|
||||||
modelBaseUrl?: string;
|
modelBaseUrl?: string;
|
||||||
|
|||||||
@@ -71,10 +71,9 @@ describe("MiniMax implicit provider (#15275)", () => {
|
|||||||
"minimax-portal:default": {
|
"minimax-portal:default": {
|
||||||
type: "oauth",
|
type: "oauth",
|
||||||
provider: "minimax-portal",
|
provider: "minimax-portal",
|
||||||
oauth: {
|
access: "token",
|
||||||
access: "token",
|
refresh: "refresh-token",
|
||||||
expires: Date.now() + 60_000,
|
expires: Date.now() + 60_000,
|
||||||
},
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -87,6 +86,18 @@ describe("MiniMax implicit provider (#15275)", () => {
|
|||||||
const providers = await resolveImplicitProviders({ agentDir });
|
const providers = await resolveImplicitProviders({ agentDir });
|
||||||
expect(providers?.["minimax-portal"]?.authHeader).toBe(true);
|
expect(providers?.["minimax-portal"]?.authHeader).toBe(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("should include minimax portal provider when MINIMAX_OAUTH_TOKEN is configured", async () => {
|
||||||
|
const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-"));
|
||||||
|
await withEnvAsync({ MINIMAX_OAUTH_TOKEN: "portal-token" }, async () => {
|
||||||
|
const providers = await resolveImplicitProviders({ agentDir });
|
||||||
|
expect(providers?.["minimax-portal"]).toBeDefined();
|
||||||
|
expect(providers?.["minimax-portal"]?.authHeader).toBe(true);
|
||||||
|
expect(providers?.["minimax-portal"]?.models?.some((m) => m.id === "MiniMax-VL-01")).toBe(
|
||||||
|
true,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("vLLM provider", () => {
|
describe("vLLM provider", () => {
|
||||||
|
|||||||
@@ -771,6 +771,12 @@ function buildMinimaxPortalProvider(): ProviderConfig {
|
|||||||
api: "anthropic-messages",
|
api: "anthropic-messages",
|
||||||
authHeader: true,
|
authHeader: true,
|
||||||
models: [
|
models: [
|
||||||
|
buildMinimaxModel({
|
||||||
|
id: MINIMAX_DEFAULT_VISION_MODEL_ID,
|
||||||
|
name: "MiniMax VL 01",
|
||||||
|
reasoning: false,
|
||||||
|
input: ["text", "image"],
|
||||||
|
}),
|
||||||
buildMinimaxTextModel({
|
buildMinimaxTextModel({
|
||||||
id: MINIMAX_DEFAULT_MODEL_ID,
|
id: MINIMAX_DEFAULT_MODEL_ID,
|
||||||
name: "MiniMax M2.5",
|
name: "MiniMax M2.5",
|
||||||
@@ -1116,8 +1122,9 @@ export async function resolveImplicitProviders(params: {
|
|||||||
providers.minimax = { ...buildMinimaxProvider(), apiKey: minimaxKey };
|
providers.minimax = { ...buildMinimaxProvider(), apiKey: minimaxKey };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const minimaxPortalEnvKey = resolveEnvApiKeyVarName("minimax-portal");
|
||||||
const minimaxOauthProfile = listProfilesForProvider(authStore, "minimax-portal");
|
const minimaxOauthProfile = listProfilesForProvider(authStore, "minimax-portal");
|
||||||
if (minimaxOauthProfile.length > 0) {
|
if (minimaxPortalEnvKey || minimaxOauthProfile.length > 0) {
|
||||||
providers["minimax-portal"] = {
|
providers["minimax-portal"] = {
|
||||||
...buildMinimaxPortalProvider(),
|
...buildMinimaxPortalProvider(),
|
||||||
apiKey: MINIMAX_OAUTH_MARKER,
|
apiKey: MINIMAX_OAUTH_MARKER,
|
||||||
|
|||||||
@@ -273,6 +273,32 @@ describe("image tool implicit imageModel config", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("pairs minimax-portal primary with MiniMax-VL-01 (and fallbacks) when auth exists", async () => {
|
||||||
|
await withTempAgentDir(async (agentDir) => {
|
||||||
|
await writeAuthProfiles(agentDir, {
|
||||||
|
version: 1,
|
||||||
|
profiles: {
|
||||||
|
"minimax-portal:default": {
|
||||||
|
type: "oauth",
|
||||||
|
provider: "minimax-portal",
|
||||||
|
access: "oauth-test",
|
||||||
|
refresh: "refresh-test",
|
||||||
|
expires: Date.now() + 60_000,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
vi.stubEnv("OPENAI_API_KEY", "openai-test");
|
||||||
|
vi.stubEnv("ANTHROPIC_API_KEY", "anthropic-test");
|
||||||
|
const cfg: OpenClawConfig = {
|
||||||
|
agents: { defaults: { model: { primary: "minimax-portal/MiniMax-M2.5" } } },
|
||||||
|
};
|
||||||
|
expect(resolveImageModelConfigForTool({ cfg, agentDir })).toEqual(
|
||||||
|
createDefaultImageFallbackExpectation("minimax-portal/MiniMax-VL-01"),
|
||||||
|
);
|
||||||
|
expect(createImageTool({ config: cfg, agentDir })).not.toBeNull();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
it("pairs zai primary with glm-4.6v (and fallbacks) when auth exists", async () => {
|
it("pairs zai primary with glm-4.6v (and fallbacks) when auth exists", async () => {
|
||||||
await withTempAgentDir(async (agentDir) => {
|
await withTempAgentDir(async (agentDir) => {
|
||||||
vi.stubEnv("ZAI_API_KEY", "zai-test");
|
vi.stubEnv("ZAI_API_KEY", "zai-test");
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import { Type } from "@sinclair/typebox";
|
|||||||
import type { OpenClawConfig } from "../../config/config.js";
|
import type { OpenClawConfig } from "../../config/config.js";
|
||||||
import { resolveUserPath } from "../../utils.js";
|
import { resolveUserPath } from "../../utils.js";
|
||||||
import { loadWebMedia } from "../../web/media.js";
|
import { loadWebMedia } from "../../web/media.js";
|
||||||
import { minimaxUnderstandImage } from "../minimax-vlm.js";
|
import { isMinimaxVlmModel, isMinimaxVlmProvider, minimaxUnderstandImage } from "../minimax-vlm.js";
|
||||||
import {
|
import {
|
||||||
coerceImageAssistantText,
|
coerceImageAssistantText,
|
||||||
coerceImageModelConfig,
|
coerceImageModelConfig,
|
||||||
@@ -110,8 +110,8 @@ export function resolveImageModelConfigForTool(params: {
|
|||||||
let preferred: string | null = null;
|
let preferred: string | null = null;
|
||||||
|
|
||||||
// MiniMax users: always try the canonical vision model first when auth exists.
|
// MiniMax users: always try the canonical vision model first when auth exists.
|
||||||
if (primary.provider === "minimax" && providerOk) {
|
if (isMinimaxVlmProvider(primary.provider) && providerOk) {
|
||||||
preferred = "minimax/MiniMax-VL-01";
|
preferred = `${primary.provider}/MiniMax-VL-01`;
|
||||||
} else if (providerOk && providerVisionFromConfig) {
|
} else if (providerOk && providerVisionFromConfig) {
|
||||||
preferred = providerVisionFromConfig;
|
preferred = providerVisionFromConfig;
|
||||||
} else if (primary.provider === "zai" && providerOk) {
|
} else if (primary.provider === "zai" && providerOk) {
|
||||||
@@ -229,7 +229,7 @@ async function runImagePrompt(params: {
|
|||||||
});
|
});
|
||||||
|
|
||||||
// MiniMax VLM only supports a single image; use the first one.
|
// MiniMax VLM only supports a single image; use the first one.
|
||||||
if (model.provider === "minimax") {
|
if (isMinimaxVlmModel(model.provider, model.id)) {
|
||||||
const first = params.images[0];
|
const first = params.images[0];
|
||||||
const imageDataUrl = `data:${first.mimeType};base64,${first.base64}`;
|
const imageDataUrl = `data:${first.mimeType};base64,${first.base64}`;
|
||||||
const text = await minimaxUnderstandImage({
|
const text = await minimaxUnderstandImage({
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
import { describe, expect, it } from "vitest";
|
import { describe, expect, it } from "vitest";
|
||||||
import {
|
import {
|
||||||
AUTO_AUDIO_KEY_PROVIDERS,
|
AUTO_AUDIO_KEY_PROVIDERS,
|
||||||
|
AUTO_IMAGE_KEY_PROVIDERS,
|
||||||
AUTO_VIDEO_KEY_PROVIDERS,
|
AUTO_VIDEO_KEY_PROVIDERS,
|
||||||
DEFAULT_AUDIO_MODELS,
|
DEFAULT_AUDIO_MODELS,
|
||||||
|
DEFAULT_IMAGE_MODELS,
|
||||||
} from "./defaults.js";
|
} from "./defaults.js";
|
||||||
|
|
||||||
describe("DEFAULT_AUDIO_MODELS", () => {
|
describe("DEFAULT_AUDIO_MODELS", () => {
|
||||||
@@ -22,3 +24,15 @@ describe("AUTO_VIDEO_KEY_PROVIDERS", () => {
|
|||||||
expect(AUTO_VIDEO_KEY_PROVIDERS).toContain("moonshot");
|
expect(AUTO_VIDEO_KEY_PROVIDERS).toContain("moonshot");
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("AUTO_IMAGE_KEY_PROVIDERS", () => {
|
||||||
|
it("includes minimax-portal auto key resolution", () => {
|
||||||
|
expect(AUTO_IMAGE_KEY_PROVIDERS).toContain("minimax-portal");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("DEFAULT_IMAGE_MODELS", () => {
|
||||||
|
it("includes the MiniMax portal vision default", () => {
|
||||||
|
expect(DEFAULT_IMAGE_MODELS["minimax-portal"]).toBe("MiniMax-VL-01");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|||||||
@@ -46,6 +46,7 @@ export const AUTO_IMAGE_KEY_PROVIDERS = [
|
|||||||
"anthropic",
|
"anthropic",
|
||||||
"google",
|
"google",
|
||||||
"minimax",
|
"minimax",
|
||||||
|
"minimax-portal",
|
||||||
"zai",
|
"zai",
|
||||||
] as const;
|
] as const;
|
||||||
export const AUTO_VIDEO_KEY_PROVIDERS = ["google", "moonshot"] as const;
|
export const AUTO_VIDEO_KEY_PROVIDERS = ["google", "moonshot"] as const;
|
||||||
@@ -54,6 +55,7 @@ export const DEFAULT_IMAGE_MODELS: Record<string, string> = {
|
|||||||
anthropic: "claude-opus-4-6",
|
anthropic: "claude-opus-4-6",
|
||||||
google: "gemini-3-flash-preview",
|
google: "gemini-3-flash-preview",
|
||||||
minimax: "MiniMax-VL-01",
|
minimax: "MiniMax-VL-01",
|
||||||
|
"minimax-portal": "MiniMax-VL-01",
|
||||||
zai: "glm-4.6v",
|
zai: "glm-4.6v",
|
||||||
};
|
};
|
||||||
export const CLI_OUTPUT_MAX_BUFFER = 5 * MB;
|
export const CLI_OUTPUT_MAX_BUFFER = 5 * MB;
|
||||||
|
|||||||
133
src/media-understanding/providers/image.test.ts
Normal file
133
src/media-understanding/providers/image.test.ts
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||||
|
|
||||||
|
const completeMock = vi.fn();
|
||||||
|
const minimaxUnderstandImageMock = vi.fn();
|
||||||
|
const ensureOpenClawModelsJsonMock = vi.fn(async () => {});
|
||||||
|
const getApiKeyForModelMock = vi.fn(async () => ({
|
||||||
|
apiKey: "oauth-test",
|
||||||
|
source: "test",
|
||||||
|
mode: "oauth",
|
||||||
|
}));
|
||||||
|
const requireApiKeyMock = vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "");
|
||||||
|
const setRuntimeApiKeyMock = vi.fn();
|
||||||
|
const discoverModelsMock = vi.fn();
|
||||||
|
|
||||||
|
vi.mock("@mariozechner/pi-ai", async (importOriginal) => {
|
||||||
|
const actual = await importOriginal<typeof import("@mariozechner/pi-ai")>();
|
||||||
|
return {
|
||||||
|
...actual,
|
||||||
|
complete: completeMock,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
vi.mock("../../agents/minimax-vlm.js", () => ({
|
||||||
|
isMinimaxVlmProvider: (provider: string) =>
|
||||||
|
provider === "minimax" || provider === "minimax-portal",
|
||||||
|
isMinimaxVlmModel: (provider: string, modelId: string) =>
|
||||||
|
(provider === "minimax" || provider === "minimax-portal") && modelId === "MiniMax-VL-01",
|
||||||
|
minimaxUnderstandImage: minimaxUnderstandImageMock,
|
||||||
|
}));
|
||||||
|
|
||||||
|
vi.mock("../../agents/models-config.js", () => ({
|
||||||
|
ensureOpenClawModelsJson: ensureOpenClawModelsJsonMock,
|
||||||
|
}));
|
||||||
|
|
||||||
|
vi.mock("../../agents/model-auth.js", () => ({
|
||||||
|
getApiKeyForModel: getApiKeyForModelMock,
|
||||||
|
requireApiKey: requireApiKeyMock,
|
||||||
|
}));
|
||||||
|
|
||||||
|
vi.mock("../../agents/pi-model-discovery-runtime.js", () => ({
|
||||||
|
discoverAuthStorage: () => ({
|
||||||
|
setRuntimeApiKey: setRuntimeApiKeyMock,
|
||||||
|
}),
|
||||||
|
discoverModels: discoverModelsMock,
|
||||||
|
}));
|
||||||
|
|
||||||
|
describe("describeImageWithModel", () => {
|
||||||
|
beforeEach(() => {
|
||||||
|
vi.clearAllMocks();
|
||||||
|
minimaxUnderstandImageMock.mockResolvedValue("portal ok");
|
||||||
|
discoverModelsMock.mockReturnValue({
|
||||||
|
find: vi.fn(() => ({
|
||||||
|
provider: "minimax-portal",
|
||||||
|
id: "MiniMax-VL-01",
|
||||||
|
input: ["text", "image"],
|
||||||
|
baseUrl: "https://api.minimax.io/anthropic",
|
||||||
|
})),
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it("routes minimax-portal image models through the MiniMax VLM endpoint", async () => {
|
||||||
|
const { describeImageWithModel } = await import("./image.js");
|
||||||
|
|
||||||
|
const result = await describeImageWithModel({
|
||||||
|
cfg: {},
|
||||||
|
agentDir: "/tmp/openclaw-agent",
|
||||||
|
provider: "minimax-portal",
|
||||||
|
model: "MiniMax-VL-01",
|
||||||
|
buffer: Buffer.from("png-bytes"),
|
||||||
|
fileName: "image.png",
|
||||||
|
mime: "image/png",
|
||||||
|
prompt: "Describe the image.",
|
||||||
|
timeoutMs: 1000,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result).toEqual({
|
||||||
|
text: "portal ok",
|
||||||
|
model: "MiniMax-VL-01",
|
||||||
|
});
|
||||||
|
expect(ensureOpenClawModelsJsonMock).toHaveBeenCalled();
|
||||||
|
expect(getApiKeyForModelMock).toHaveBeenCalled();
|
||||||
|
expect(requireApiKeyMock).toHaveBeenCalled();
|
||||||
|
expect(setRuntimeApiKeyMock).toHaveBeenCalledWith("minimax-portal", "oauth-test");
|
||||||
|
expect(minimaxUnderstandImageMock).toHaveBeenCalledWith({
|
||||||
|
apiKey: "oauth-test",
|
||||||
|
prompt: "Describe the image.",
|
||||||
|
imageDataUrl: `data:image/png;base64,${Buffer.from("png-bytes").toString("base64")}`,
|
||||||
|
modelBaseUrl: "https://api.minimax.io/anthropic",
|
||||||
|
});
|
||||||
|
expect(completeMock).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("uses generic completion for non-canonical minimax-portal image models", async () => {
|
||||||
|
discoverModelsMock.mockReturnValue({
|
||||||
|
find: vi.fn(() => ({
|
||||||
|
provider: "minimax-portal",
|
||||||
|
id: "custom-vision",
|
||||||
|
input: ["text", "image"],
|
||||||
|
baseUrl: "https://api.minimax.io/anthropic",
|
||||||
|
})),
|
||||||
|
});
|
||||||
|
completeMock.mockResolvedValue({
|
||||||
|
role: "assistant",
|
||||||
|
api: "anthropic-messages",
|
||||||
|
provider: "minimax-portal",
|
||||||
|
model: "custom-vision",
|
||||||
|
stopReason: "stop",
|
||||||
|
timestamp: Date.now(),
|
||||||
|
content: [{ type: "text", text: "generic ok" }],
|
||||||
|
});
|
||||||
|
|
||||||
|
const { describeImageWithModel } = await import("./image.js");
|
||||||
|
|
||||||
|
const result = await describeImageWithModel({
|
||||||
|
cfg: {},
|
||||||
|
agentDir: "/tmp/openclaw-agent",
|
||||||
|
provider: "minimax-portal",
|
||||||
|
model: "custom-vision",
|
||||||
|
buffer: Buffer.from("png-bytes"),
|
||||||
|
fileName: "image.png",
|
||||||
|
mime: "image/png",
|
||||||
|
prompt: "Describe the image.",
|
||||||
|
timeoutMs: 1000,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result).toEqual({
|
||||||
|
text: "generic ok",
|
||||||
|
model: "custom-vision",
|
||||||
|
});
|
||||||
|
expect(completeMock).toHaveBeenCalledOnce();
|
||||||
|
expect(minimaxUnderstandImageMock).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
import type { Api, Context, Model } from "@mariozechner/pi-ai";
|
import type { Api, Context, Model } from "@mariozechner/pi-ai";
|
||||||
import { complete } from "@mariozechner/pi-ai";
|
import { complete } from "@mariozechner/pi-ai";
|
||||||
import { minimaxUnderstandImage } from "../../agents/minimax-vlm.js";
|
import { isMinimaxVlmModel, minimaxUnderstandImage } from "../../agents/minimax-vlm.js";
|
||||||
import { getApiKeyForModel, requireApiKey } from "../../agents/model-auth.js";
|
import { getApiKeyForModel, requireApiKey } from "../../agents/model-auth.js";
|
||||||
import { ensureOpenClawModelsJson } from "../../agents/models-config.js";
|
import { ensureOpenClawModelsJson } from "../../agents/models-config.js";
|
||||||
import { coerceImageAssistantText } from "../../agents/tools/image-tool.helpers.js";
|
import { coerceImageAssistantText } from "../../agents/tools/image-tool.helpers.js";
|
||||||
@@ -40,7 +40,7 @@ export async function describeImageWithModel(
|
|||||||
authStorage.setRuntimeApiKey(model.provider, apiKey);
|
authStorage.setRuntimeApiKey(model.provider, apiKey);
|
||||||
|
|
||||||
const base64 = params.buffer.toString("base64");
|
const base64 = params.buffer.toString("base64");
|
||||||
if (model.provider === "minimax") {
|
if (isMinimaxVlmModel(model.provider, model.id)) {
|
||||||
const text = await minimaxUnderstandImage({
|
const text = await minimaxUnderstandImage({
|
||||||
apiKey,
|
apiKey,
|
||||||
prompt: params.prompt ?? "Describe the image.",
|
prompt: params.prompt ?? "Describe the image.",
|
||||||
|
|||||||
@@ -24,4 +24,12 @@ describe("media-understanding provider registry", () => {
|
|||||||
expect(provider?.id).toBe("moonshot");
|
expect(provider?.id).toBe("moonshot");
|
||||||
expect(provider?.capabilities).toEqual(["image", "video"]);
|
expect(provider?.capabilities).toEqual(["image", "video"]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("registers the minimax portal provider", () => {
|
||||||
|
const registry = buildMediaUnderstandingRegistry();
|
||||||
|
const provider = getMediaUnderstandingProvider("minimax-portal", registry);
|
||||||
|
|
||||||
|
expect(provider?.id).toBe("minimax-portal");
|
||||||
|
expect(provider?.capabilities).toEqual(["image"]);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import { anthropicProvider } from "./anthropic/index.js";
|
|||||||
import { deepgramProvider } from "./deepgram/index.js";
|
import { deepgramProvider } from "./deepgram/index.js";
|
||||||
import { googleProvider } from "./google/index.js";
|
import { googleProvider } from "./google/index.js";
|
||||||
import { groqProvider } from "./groq/index.js";
|
import { groqProvider } from "./groq/index.js";
|
||||||
import { minimaxProvider } from "./minimax/index.js";
|
import { minimaxPortalProvider, minimaxProvider } from "./minimax/index.js";
|
||||||
import { mistralProvider } from "./mistral/index.js";
|
import { mistralProvider } from "./mistral/index.js";
|
||||||
import { moonshotProvider } from "./moonshot/index.js";
|
import { moonshotProvider } from "./moonshot/index.js";
|
||||||
import { openaiProvider } from "./openai/index.js";
|
import { openaiProvider } from "./openai/index.js";
|
||||||
@@ -16,6 +16,7 @@ const PROVIDERS: MediaUnderstandingProvider[] = [
|
|||||||
googleProvider,
|
googleProvider,
|
||||||
anthropicProvider,
|
anthropicProvider,
|
||||||
minimaxProvider,
|
minimaxProvider,
|
||||||
|
minimaxPortalProvider,
|
||||||
moonshotProvider,
|
moonshotProvider,
|
||||||
mistralProvider,
|
mistralProvider,
|
||||||
zaiProvider,
|
zaiProvider,
|
||||||
|
|||||||
@@ -6,3 +6,9 @@ export const minimaxProvider: MediaUnderstandingProvider = {
|
|||||||
capabilities: ["image"],
|
capabilities: ["image"],
|
||||||
describeImage: describeImageWithModel,
|
describeImage: describeImageWithModel,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
export const minimaxPortalProvider: MediaUnderstandingProvider = {
|
||||||
|
id: "minimax-portal",
|
||||||
|
capabilities: ["image"],
|
||||||
|
describeImage: describeImageWithModel,
|
||||||
|
};
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import type { OpenClawConfig } from "../config/config.js";
|
|||||||
import { STATE_DIR } from "../config/paths.js";
|
import { STATE_DIR } from "../config/paths.js";
|
||||||
import { logVerbose } from "../globals.js";
|
import { logVerbose } from "../globals.js";
|
||||||
import { loadJsonFile, saveJsonFile } from "../infra/json-file.js";
|
import { loadJsonFile, saveJsonFile } from "../infra/json-file.js";
|
||||||
|
import { AUTO_IMAGE_KEY_PROVIDERS, DEFAULT_IMAGE_MODELS } from "../media-understanding/defaults.js";
|
||||||
import { resolveAutoImageModel } from "../media-understanding/runner.js";
|
import { resolveAutoImageModel } from "../media-understanding/runner.js";
|
||||||
|
|
||||||
const CACHE_FILE = path.join(STATE_DIR, "telegram", "sticker-cache.json");
|
const CACHE_FILE = path.join(STATE_DIR, "telegram", "sticker-cache.json");
|
||||||
@@ -142,7 +143,6 @@ export function getCacheStats(): { count: number; oldestAt?: string; newestAt?:
|
|||||||
|
|
||||||
const STICKER_DESCRIPTION_PROMPT =
|
const STICKER_DESCRIPTION_PROMPT =
|
||||||
"Describe this sticker image in 1-2 sentences. Focus on what the sticker depicts (character, object, action, emotion). Be concise and objective.";
|
"Describe this sticker image in 1-2 sentences. Focus on what the sticker depicts (character, object, action, emotion). Be concise and objective.";
|
||||||
const VISION_PROVIDERS = ["openai", "anthropic", "google", "minimax"] as const;
|
|
||||||
let imageRuntimePromise: Promise<
|
let imageRuntimePromise: Promise<
|
||||||
typeof import("../media-understanding/providers/image-runtime.js")
|
typeof import("../media-understanding/providers/image-runtime.js")
|
||||||
> | null = null;
|
> | null = null;
|
||||||
@@ -198,14 +198,7 @@ export async function describeStickerImage(params: DescribeStickerParams): Promi
|
|||||||
if (entries.length === 0) {
|
if (entries.length === 0) {
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
const defaultId =
|
const defaultId = DEFAULT_IMAGE_MODELS[provider];
|
||||||
provider === "openai"
|
|
||||||
? "gpt-5-mini"
|
|
||||||
: provider === "anthropic"
|
|
||||||
? "claude-opus-4-6"
|
|
||||||
: provider === "google"
|
|
||||||
? "gemini-3-flash-preview"
|
|
||||||
: "MiniMax-VL-01";
|
|
||||||
const preferred = entries.find((entry) => entry.id === defaultId);
|
const preferred = entries.find((entry) => entry.id === defaultId);
|
||||||
return preferred ?? entries[0];
|
return preferred ?? entries[0];
|
||||||
};
|
};
|
||||||
@@ -213,14 +206,16 @@ export async function describeStickerImage(params: DescribeStickerParams): Promi
|
|||||||
let resolved = null as { provider: string; model?: string } | null;
|
let resolved = null as { provider: string; model?: string } | null;
|
||||||
if (
|
if (
|
||||||
activeModel &&
|
activeModel &&
|
||||||
VISION_PROVIDERS.includes(activeModel.provider as (typeof VISION_PROVIDERS)[number]) &&
|
AUTO_IMAGE_KEY_PROVIDERS.includes(
|
||||||
|
activeModel.provider as (typeof AUTO_IMAGE_KEY_PROVIDERS)[number],
|
||||||
|
) &&
|
||||||
(await hasProviderKey(activeModel.provider))
|
(await hasProviderKey(activeModel.provider))
|
||||||
) {
|
) {
|
||||||
resolved = activeModel;
|
resolved = activeModel;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!resolved) {
|
if (!resolved) {
|
||||||
for (const provider of VISION_PROVIDERS) {
|
for (const provider of AUTO_IMAGE_KEY_PROVIDERS) {
|
||||||
if (!(await hasProviderKey(provider))) {
|
if (!(await hasProviderKey(provider))) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user