Files
openclaw/extensions/chutes/models.ts
2026-03-27 23:47:04 +00:00

602 lines
16 KiB
TypeScript

import type { ModelDefinitionConfig } from "openclaw/plugin-sdk/provider-model-shared";
import { createSubsystemLogger } from "openclaw/plugin-sdk/runtime-env";
// Logger for this module; the discovery code below reports its fallbacks through `log.warn`.
const log = createSubsystemLogger("chutes-models");
/** Base URL of the Chutes API; model discovery requests `${CHUTES_BASE_URL}/models`. */
export const CHUTES_BASE_URL = "https://llm.chutes.ai/v1";
/** Id of the model designated as the Chutes default. */
export const CHUTES_DEFAULT_MODEL_ID = "zai-org/GLM-4.7-TEE";
/** The default model id prefixed with `chutes/`. */
export const CHUTES_DEFAULT_MODEL_REF = `chutes/${CHUTES_DEFAULT_MODEL_ID}`;
// Context window assigned to a discovered model that reports no usable `context_length`.
const CHUTES_DEFAULT_CONTEXT_WINDOW = 128000;
// Max output tokens assigned to a discovered model that reports no usable `max_output_length`.
const CHUTES_DEFAULT_MAX_TOKENS = 4096;
/**
 * Static catalog of Chutes models.
 *
 * `discoverChutesModels` returns these entries (each passed through
 * `buildChutesModelDefinition`) whenever live discovery is skipped or fails.
 * In every entry `name` mirrors `id`, and the `cacheRead` / `cacheWrite` costs are 0.
 *
 * NOTE(review): the unit of the `cost` figures is not stated in this file —
 * presumably USD per million tokens; confirm against the provider SDK.
 */
export const CHUTES_MODEL_CATALOG: ModelDefinitionConfig[] = [
{
id: "Qwen/Qwen3-32B",
name: "Qwen/Qwen3-32B",
reasoning: true,
input: ["text"],
contextWindow: 40960,
maxTokens: 40960,
cost: { input: 0.08, output: 0.24, cacheRead: 0, cacheWrite: 0 },
},
{
id: "unsloth/Mistral-Nemo-Instruct-2407",
name: "unsloth/Mistral-Nemo-Instruct-2407",
reasoning: false,
input: ["text"],
contextWindow: 131072,
maxTokens: 131072,
cost: { input: 0.02, output: 0.04, cacheRead: 0, cacheWrite: 0 },
},
{
id: "deepseek-ai/DeepSeek-V3-0324-TEE",
name: "deepseek-ai/DeepSeek-V3-0324-TEE",
reasoning: true,
input: ["text"],
contextWindow: 163840,
maxTokens: 65536,
cost: { input: 0.25, output: 1, cacheRead: 0, cacheWrite: 0 },
},
{
id: "Qwen/Qwen3-235B-A22B-Instruct-2507-TEE",
name: "Qwen/Qwen3-235B-A22B-Instruct-2507-TEE",
reasoning: true,
input: ["text"],
contextWindow: 262144,
maxTokens: 65536,
cost: { input: 0.08, output: 0.55, cacheRead: 0, cacheWrite: 0 },
},
{
id: "openai/gpt-oss-120b-TEE",
name: "openai/gpt-oss-120b-TEE",
reasoning: true,
input: ["text"],
contextWindow: 131072,
maxTokens: 65536,
cost: { input: 0.05, output: 0.45, cacheRead: 0, cacheWrite: 0 },
},
{
id: "chutesai/Mistral-Small-3.1-24B-Instruct-2503",
name: "chutesai/Mistral-Small-3.1-24B-Instruct-2503",
reasoning: false,
input: ["text", "image"],
contextWindow: 131072,
maxTokens: 131072,
cost: { input: 0.03, output: 0.11, cacheRead: 0, cacheWrite: 0 },
},
{
id: "deepseek-ai/DeepSeek-V3.2-TEE",
name: "deepseek-ai/DeepSeek-V3.2-TEE",
reasoning: true,
input: ["text"],
contextWindow: 131072,
maxTokens: 65536,
cost: { input: 0.28, output: 0.42, cacheRead: 0, cacheWrite: 0 },
},
{
id: "zai-org/GLM-4.7-TEE",
name: "zai-org/GLM-4.7-TEE",
reasoning: true,
input: ["text"],
contextWindow: 202752,
maxTokens: 65535,
cost: { input: 0.4, output: 2, cacheRead: 0, cacheWrite: 0 },
},
{
id: "moonshotai/Kimi-K2.5-TEE",
name: "moonshotai/Kimi-K2.5-TEE",
reasoning: true,
input: ["text", "image"],
contextWindow: 262144,
maxTokens: 65535,
cost: { input: 0.45, output: 2.2, cacheRead: 0, cacheWrite: 0 },
},
{
id: "unsloth/gemma-3-27b-it",
name: "unsloth/gemma-3-27b-it",
reasoning: false,
input: ["text", "image"],
contextWindow: 128000,
maxTokens: 65536,
cost: { input: 0.04, output: 0.15, cacheRead: 0, cacheWrite: 0 },
},
{
id: "XiaomiMiMo/MiMo-V2-Flash-TEE",
name: "XiaomiMiMo/MiMo-V2-Flash-TEE",
reasoning: true,
input: ["text"],
contextWindow: 262144,
maxTokens: 65536,
cost: { input: 0.09, output: 0.29, cacheRead: 0, cacheWrite: 0 },
},
{
id: "chutesai/Mistral-Small-3.2-24B-Instruct-2506",
name: "chutesai/Mistral-Small-3.2-24B-Instruct-2506",
reasoning: false,
input: ["text", "image"],
contextWindow: 131072,
maxTokens: 131072,
cost: { input: 0.06, output: 0.18, cacheRead: 0, cacheWrite: 0 },
},
{
id: "deepseek-ai/DeepSeek-R1-0528-TEE",
name: "deepseek-ai/DeepSeek-R1-0528-TEE",
reasoning: true,
input: ["text"],
contextWindow: 163840,
maxTokens: 65536,
cost: { input: 0.45, output: 2.15, cacheRead: 0, cacheWrite: 0 },
},
{
id: "zai-org/GLM-5-TEE",
name: "zai-org/GLM-5-TEE",
reasoning: true,
input: ["text"],
contextWindow: 202752,
maxTokens: 65535,
cost: { input: 0.95, output: 3.15, cacheRead: 0, cacheWrite: 0 },
},
{
id: "deepseek-ai/DeepSeek-V3.1-TEE",
name: "deepseek-ai/DeepSeek-V3.1-TEE",
reasoning: true,
input: ["text"],
contextWindow: 163840,
maxTokens: 65536,
cost: { input: 0.2, output: 0.8, cacheRead: 0, cacheWrite: 0 },
},
{
id: "deepseek-ai/DeepSeek-V3.1-Terminus-TEE",
name: "deepseek-ai/DeepSeek-V3.1-Terminus-TEE",
reasoning: true,
input: ["text"],
contextWindow: 163840,
maxTokens: 65536,
cost: { input: 0.23, output: 0.9, cacheRead: 0, cacheWrite: 0 },
},
{
id: "unsloth/gemma-3-4b-it",
name: "unsloth/gemma-3-4b-it",
reasoning: false,
input: ["text", "image"],
contextWindow: 96000,
maxTokens: 96000,
cost: { input: 0.01, output: 0.03, cacheRead: 0, cacheWrite: 0 },
},
{
id: "MiniMaxAI/MiniMax-M2.5-TEE",
name: "MiniMaxAI/MiniMax-M2.5-TEE",
reasoning: true,
input: ["text"],
contextWindow: 196608,
maxTokens: 65536,
cost: { input: 0.3, output: 1.1, cacheRead: 0, cacheWrite: 0 },
},
{
id: "tngtech/DeepSeek-TNG-R1T2-Chimera",
name: "tngtech/DeepSeek-TNG-R1T2-Chimera",
reasoning: true,
input: ["text"],
contextWindow: 163840,
maxTokens: 163840,
cost: { input: 0.25, output: 0.85, cacheRead: 0, cacheWrite: 0 },
},
{
id: "Qwen/Qwen3-Coder-Next-TEE",
name: "Qwen/Qwen3-Coder-Next-TEE",
reasoning: true,
input: ["text"],
contextWindow: 262144,
maxTokens: 65536,
cost: { input: 0.12, output: 0.75, cacheRead: 0, cacheWrite: 0 },
},
{
id: "NousResearch/Hermes-4-405B-FP8-TEE",
name: "NousResearch/Hermes-4-405B-FP8-TEE",
reasoning: true,
input: ["text"],
contextWindow: 131072,
maxTokens: 65536,
cost: { input: 0.3, output: 1.2, cacheRead: 0, cacheWrite: 0 },
},
{
id: "deepseek-ai/DeepSeek-V3",
name: "deepseek-ai/DeepSeek-V3",
reasoning: false,
input: ["text"],
contextWindow: 163840,
maxTokens: 163840,
cost: { input: 0.3, output: 1.2, cacheRead: 0, cacheWrite: 0 },
},
{
id: "openai/gpt-oss-20b",
name: "openai/gpt-oss-20b",
reasoning: true,
input: ["text"],
contextWindow: 131072,
maxTokens: 131072,
cost: { input: 0.04, output: 0.15, cacheRead: 0, cacheWrite: 0 },
},
{
id: "unsloth/Llama-3.2-3B-Instruct",
name: "unsloth/Llama-3.2-3B-Instruct",
reasoning: false,
input: ["text"],
contextWindow: 128000,
maxTokens: 4096,
cost: { input: 0.01, output: 0.01, cacheRead: 0, cacheWrite: 0 },
},
{
id: "unsloth/Mistral-Small-24B-Instruct-2501",
name: "unsloth/Mistral-Small-24B-Instruct-2501",
reasoning: false,
input: ["text", "image"],
contextWindow: 32768,
maxTokens: 32768,
cost: { input: 0.07, output: 0.3, cacheRead: 0, cacheWrite: 0 },
},
{
id: "zai-org/GLM-4.7-FP8",
name: "zai-org/GLM-4.7-FP8",
reasoning: true,
input: ["text"],
contextWindow: 202752,
maxTokens: 65535,
cost: { input: 0.3, output: 1.2, cacheRead: 0, cacheWrite: 0 },
},
{
id: "zai-org/GLM-4.6-TEE",
name: "zai-org/GLM-4.6-TEE",
reasoning: true,
input: ["text"],
contextWindow: 202752,
maxTokens: 65536,
cost: { input: 0.4, output: 1.7, cacheRead: 0, cacheWrite: 0 },
},
{
id: "Qwen/Qwen3.5-397B-A17B-TEE",
name: "Qwen/Qwen3.5-397B-A17B-TEE",
reasoning: true,
input: ["text", "image"],
contextWindow: 262144,
maxTokens: 65536,
cost: { input: 0.55, output: 3.5, cacheRead: 0, cacheWrite: 0 },
},
{
id: "Qwen/Qwen2.5-72B-Instruct",
name: "Qwen/Qwen2.5-72B-Instruct",
reasoning: false,
input: ["text"],
contextWindow: 32768,
maxTokens: 32768,
cost: { input: 0.3, output: 1.2, cacheRead: 0, cacheWrite: 0 },
},
{
id: "NousResearch/DeepHermes-3-Mistral-24B-Preview",
name: "NousResearch/DeepHermes-3-Mistral-24B-Preview",
reasoning: false,
input: ["text"],
contextWindow: 32768,
maxTokens: 32768,
cost: { input: 0.02, output: 0.1, cacheRead: 0, cacheWrite: 0 },
},
{
id: "Qwen/Qwen3-Next-80B-A3B-Instruct",
name: "Qwen/Qwen3-Next-80B-A3B-Instruct",
reasoning: false,
input: ["text"],
contextWindow: 262144,
maxTokens: 262144,
cost: { input: 0.1, output: 0.8, cacheRead: 0, cacheWrite: 0 },
},
{
id: "zai-org/GLM-4.6-FP8",
name: "zai-org/GLM-4.6-FP8",
reasoning: true,
input: ["text"],
contextWindow: 202752,
maxTokens: 65535,
cost: { input: 0.3, output: 1.2, cacheRead: 0, cacheWrite: 0 },
},
{
id: "Qwen/Qwen3-235B-A22B-Thinking-2507",
name: "Qwen/Qwen3-235B-A22B-Thinking-2507",
reasoning: true,
input: ["text"],
contextWindow: 262144,
maxTokens: 262144,
cost: { input: 0.11, output: 0.6, cacheRead: 0, cacheWrite: 0 },
},
{
id: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
name: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
reasoning: true,
input: ["text"],
contextWindow: 131072,
maxTokens: 131072,
cost: { input: 0.03, output: 0.11, cacheRead: 0, cacheWrite: 0 },
},
{
id: "tngtech/R1T2-Chimera-Speed",
name: "tngtech/R1T2-Chimera-Speed",
reasoning: true,
input: ["text"],
contextWindow: 131072,
maxTokens: 65536,
cost: { input: 0.22, output: 0.6, cacheRead: 0, cacheWrite: 0 },
},
{
id: "zai-org/GLM-4.6V",
name: "zai-org/GLM-4.6V",
reasoning: true,
input: ["text", "image"],
contextWindow: 131072,
maxTokens: 65536,
cost: { input: 0.3, output: 0.9, cacheRead: 0, cacheWrite: 0 },
},
{
id: "Qwen/Qwen2.5-VL-32B-Instruct",
name: "Qwen/Qwen2.5-VL-32B-Instruct",
reasoning: false,
input: ["text", "image"],
contextWindow: 16384,
maxTokens: 16384,
cost: { input: 0.05, output: 0.22, cacheRead: 0, cacheWrite: 0 },
},
{
id: "Qwen/Qwen3-VL-235B-A22B-Instruct",
name: "Qwen/Qwen3-VL-235B-A22B-Instruct",
reasoning: false,
input: ["text", "image"],
contextWindow: 262144,
maxTokens: 262144,
cost: { input: 0.3, output: 1.2, cacheRead: 0, cacheWrite: 0 },
},
{
id: "Qwen/Qwen3-14B",
name: "Qwen/Qwen3-14B",
reasoning: true,
input: ["text"],
contextWindow: 40960,
maxTokens: 40960,
cost: { input: 0.05, output: 0.22, cacheRead: 0, cacheWrite: 0 },
},
{
id: "Qwen/Qwen2.5-Coder-32B-Instruct",
name: "Qwen/Qwen2.5-Coder-32B-Instruct",
reasoning: false,
input: ["text"],
contextWindow: 32768,
maxTokens: 32768,
cost: { input: 0.03, output: 0.11, cacheRead: 0, cacheWrite: 0 },
},
{
id: "Qwen/Qwen3-30B-A3B",
name: "Qwen/Qwen3-30B-A3B",
reasoning: true,
input: ["text"],
contextWindow: 40960,
maxTokens: 40960,
cost: { input: 0.06, output: 0.22, cacheRead: 0, cacheWrite: 0 },
},
{
id: "unsloth/gemma-3-12b-it",
name: "unsloth/gemma-3-12b-it",
reasoning: false,
input: ["text", "image"],
contextWindow: 131072,
maxTokens: 131072,
cost: { input: 0.03, output: 0.1, cacheRead: 0, cacheWrite: 0 },
},
{
id: "unsloth/Llama-3.2-1B-Instruct",
name: "unsloth/Llama-3.2-1B-Instruct",
reasoning: false,
input: ["text"],
contextWindow: 128000,
maxTokens: 4096,
cost: { input: 0.01, output: 0.01, cacheRead: 0, cacheWrite: 0 },
},
{
id: "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16-TEE",
name: "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16-TEE",
reasoning: true,
input: ["text"],
contextWindow: 128000,
maxTokens: 4096,
cost: { input: 0.3, output: 1.2, cacheRead: 0, cacheWrite: 0 },
},
{
id: "NousResearch/Hermes-4-14B",
name: "NousResearch/Hermes-4-14B",
reasoning: true,
input: ["text"],
contextWindow: 40960,
maxTokens: 40960,
cost: { input: 0.01, output: 0.05, cacheRead: 0, cacheWrite: 0 },
},
{
id: "Qwen/Qwen3Guard-Gen-0.6B",
name: "Qwen/Qwen3Guard-Gen-0.6B",
reasoning: false,
input: ["text"],
contextWindow: 128000,
maxTokens: 4096,
cost: { input: 0.01, output: 0.01, cacheRead: 0, cacheWrite: 0 },
},
{
id: "rednote-hilab/dots.ocr",
name: "rednote-hilab/dots.ocr",
reasoning: false,
input: ["text", "image"],
contextWindow: 131072,
maxTokens: 131072,
cost: { input: 0.01, output: 0.01, cacheRead: 0, cacheWrite: 0 },
},
];
/**
 * Produces the model definition handed out for a catalog entry.
 *
 * All fields of `model` are shallow-copied into a new object; `compat` is then
 * replaced wholesale so that usage reporting in streaming responses is marked
 * as unsupported. The input object is not modified.
 *
 * @param model - A catalog-shaped model definition.
 * @returns A new definition carrying the Chutes `compat` flags.
 */
export function buildChutesModelDefinition(
  model: (typeof CHUTES_MODEL_CATALOG)[number],
): ModelDefinitionConfig {
  // Listed after the spread so it overrides any `compat` already on `model`.
  const compat = { supportsUsageInStreaming: false };
  return { ...model, compat };
}
/** Pricing figures attached to a model in the `/models` listing. */
interface ChutesModelPricing {
  /** Copied into `cost.input` of the resulting model definition. */
  prompt?: number;
  /** Copied into `cost.output` of the resulting model definition. */
  completion?: number;
}

/**
 * One model as described by the `/models` listing.
 *
 * Only `id` is required; everything else is optional and falls back to a
 * default during discovery.
 */
interface ChutesModelEntry {
  id: string;
  /** Declared for completeness; discovery uses `id` as the display name. */
  name?: string;
  /** Feature tags; discovery only checks for `"reasoning"`. */
  supported_features?: string[];
  /** Accepted input kinds; discovery keeps only `"text"` and `"image"`. */
  input_modalities?: string[];
  /** Becomes `contextWindow` of the resulting model definition. */
  context_length?: number;
  /** Becomes `maxTokens` of the resulting model definition. */
  max_output_length?: number;
  pricing?: ChutesModelPricing;
  /** Any other fields in the payload are tolerated and ignored. */
  [key: string]: unknown;
}

/** Envelope of the `/models` response: the model list lives under `data`. */
interface OpenAIListModelsResponse {
  data?: ChutesModelEntry[];
}
// Lifetime of a cached discovery result, in milliseconds (5 minutes).
const CACHE_TTL = 5 * 60 * 1000;
// Upper bound on the number of keys kept in `modelCache`.
const CACHE_MAX_ENTRIES = 100;
/** A cached model list together with the time it was stored. */
interface CacheEntry {
models: ModelDefinitionConfig[];
// `Date.now()` value (ms since epoch) captured when the entry was written.
time: number;
}
// Discovery results keyed by the trimmed access token ("" for anonymous requests).
const modelCache = new Map<string, CacheEntry>();
/**
 * Drops every cache entry whose age has reached `CACHE_TTL`.
 *
 * @param now - Reference time in ms since epoch; defaults to the current time.
 */
function pruneExpiredCacheEntries(now: number = Date.now()): void {
  // Removing entries while a Map is being traversed is well-defined.
  modelCache.forEach((cachedEntry, cacheKey) => {
    const age = now - cachedEntry.time;
    if (age >= CACHE_TTL) {
      modelCache.delete(cacheKey);
    }
  });
}
/**
 * Stores `models` in the discovery cache under `tokenKey` and hands them back.
 *
 * Expired entries are pruned first. If the key is new and the cache is already
 * at `CACHE_MAX_ENTRIES`, the entry at the front of the Map's insertion order
 * is evicted to make room.
 *
 * @param tokenKey - Cache key (trimmed access token, or "" for anonymous).
 * @param models - Model list to cache.
 * @returns The same `models` array that was passed in.
 */
function cacheAndReturn(
  tokenKey: string,
  models: ModelDefinitionConfig[],
): ModelDefinitionConfig[] {
  const now = Date.now();
  pruneExpiredCacheEntries(now);
  // `Map.set` on an existing key keeps its original insertion position, which
  // would leave a just-refreshed entry first in line for eviction. Deleting it
  // first makes the `set` below re-insert it at the back, so insertion order
  // tracks write recency.
  const wasPresent = modelCache.delete(tokenKey);
  if (!wasPresent && modelCache.size >= CACHE_MAX_ENTRIES) {
    const oldest = modelCache.keys().next();
    if (!oldest.done) {
      modelCache.delete(oldest.value);
    }
  }
  modelCache.set(tokenKey, { models, time: now });
  return models;
}
/** Keeps `value` when it is a finite number greater than zero; otherwise yields `fallback`. */
function positiveNumberOr(value: unknown, fallback: number): number {
  return typeof value === "number" && Number.isFinite(value) && value > 0 ? value : fallback;
}

/** Decides whether a listed model is flagged as a reasoning model (feature tag or id heuristic). */
function inferChutesReasoning(entry: ChutesModelEntry, id: string): boolean {
  const features: unknown = entry.supported_features;
  if (Array.isArray(features) && features.includes("reasoning")) {
    return true;
  }
  const loweredId = id.toLowerCase();
  return ["r1", "thinking", "reason", "tee"].some((marker) => loweredId.includes(marker));
}

/** Reduces the listed input modalities to the supported ones, defaulting to text-only. */
function normalizeChutesInput(entry: ChutesModelEntry): Array<"text" | "image"> {
  const raw: unknown = entry.input_modalities;
  const candidates: unknown[] = Array.isArray(raw) ? raw : [];
  const supported = candidates.filter(
    (modality): modality is "text" | "image" => modality === "text" || modality === "image",
  );
  // A model definition with no input kinds at all is unusable, so fall back to text.
  return supported.length > 0 ? supported : ["text"];
}

/** Converts one listed model into a model definition, applying defaults for missing fields. */
function toChutesModelDefinition(entry: ChutesModelEntry, id: string): ModelDefinitionConfig {
  return {
    id,
    name: id,
    reasoning: inferChutesReasoning(entry, id),
    input: normalizeChutesInput(entry),
    cost: {
      input: positiveNumberOr(entry.pricing?.prompt, 0),
      output: positiveNumberOr(entry.pricing?.completion, 0),
      cacheRead: 0,
      cacheWrite: 0,
    },
    contextWindow: positiveNumberOr(entry.context_length, CHUTES_DEFAULT_CONTEXT_WINDOW),
    maxTokens: positiveNumberOr(entry.max_output_length, CHUTES_DEFAULT_MAX_TOKENS),
    compat: {
      supportsUsageInStreaming: false,
    },
  };
}

/** Best-effort release of a response body that will not be read. */
async function discardResponseBody(response: Response): Promise<void> {
  try {
    await response.body?.cancel();
  } catch {
    // Nothing useful can be done if the stream is already locked or closed.
  }
}

/**
 * Lists the models available from Chutes, preferring the live `/models`
 * endpoint and falling back to the static catalog.
 *
 * Results are cached per access token for `CACHE_TTL`. When the supplied token
 * is rejected with HTTP 401 the request is retried anonymously and the result
 * is cached under the anonymous key. Non-OK responses, empty listings and
 * network or parse errors all resolve to the static catalog (which is cached
 * as well). In test environments (`NODE_ENV === "test"` or
 * `VITEST === "true"`) no request is made and nothing is cached.
 *
 * @param accessToken - Optional bearer token; surrounding whitespace is ignored.
 * @returns The discovered models, or the static catalog when discovery is
 *   skipped or fails. Request and parse failures are logged, not thrown.
 */
export async function discoverChutesModels(accessToken?: string): Promise<ModelDefinitionConfig[]> {
  const trimmedKey = accessToken?.trim() ?? "";
  pruneExpiredCacheEntries(Date.now());
  const cached = modelCache.get(trimmedKey);
  if (cached) {
    return cached.models;
  }
  if (process.env.NODE_ENV === "test" || process.env.VITEST === "true") {
    return CHUTES_MODEL_CATALOG.map(buildChutesModelDefinition);
  }

  // Switched to "" when the token is rejected, so the anonymous result is not
  // filed under a token that did not produce it.
  let effectiveKey = trimmedKey;
  const staticCatalog = () =>
    cacheAndReturn(effectiveKey, CHUTES_MODEL_CATALOG.map(buildChutesModelDefinition));

  const modelsUrl = `${CHUTES_BASE_URL}/models`;
  const headers: Record<string, string> = {};
  if (trimmedKey) {
    headers.Authorization = `Bearer ${trimmedKey}`;
  }

  try {
    let response = await fetch(modelsUrl, {
      signal: AbortSignal.timeout(10_000),
      headers,
    });
    if (response.status === 401 && trimmedKey) {
      // The rejected response is never read; release it before retrying.
      await discardResponseBody(response);
      effectiveKey = "";
      response = await fetch(modelsUrl, {
        signal: AbortSignal.timeout(10_000),
      });
    }
    if (!response.ok) {
      await discardResponseBody(response);
      // 401 and 503 fall back silently; any other status is logged.
      if (response.status !== 401 && response.status !== 503) {
        log.warn(`GET /v1/models failed: HTTP ${response.status}, using static catalog`);
      }
      return staticCatalog();
    }

    const body = (await response.json()) as OpenAIListModelsResponse;
    const data = body?.data;
    if (!Array.isArray(data) || data.length === 0) {
      log.warn("No models in response, using static catalog");
      return staticCatalog();
    }

    const seen = new Set<string>();
    const models: ModelDefinitionConfig[] = [];
    for (const entry of data) {
      // Skip entries without a usable id, and repeated ids after the first.
      const id = typeof entry?.id === "string" ? entry.id.trim() : "";
      if (!id || seen.has(id)) {
        continue;
      }
      seen.add(id);
      models.push(toChutesModelDefinition(entry, id));
    }
    return cacheAndReturn(
      effectiveKey,
      models.length > 0 ? models : CHUTES_MODEL_CATALOG.map(buildChutesModelDefinition),
    );
  } catch (error) {
    log.warn(`Discovery failed: ${String(error)}, using static catalog`);
    return staticCatalog();
  }
}