Files
openclaw/extensions/deepinfra/provider-models.ts
2026-05-28 17:58:06 -04:00

479 lines
15 KiB
TypeScript

import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
import { buildManifestModelProviderConfig } from "openclaw/plugin-sdk/provider-catalog-shared";
import { fetchWithTimeout } from "openclaw/plugin-sdk/provider-http";
import type { ModelDefinitionConfig } from "openclaw/plugin-sdk/provider-model-shared";
import { createSubsystemLogger } from "openclaw/plugin-sdk/runtime-env";
import { hasConfiguredSecretInput } from "openclaw/plugin-sdk/secret-input";
import { asPositiveSafeInteger } from "openclaw/plugin-sdk/string-coerce-runtime";
import manifest from "./openclaw.plugin.json" with { type: "json" };
// Subsystem logger; used by discovery to report why it fell back to the static catalog.
const log = createSubsystemLogger("deepinfra-models");
// Provider config derived from the bundled plugin manifest (openclaw.plugin.json).
const DEEPINFRA_MANIFEST_PROVIDER = buildManifestModelProviderConfig({
providerId: "deepinfra",
catalog: manifest.modelCatalog.providers.deepinfra,
});
/** Base URL taken from the manifest-derived provider config. */
export const DEEPINFRA_BASE_URL = DEEPINFRA_MANIFEST_PROVIDER.baseUrl;
/** Discovery endpoint: the models listing under the base URL with the `sort_by=openclaw&filter=with_meta` query. */
export const DEEPINFRA_MODELS_URL = `${DEEPINFRA_BASE_URL}/models?sort_by=openclaw&filter=with_meta`;
/** Default model id, without the provider prefix. */
export const DEEPINFRA_DEFAULT_MODEL_ID = "deepseek-ai/DeepSeek-V4-Flash";
/** Default model reference in `deepinfra/<model id>` form. */
export const DEEPINFRA_DEFAULT_MODEL_REF = `deepinfra/${DEEPINFRA_DEFAULT_MODEL_ID}`;
// Substituted when a discovered chat model carries no usable context window / max tokens.
const DEEPINFRA_DEFAULT_CONTEXT_WINDOW = 128000;
const DEEPINFRA_DEFAULT_MAX_TOKENS = 8192;
/** Chat model definitions exactly as listed by the manifest-derived provider config. */
export const DEEPINFRA_MODEL_CATALOG: ModelDefinitionConfig[] = DEEPINFRA_MANIFEST_PROVIDER.models;
// Timeout handed to fetchWithTimeout for the discovery request.
const DISCOVERY_TIMEOUT_MS = 5000;
// Maximum age (5 minutes) of a cached live catalog before discovery refetches.
const DISCOVERY_CACHE_TTL_MS = 5 * 60 * 1000;
/**
 * The slice of configuration that hasDeepInfraApiKey inspects: each
 * provider's `apiKey` is checked together with `secrets.defaults`.
 */
type DeepInfraAuthConfig = {
secrets?: { defaults?: { env?: string; file?: string; exec?: string } };
// Keyed by provider id; `apiKey` is left as `unknown` and validated by the shared secret-input helper.
models?: { providers?: Record<string, { apiKey?: unknown } | undefined> };
};
// Wire format — mirrors deepapi/agent_models_api.AgentOpenAIModelsOut.
/**
 * Pricing fields of one wire entry. Every field is optional; the grouping
 * comments name the surface each field is associated with.
 */
interface DeepInfraAgentModelPricing {
// chat / vlm / embed
input_tokens?: number;
output_tokens?: number;
cache_read_tokens?: number;
// image-gen
per_image_unit?: number;
// video-gen
output_seconds?: number;
// tts
input_characters?: number;
// stt
input_seconds?: number;
}
/** Metadata attached to one wire entry; consumed by entryToSurfaceModel. */
interface DeepInfraAgentModelMetadata {
description?: string;
// The nullable numeric fields below are passed through asPositiveSafeInteger before use.
context_length?: number | null;
max_tokens?: number | null;
pricing?: DeepInfraAgentModelPricing;
// Tags that match a known surface name decide which catalog buckets the model joins.
tags?: string[];
default_width?: number | null;
default_height?: number | null;
default_iterations?: number | null;
}
/** One row of the discovery response; rows whose metadata is null are skipped. */
interface DeepInfraAgentModelEntry {
id: string;
metadata: DeepInfraAgentModelMetadata | null;
}
/** Top-level discovery response body; a missing or empty `data` triggers the static fallback. */
interface DeepInfraAgentModelsResponse {
data?: DeepInfraAgentModelEntry[];
}
/** Surface identifiers; the same strings are used as tags on catalog models. */
export type DeepInfraSurface = "chat" | "vlm" | "embed" | "image-gen" | "video-gen" | "tts" | "stt";
/** Normalized model row produced both by live discovery and by the static fallback. */
export interface DeepInfraSurfaceModel {
id: string;
name: string;
description?: string;
// All tags carried by the model, including ones that do not name a surface (e.g. "reasoning").
tags: string[];
contextWindow?: number;
maxTokens?: number;
pricing: DeepInfraAgentModelPricing;
defaultWidth?: number;
defaultHeight?: number;
defaultIterations?: number;
}
/**
 * Models grouped by surface. A model tagged with several surfaces appears in
 * each matching list.
 */
export interface DeepInfraDiscoveredCatalog {
chat: DeepInfraSurfaceModel[];
vlm: DeepInfraSurfaceModel[];
embed: DeepInfraSurfaceModel[];
imageGen: DeepInfraSurfaceModel[];
videoGen: DeepInfraSurfaceModel[];
tts: DeepInfraSurfaceModel[];
stt: DeepInfraSurfaceModel[];
/** True iff served from a successful live fetch; false for the static fallback. */
live: boolean;
}
/** Last catalog obtained from a successful live fetch, or null when nothing is cached. */
let cachedCatalog: DeepInfraDiscoveredCatalog | null = null;
/** `Date.now()` timestamp recorded when `cachedCatalog` was stored; 0 when empty. */
let cachedAt = 0;

/** Drop the cached live catalog so the next discovery call starts cold (test helper). */
export function resetDeepInfraModelCacheForTest(): void {
  cachedAt = 0;
  cachedCatalog = null;
}
// Tag -> surface lookup. Surface tags are spelled exactly like the surface
// they select, so the table is the identity mapping over the known surfaces.
const SURFACE_FOR_TAG: Record<string, DeepInfraSurface> = Object.fromEntries(
  (["chat", "vlm", "embed", "image-gen", "video-gen", "tts", "stt"] as const).map(
    (surface): [string, DeepInfraSurface] => [surface, surface],
  ),
);
// Pricing fields declared by DeepInfraAgentModelPricing; only these are copied
// from the wire, and only when they hold a finite number.
const DEEPINFRA_PRICING_FIELDS = [
  "input_tokens",
  "output_tokens",
  "cache_read_tokens",
  "per_image_unit",
  "output_seconds",
  "input_characters",
  "input_seconds",
] as const;

/**
 * Convert one discovery-response row into a normalized surface model.
 *
 * The row comes straight from parsed JSON, so every field is validated rather
 * than trusted: the id must be a non-blank string, tags keep only string
 * members, integer fields go through `asPositiveSafeInteger`, the description
 * is kept only when it is a string, and pricing keeps only finite numbers.
 *
 * @param entry - Row from the response's `data` array.
 * @returns The normalized model, or `null` when the row has no usable id or
 *   no metadata object.
 */
function entryToSurfaceModel(entry: DeepInfraAgentModelEntry): DeepInfraSurfaceModel | null {
  const id = typeof entry?.id === "string" ? entry.id.trim() : "";
  if (!id) {
    return null;
  }
  const metadata = entry.metadata;
  // A primitive in place of the metadata object is as unusable as a missing one.
  if (!metadata || typeof metadata !== "object") {
    return null;
  }
  const tags = Array.isArray(metadata.tags)
    ? metadata.tags.filter((tag): tag is string => typeof tag === "string")
    : [];
  // Copy field by field so nulls, strings and other junk never reach
  // consumers that treat prices as numbers.
  const pricing: DeepInfraAgentModelPricing = {};
  const rawPricing = metadata.pricing;
  if (rawPricing && typeof rawPricing === "object") {
    for (const field of DEEPINFRA_PRICING_FIELDS) {
      const value = rawPricing[field];
      if (typeof value === "number" && Number.isFinite(value)) {
        pricing[field] = value;
      }
    }
  }
  return {
    id,
    name: id,
    description: typeof metadata.description === "string" ? metadata.description : undefined,
    tags,
    contextWindow: asPositiveSafeInteger(metadata.context_length),
    maxTokens: asPositiveSafeInteger(metadata.max_tokens),
    pricing,
    defaultWidth: asPositiveSafeInteger(metadata.default_width),
    defaultHeight: asPositiveSafeInteger(metadata.default_height),
    defaultIterations: asPositiveSafeInteger(metadata.default_iterations),
  };
}
/**
 * Group models into per-surface lists according to their tags.
 *
 * A model is appended once to every surface one of its tags names; tags that
 * do not name a surface are ignored. The returned catalog is marked
 * `live: true`; callers building a fallback flip the flag afterwards.
 *
 * @param models - Normalized models, in the order they should appear in each list.
 * @returns A fresh catalog with one array per surface.
 */
function bucketBySurface(models: DeepInfraSurfaceModel[]): DeepInfraDiscoveredCatalog {
  const catalog: DeepInfraDiscoveredCatalog = {
    chat: [],
    vlm: [],
    embed: [],
    imageGen: [],
    videoGen: [],
    tts: [],
    stt: [],
    live: true,
  };
  // Surface name -> the catalog array that collects it.
  const buckets: Record<DeepInfraSurface, DeepInfraSurfaceModel[]> = {
    chat: catalog.chat,
    vlm: catalog.vlm,
    embed: catalog.embed,
    "image-gen": catalog.imageGen,
    "video-gen": catalog.videoGen,
    tts: catalog.tts,
    stt: catalog.stt,
  };
  for (const model of models) {
    // Guards against a repeated tag adding the same model twice to one list.
    const seen = new Set<DeepInfraSurface>();
    for (const tag of model.tags) {
      // Tags are untrusted wire strings. A bare lookup on the plain-object
      // table would resolve inherited keys such as "constructor" or
      // "__proto__" to truthy non-surface values and crash on the push below.
      if (!Object.prototype.hasOwnProperty.call(SURFACE_FOR_TAG, tag)) {
        continue;
      }
      const surface = SURFACE_FOR_TAG[tag];
      if (!surface || seen.has(surface)) {
        continue;
      }
      seen.add(surface);
      buckets[surface].push(model);
    }
  }
  return catalog;
}
// Static fallback. Chat rows live in openclaw.plugin.json (manifest-validated);
// non-chat surfaces live below because the manifest validator only accepts
// chat-shaped rows. These are used pre-auth / offline; live discovery
// overrides once a key is configured.
/** Shape of one chat row as read from the manifest's deepinfra model list. */
interface ManifestChatModelEntry {
id: string;
// Falls back to `id` when absent.
name?: string;
contextWindow?: number;
maxTokens?: number;
// When truthy, the converted surface model gains the "reasoning" tag.
reasoning?: boolean;
// When it includes "image", the converted surface model gains the "vlm" tag.
input?: Array<"text" | "image">;
// Copied into wire-style pricing fields; cacheRead is only copied when greater than 0.
cost?: { input?: number; output?: number; cacheRead?: number };
}
/**
 * Turn a manifest chat row into the normalized surface-model shape.
 *
 * Costs become wire-style pricing fields (cache-read only when positive), and
 * the row is always tagged "chat", plus "vlm" when it accepts images and
 * "reasoning" when flagged as a reasoning model.
 *
 * @param entry - Chat row from the plugin manifest.
 * @returns The equivalent surface model; `name` falls back to the id.
 */
function manifestChatEntryToSurfaceModel(entry: ManifestChatModelEntry): DeepInfraSurfaceModel {
  const { input: inputCost, output: outputCost, cacheRead } = entry.cost ?? {};
  const pricing: DeepInfraAgentModelPricing = {
    ...(typeof inputCost === "number" ? { input_tokens: inputCost } : {}),
    ...(typeof outputCost === "number" ? { output_tokens: outputCost } : {}),
    ...(typeof cacheRead === "number" && cacheRead > 0 ? { cache_read_tokens: cacheRead } : {}),
  };
  const acceptsImages = entry.input?.includes("image");
  const tags: string[] = [
    "chat",
    ...(acceptsImages ? ["vlm"] : []),
    ...(entry.reasoning ? ["reasoning"] : []),
  ];
  return {
    id: entry.id,
    name: entry.name ?? entry.id,
    tags,
    contextWindow: entry.contextWindow,
    maxTokens: entry.maxTokens,
    pricing,
  };
}
// Static rows for the non-chat surfaces, consulted only when no API key is
// configured or live discovery fails. The list is deliberately small: the
// dynamic `/v1/openai/models?sort_by=openclaw&filter=with_meta` projection is
// the real source of truth (140 tagged rows today), so a model retired from
// the DeepInfra catalog disappears automatically the next time discovery
// runs, and new image-gen models, video-gen models or TTS voices arrive
// through discovery without a code change.
//
// Each row was verified against the live catalog when it was added. Rows are
// not kept for models the upstream provider has retired (e.g.
// `run-diffusion/Juggernaut-Lightning-Flux` was removed from DeepInfra and is
// therefore not listed, even though earlier main releases shipped it as a
// fallback).

/** Image-gen fallback row; every static image model defaults to 1024x1024. */
function staticImageGenModel(
  id: string,
  perImageUnit: number,
  defaultIterations: number,
): DeepInfraSurfaceModel {
  return {
    id,
    name: id,
    tags: ["image-gen"],
    pricing: { per_image_unit: perImageUnit },
    defaultWidth: 1024,
    defaultHeight: 1024,
    defaultIterations,
  };
}

/** TTS fallback row priced per input characters. */
function staticTtsModel(id: string, inputCharacters: number): DeepInfraSurfaceModel {
  return {
    id,
    name: id,
    tags: ["tts"],
    pricing: { input_characters: inputCharacters },
  };
}

const STATIC_NON_CHAT_FALLBACK: DeepInfraSurfaceModel[] = [
  // image-gen: representative subset of currently-served models.
  staticImageGenModel("black-forest-labs/FLUX-1-schnell", 0.003, 4),
  staticImageGenModel("black-forest-labs/FLUX-1-dev", 0.025, 28),
  staticImageGenModel("Qwen/Qwen-Image-Max", 0.075, 28),
  staticImageGenModel("stabilityai/sdxl-turbo", 0.0002, 4),
  // video-gen: DeepInfra has no live video-gen catalog rows today, so none
  // are listed. Live discovery picks up text-to-video models as soon as the
  // backend tags them; no static row is required.
  //
  // tts: Kokoro comes first so the shipped default voice (af_bella) pairs
  // with the chosen default model. The others are alternative TTS providers
  // currently served by DeepInfra; Qwen3-TTS / chatterbox-turbo / csm-1b each
  // require their own voice, so they ship as discoverable alternatives rather
  // than the implicit default.
  staticTtsModel("hexgrad/Kokoro-82M", 0.65),
  staticTtsModel("Qwen/Qwen3-TTS", 0.65),
  staticTtsModel("ResembleAI/chatterbox-turbo", 1.0),
  staticTtsModel("sesame/csm-1b", 7.0),
  // stt
  {
    id: "openai/whisper-large-v3-turbo",
    name: "openai/whisper-large-v3-turbo",
    tags: ["stt"],
    pricing: { input_seconds: 0.00004 },
  },
  // embed
  {
    id: "BAAI/bge-m3",
    name: "BAAI/bge-m3",
    tags: ["embed"],
    pricing: { input_tokens: 0.01 },
    maxTokens: 8192,
    contextWindow: 8192,
  },
];
/**
 * Build the offline catalog: manifest chat rows followed by the static
 * non-chat rows, bucketed by surface and flagged as not live.
 */
function manifestFallbackCatalog(): DeepInfraDiscoveredCatalog {
  const manifestRows = (manifest.modelCatalog.providers.deepinfra.models ??
    []) as ManifestChatModelEntry[];
  const fallback = bucketBySurface([
    ...manifestRows.map(manifestChatEntryToSurfaceModel),
    ...STATIC_NON_CHAT_FALLBACK,
  ]);
  // bucketBySurface marks its result live; this catalog never touched the network.
  fallback.live = false;
  return fallback;
}
/**
 * Synchronous per-surface fallback for the (sync) register callback. Media
 * providers register with these defaults; live discovery feeds the chat
 * surface via augmentModelCatalog and the catalog seams for image/video-gen.
 *
 * @returns A freshly built static catalog with `live` set to false.
 */
export function getDeepInfraSurfaceFallbackCatalog(): DeepInfraDiscoveredCatalog {
  const fallback = manifestFallbackCatalog();
  return fallback;
}
/**
 * Return a copy of `model` whose `compat.supportsUsageInStreaming` is set,
 * defaulting to `true` when the model does not specify it. All other fields,
 * including other `compat` entries, are carried over; the input is not mutated.
 *
 * @param model - Model definition to normalize.
 * @returns A new model definition object.
 */
export function buildDeepInfraModelDefinition(model: ModelDefinitionConfig): ModelDefinitionConfig {
  const { compat } = model;
  const supportsUsageInStreaming = compat?.supportsUsageInStreaming ?? true;
  return { ...model, compat: { ...compat, supportsUsageInStreaming } };
}
/**
 * Convert a chat-surface model into a model definition.
 *
 * A "vlm" tag adds image input, a "reasoning" or "reasoning_effort" tag marks
 * the model as reasoning-capable, missing limits fall back to the module
 * defaults, and missing prices become 0 (cache-write cost is always 0).
 *
 * @param model - Surface model taken from the chat (or vlm) bucket.
 * @returns The definition, passed through buildDeepInfraModelDefinition.
 */
function chatSurfaceModelToModelDefinition(model: DeepInfraSurfaceModel): ModelDefinitionConfig {
  const { tags, pricing } = model;
  const acceptsImages = tags.includes("vlm");
  const reasoning = ["reasoning", "reasoning_effort"].some((tag) => tags.includes(tag));
  const definition: ModelDefinitionConfig = {
    id: model.id,
    name: model.name,
    reasoning,
    input: acceptsImages ? ["text", "image"] : ["text"],
    contextWindow: model.contextWindow ?? DEEPINFRA_DEFAULT_CONTEXT_WINDOW,
    maxTokens: model.maxTokens ?? DEEPINFRA_DEFAULT_MAX_TOKENS,
    cost: {
      input: pricing.input_tokens ?? 0,
      output: pricing.output_tokens ?? 0,
      cacheRead: pricing.cache_read_tokens ?? 0,
      cacheWrite: 0,
    },
  };
  return buildDeepInfraModelDefinition(definition);
}
/**
 * Report whether a DeepInfra API key is configured. Dynamic discovery is
 * gated on this: pre-auth it keeps the picker tight and avoids a useless
 * network call (the endpoint itself is unauthenticated).
 *
 * Sources are checked in order: the `DEEPINFRA_API_KEY` environment variable,
 * a `deepinfra` provider entry in `config` with a configured secret input,
 * and finally the shared `isProviderApiKeyConfigured` helper, which covers
 * auth-profile-store keys and SecretRef / `OPENCLAW_LIVE_*` indirection.
 *
 * @param options.env - Environment to inspect; defaults to `process.env`.
 * @param options.agentDir - Forwarded to `isProviderApiKeyConfigured`.
 * @param options.config - Config slice holding provider API keys and secret defaults.
 * @returns `true` as soon as any source yields a key.
 */
export function hasDeepInfraApiKey(options?: {
  env?: NodeJS.ProcessEnv;
  agentDir?: string;
  config?: DeepInfraAuthConfig;
}): boolean {
  const envKey = (options?.env ?? process.env).DEEPINFRA_API_KEY;
  if (typeof envKey === "string" && envKey.trim() !== "") {
    return true;
  }
  const secretDefaults = options?.config?.secrets?.defaults;
  // Provider ids are compared case-insensitively and ignoring surrounding whitespace.
  const configuredInConfig = Object.entries(options?.config?.models?.providers ?? {}).some(
    ([providerId, provider]) =>
      providerId.trim().toLowerCase() === "deepinfra" &&
      hasConfiguredSecretInput(provider?.apiKey, secretDefaults),
  );
  if (configuredInConfig) {
    return true;
  }
  return isProviderApiKeyConfigured({ provider: "deepinfra", agentDir: options?.agentDir });
}
/**
 * Discover the per-surface DeepInfra catalog.
 *
 * Returns the static manifest fallback when running under a test runner
 * (`NODE_ENV=test` or `VITEST`), when no API key is configured, when the
 * request fails or returns a non-OK status, or when the response contains no
 * usable model rows. A successful live catalog is cached in module state and
 * reused until it is older than DISCOVERY_CACHE_TTL_MS. Every fallback taken
 * after the request was attempted is logged as a warning.
 *
 * @param options.hasApiKey - Skip key detection and use this answer instead.
 * @param options.env - Environment used for key detection; defaults to `process.env`.
 * @param options.agentDir - Forwarded to key detection.
 * @returns The live catalog (`live: true`) or the static fallback (`live: false`).
 *   Never rejects: request and parse errors are caught and logged.
 */
export async function discoverDeepInfraSurfaces(options?: {
  hasApiKey?: boolean;
  env?: NodeJS.ProcessEnv;
  agentDir?: string;
}): Promise<DeepInfraDiscoveredCatalog> {
  // Test runs never touch the network, regardless of the injected env.
  if (process.env.NODE_ENV === "test" || process.env.VITEST) {
    return manifestFallbackCatalog();
  }
  const env = options?.env ?? process.env;
  const hasKey = options?.hasApiKey ?? hasDeepInfraApiKey({ env, agentDir: options?.agentDir });
  if (!hasKey) {
    return manifestFallbackCatalog();
  }
  if (cachedCatalog && Date.now() - cachedAt < DISCOVERY_CACHE_TTL_MS) {
    return cachedCatalog;
  }
  try {
    const response = await fetchWithTimeout(
      DEEPINFRA_MODELS_URL,
      { headers: { Accept: "application/json" } },
      DISCOVERY_TIMEOUT_MS,
    );
    if (!response.ok) {
      log.warn(`Failed to discover models: HTTP ${response.status}, using static catalog`);
      return manifestFallbackCatalog();
    }
    // The body is untrusted JSON and may legitimately parse to `null`; guard
    // it here so that case is reported as "no models" rather than surfacing
    // as a TypeError in the catch block.
    const body = (await response.json()) as DeepInfraAgentModelsResponse | null;
    const entries = body?.data;
    if (!Array.isArray(entries) || entries.length === 0) {
      log.warn("No models found from DeepInfra agent-projection endpoint, using static catalog");
      return manifestFallbackCatalog();
    }
    // Keep the first occurrence of each id; rows that fail normalization are dropped.
    const seenIds = new Set<string>();
    const surfaceModels: DeepInfraSurfaceModel[] = [];
    for (const entry of entries) {
      const model = entryToSurfaceModel(entry);
      if (!model || seenIds.has(model.id)) {
        continue;
      }
      seenIds.add(model.id);
      surfaceModels.push(model);
    }
    if (surfaceModels.length === 0) {
      // Previously silent; logged so this fallback is as visible as the others.
      log.warn("No usable models in DeepInfra agent-projection response, using static catalog");
      return manifestFallbackCatalog();
    }
    const catalog = bucketBySurface(surfaceModels);
    // Only successful live results are cached; fallbacks are rebuilt on every call.
    cachedCatalog = catalog;
    cachedAt = Date.now();
    return catalog;
  } catch (error) {
    log.warn(`Discovery failed: ${String(error)}, using static catalog`);
    return manifestFallbackCatalog();
  }
}
/**
 * Chat-only shim for callers that haven't migrated to the per-surface catalog
 * (provider-catalog.ts, augmentModelCatalog).
 *
 * Discovered chat models come first, converted to model definitions; when the
 * chat bucket is empty the vlm bucket stands in for it. Manifest models whose
 * id has not been seen yet are appended afterwards. If neither bucket has
 * rows, the manifest catalog alone is returned.
 *
 * @param options - Passed unchanged to discoverDeepInfraSurfaces.
 * @returns Model definitions, discovered rows first, then remaining manifest rows.
 */
export async function discoverDeepInfraModels(options?: {
  hasApiKey?: boolean;
  env?: NodeJS.ProcessEnv;
  agentDir?: string;
}): Promise<ModelDefinitionConfig[]> {
  const { chat, vlm } = await discoverDeepInfraSurfaces(options);
  const chatSurface = chat.length > 0 ? chat : vlm;
  const manifestDefinitions = DEEPINFRA_MODEL_CATALOG.map(buildDeepInfraModelDefinition);
  if (chatSurface.length === 0) {
    // True empty (no manifest entries either) — keep behavior stable.
    return manifestDefinitions;
  }
  const merged = chatSurface.map(chatSurfaceModelToModelDefinition);
  const knownIds = new Set(merged.map((definition) => definition.id));
  for (const definition of manifestDefinitions) {
    if (knownIds.has(definition.id)) {
      continue;
    }
    knownIds.add(definition.id);
    merged.push(definition);
  }
  return merged;
}