Files
openclaw/src/agents/models-config.providers.discovery.ts
2026-03-09 00:26:20 +00:00

293 lines
8.8 KiB
TypeScript

import type { OpenClawConfig } from "../config/config.js";
import type { ModelDefinitionConfig } from "../config/types.models.js";
import { createSubsystemLogger } from "../logging/subsystem.js";
import { KILOCODE_BASE_URL } from "../providers/kilocode-shared.js";
import {
discoverHuggingfaceModels,
HUGGINGFACE_BASE_URL,
HUGGINGFACE_MODEL_CATALOG,
buildHuggingfaceModelDefinition,
} from "./huggingface-models.js";
import { discoverKilocodeModels } from "./kilocode-models.js";
import { OLLAMA_NATIVE_BASE_URL } from "./ollama-stream.js";
import { discoverVeniceModels, VENICE_BASE_URL } from "./venice-models.js";
import { discoverVercelAiGatewayModels, VERCEL_AI_GATEWAY_BASE_URL } from "./vercel-ai-gateway.js";
type ModelsConfig = NonNullable<OpenClawConfig["models"]>;
type ProviderConfig = NonNullable<ModelsConfig["providers"]>[string];
const log = createSubsystemLogger("agents/model-providers");
const OLLAMA_BASE_URL = OLLAMA_NATIVE_BASE_URL;
const OLLAMA_API_BASE_URL = OLLAMA_BASE_URL;
const OLLAMA_SHOW_CONCURRENCY = 8;
const OLLAMA_SHOW_MAX_MODELS = 200;
const OLLAMA_DEFAULT_CONTEXT_WINDOW = 128000;
const OLLAMA_DEFAULT_MAX_TOKENS = 8192;
const OLLAMA_DEFAULT_COST = {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
};
const VLLM_BASE_URL = "http://127.0.0.1:8000/v1";
const VLLM_DEFAULT_CONTEXT_WINDOW = 128000;
const VLLM_DEFAULT_MAX_TOKENS = 8192;
const VLLM_DEFAULT_COST = {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
};
interface OllamaModel {
name: string;
modified_at: string;
size: number;
digest: string;
details?: {
family?: string;
parameter_size?: string;
};
}
interface OllamaTagsResponse {
models: OllamaModel[];
}
type VllmModelsResponse = {
data?: Array<{
id?: string;
}>;
};
/**
* Derive the Ollama native API base URL from a configured base URL.
*
* Users typically configure `baseUrl` with a `/v1` suffix (e.g.
* `http://192.168.20.14:11434/v1`) for the OpenAI-compatible endpoint.
* The native Ollama API lives at the root (e.g. `/api/tags`), so we
* strip the `/v1` suffix when present.
*/
export function resolveOllamaApiBase(configuredBaseUrl?: string): string {
if (!configuredBaseUrl) {
return OLLAMA_API_BASE_URL;
}
// Strip trailing slash, then strip /v1 suffix if present
const trimmed = configuredBaseUrl.replace(/\/+$/, "");
return trimmed.replace(/\/v1$/i, "");
}
async function queryOllamaContextWindow(
apiBase: string,
modelName: string,
): Promise<number | undefined> {
try {
const response = await fetch(`${apiBase}/api/show`, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ name: modelName }),
signal: AbortSignal.timeout(3000),
});
if (!response.ok) {
return undefined;
}
const data = (await response.json()) as { model_info?: Record<string, unknown> };
if (!data.model_info) {
return undefined;
}
for (const [key, value] of Object.entries(data.model_info)) {
if (key.endsWith(".context_length") && typeof value === "number" && Number.isFinite(value)) {
const contextWindow = Math.floor(value);
if (contextWindow > 0) {
return contextWindow;
}
}
}
return undefined;
} catch {
return undefined;
}
}
async function discoverOllamaModels(
baseUrl?: string,
opts?: { quiet?: boolean },
): Promise<ModelDefinitionConfig[]> {
if (process.env.VITEST || process.env.NODE_ENV === "test") {
return [];
}
try {
const apiBase = resolveOllamaApiBase(baseUrl);
const response = await fetch(`${apiBase}/api/tags`, {
signal: AbortSignal.timeout(5000),
});
if (!response.ok) {
if (!opts?.quiet) {
log.warn(`Failed to discover Ollama models: ${response.status}`);
}
return [];
}
const data = (await response.json()) as OllamaTagsResponse;
if (!data.models || data.models.length === 0) {
log.debug("No Ollama models found on local instance");
return [];
}
const modelsToInspect = data.models.slice(0, OLLAMA_SHOW_MAX_MODELS);
if (modelsToInspect.length < data.models.length && !opts?.quiet) {
log.warn(
`Capping Ollama /api/show inspection to ${OLLAMA_SHOW_MAX_MODELS} models (received ${data.models.length})`,
);
}
const discovered: ModelDefinitionConfig[] = [];
for (let index = 0; index < modelsToInspect.length; index += OLLAMA_SHOW_CONCURRENCY) {
const batch = modelsToInspect.slice(index, index + OLLAMA_SHOW_CONCURRENCY);
const batchDiscovered = await Promise.all(
batch.map(async (model) => {
const modelId = model.name;
const contextWindow = await queryOllamaContextWindow(apiBase, modelId);
const isReasoning =
modelId.toLowerCase().includes("r1") || modelId.toLowerCase().includes("reasoning");
return {
id: modelId,
name: modelId,
reasoning: isReasoning,
input: ["text"],
cost: OLLAMA_DEFAULT_COST,
contextWindow: contextWindow ?? OLLAMA_DEFAULT_CONTEXT_WINDOW,
maxTokens: OLLAMA_DEFAULT_MAX_TOKENS,
} satisfies ModelDefinitionConfig;
}),
);
discovered.push(...batchDiscovered);
}
return discovered;
} catch (error) {
if (!opts?.quiet) {
log.warn(`Failed to discover Ollama models: ${String(error)}`);
}
return [];
}
}
async function discoverVllmModels(
baseUrl: string,
apiKey?: string,
): Promise<ModelDefinitionConfig[]> {
if (process.env.VITEST || process.env.NODE_ENV === "test") {
return [];
}
const trimmedBaseUrl = baseUrl.trim().replace(/\/+$/, "");
const url = `${trimmedBaseUrl}/models`;
try {
const trimmedApiKey = apiKey?.trim();
const response = await fetch(url, {
headers: trimmedApiKey ? { Authorization: `Bearer ${trimmedApiKey}` } : undefined,
signal: AbortSignal.timeout(5000),
});
if (!response.ok) {
log.warn(`Failed to discover vLLM models: ${response.status}`);
return [];
}
const data = (await response.json()) as VllmModelsResponse;
const models = data.data ?? [];
if (models.length === 0) {
log.warn("No vLLM models found on local instance");
return [];
}
return models
.map((model) => ({ id: typeof model.id === "string" ? model.id.trim() : "" }))
.filter((model) => Boolean(model.id))
.map((model) => {
const modelId = model.id;
const lower = modelId.toLowerCase();
const isReasoning =
lower.includes("r1") || lower.includes("reasoning") || lower.includes("think");
return {
id: modelId,
name: modelId,
reasoning: isReasoning,
input: ["text"],
cost: VLLM_DEFAULT_COST,
contextWindow: VLLM_DEFAULT_CONTEXT_WINDOW,
maxTokens: VLLM_DEFAULT_MAX_TOKENS,
} satisfies ModelDefinitionConfig;
});
} catch (error) {
log.warn(`Failed to discover vLLM models: ${String(error)}`);
return [];
}
}
export async function buildVeniceProvider(): Promise<ProviderConfig> {
const models = await discoverVeniceModels();
return {
baseUrl: VENICE_BASE_URL,
api: "openai-completions",
models,
};
}
export async function buildOllamaProvider(
configuredBaseUrl?: string,
opts?: { quiet?: boolean },
): Promise<ProviderConfig> {
const models = await discoverOllamaModels(configuredBaseUrl, opts);
return {
baseUrl: resolveOllamaApiBase(configuredBaseUrl),
api: "ollama",
models,
};
}
export async function buildHuggingfaceProvider(discoveryApiKey?: string): Promise<ProviderConfig> {
const resolvedSecret = discoveryApiKey?.trim() ?? "";
const models =
resolvedSecret !== ""
? await discoverHuggingfaceModels(resolvedSecret)
: HUGGINGFACE_MODEL_CATALOG.map(buildHuggingfaceModelDefinition);
return {
baseUrl: HUGGINGFACE_BASE_URL,
api: "openai-completions",
models,
};
}
export async function buildVercelAiGatewayProvider(): Promise<ProviderConfig> {
return {
baseUrl: VERCEL_AI_GATEWAY_BASE_URL,
api: "anthropic-messages",
models: await discoverVercelAiGatewayModels(),
};
}
export async function buildVllmProvider(params?: {
baseUrl?: string;
apiKey?: string;
}): Promise<ProviderConfig> {
const baseUrl = (params?.baseUrl?.trim() || VLLM_BASE_URL).replace(/\/+$/, "");
const models = await discoverVllmModels(baseUrl, params?.apiKey);
return {
baseUrl,
api: "openai-completions",
models,
};
}
/**
* Build the Kilocode provider with dynamic model discovery from the gateway
* API. Falls back to the static catalog on failure.
*/
export async function buildKilocodeProviderWithDiscovery(): Promise<ProviderConfig> {
const models = await discoverKilocodeModels();
return {
baseUrl: KILOCODE_BASE_URL,
api: "openai-completions",
models,
};
}