From ff4745fc3fb108e72e1d36922810695128e0bc1f Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Mon, 9 Mar 2026 00:24:02 +0000
Subject: [PATCH] refactor(models): split provider discovery helpers

---
 docs/refactor/cleanup.md                      |   2 +-
 .../models-config.providers.discovery.ts      | 325 ++++++++++++++++++
 src/agents/models-config.providers.ts         | 302 +-----------------
 3 files changed, 335 insertions(+), 294 deletions(-)
 create mode 100644 src/agents/models-config.providers.discovery.ts

diff --git a/docs/refactor/cleanup.md b/docs/refactor/cleanup.md
index f825164c278..142bcda3156 100644
--- a/docs/refactor/cleanup.md
+++ b/docs/refactor/cleanup.md
@@ -4,4 +4,4 @@
 - [x] Split `models list` forward-compat tests by concern.
 - [x] Extract provider transport normalization from `pi-embedded-runner/model.ts`.
 - [x] Split `ensureOpenClawModelsJson()` into planning + IO layers.
-- [ ] Split provider discovery helpers out of `models-config.providers.ts`.
+- [x] Split provider discovery helpers out of `models-config.providers.ts`.
diff --git a/src/agents/models-config.providers.discovery.ts b/src/agents/models-config.providers.discovery.ts
new file mode 100644
index 00000000000..caab5cafb4e
--- /dev/null
+++ b/src/agents/models-config.providers.discovery.ts
@@ -0,0 +1,325 @@
+import type { OpenClawConfig } from "../config/config.js";
+import type { ModelDefinitionConfig } from "../config/types.models.js";
+import { createSubsystemLogger } from "../logging/subsystem.js";
+import { KILOCODE_BASE_URL } from "../providers/kilocode-shared.js";
+import {
+  discoverHuggingfaceModels,
+  HUGGINGFACE_BASE_URL,
+  HUGGINGFACE_MODEL_CATALOG,
+  buildHuggingfaceModelDefinition,
+} from "./huggingface-models.js";
+import { discoverKilocodeModels } from "./kilocode-models.js";
+import { OLLAMA_NATIVE_BASE_URL } from "./ollama-stream.js";
+import { discoverVeniceModels, VENICE_BASE_URL } from "./venice-models.js";
+import { discoverVercelAiGatewayModels, VERCEL_AI_GATEWAY_BASE_URL } from "./vercel-ai-gateway.js";
+
+type ModelsConfig = NonNullable<OpenClawConfig["models"]>;
+type ProviderConfig = NonNullable<ModelsConfig["providers"]>[string];
+
+const log = createSubsystemLogger("agents/model-providers");
+
+const OLLAMA_BASE_URL = OLLAMA_NATIVE_BASE_URL;
+const OLLAMA_API_BASE_URL = OLLAMA_BASE_URL;
+const OLLAMA_SHOW_CONCURRENCY = 8;
+const OLLAMA_SHOW_MAX_MODELS = 200;
+const OLLAMA_DEFAULT_CONTEXT_WINDOW = 128000;
+const OLLAMA_DEFAULT_MAX_TOKENS = 8192;
+const OLLAMA_DEFAULT_COST = {
+  input: 0,
+  output: 0,
+  cacheRead: 0,
+  cacheWrite: 0,
+};
+
+const VLLM_BASE_URL = "http://127.0.0.1:8000/v1";
+const VLLM_DEFAULT_CONTEXT_WINDOW = 128000;
+const VLLM_DEFAULT_MAX_TOKENS = 8192;
+const VLLM_DEFAULT_COST = {
+  input: 0,
+  output: 0,
+  cacheRead: 0,
+  cacheWrite: 0,
+};
+
+interface OllamaModel {
+  name: string;
+  modified_at: string;
+  size: number;
+  digest: string;
+  details?: {
+    family?: string;
+    parameter_size?: string;
+  };
+}
+
+interface OllamaTagsResponse {
+  models: OllamaModel[];
+}
+
+type VllmModelsResponse = {
+  data?: Array<{
+    id?: string;
+  }>;
+};
+
+/**
+ * Derive the Ollama native API base URL from a configured base URL.
+ *
+ * Users typically configure `baseUrl` with a `/v1` suffix (e.g.
+ * `http://192.168.20.14:11434/v1`) for the OpenAI-compatible endpoint.
+ * The native Ollama API lives at the root (e.g. `/api/tags`), so we
+ * strip the `/v1` suffix when present.
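+ *
+ * Illustrative examples (host is a placeholder):
+ *   resolveOllamaApiBase("http://192.168.20.14:11434/v1") // => "http://192.168.20.14:11434"
+ *   resolveOllamaApiBase()                                // => OLLAMA_API_BASE_URL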
+ */
+export function resolveOllamaApiBase(configuredBaseUrl?: string): string {
+  if (!configuredBaseUrl) {
+    return OLLAMA_API_BASE_URL;
+  }
+  // Strip trailing slash, then strip /v1 suffix if present
+  const trimmed = configuredBaseUrl.replace(/\/+$/, "");
+  return trimmed.replace(/\/v1$/i, "");
+}
+
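+/**
+ * Query the native `POST /api/show` endpoint for a model and scan its
+ * `model_info` for a `*.context_length` entry. The key prefix varies by
+ * model family (e.g. `llama.context_length`; illustrative), so we match
+ * on the suffix. Returns `undefined` on any failure; callers fall back
+ * to OLLAMA_DEFAULT_CONTEXT_WINDOW.
+ */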
+async function queryOllamaContextWindow(
+  apiBase: string,
+  modelName: string,
+): Promise<number | undefined> {
+  try {
+    const response = await fetch(`${apiBase}/api/show`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ name: modelName }),
+      signal: AbortSignal.timeout(3000),
+    });
+    if (!response.ok) {
+      return undefined;
+    }
+    const data = (await response.json()) as { model_info?: Record<string, unknown> };
+    if (!data.model_info) {
+      return undefined;
+    }
+    for (const [key, value] of Object.entries(data.model_info)) {
+      if (key.endsWith(".context_length") && typeof value === "number" && Number.isFinite(value)) {
+        const contextWindow = Math.floor(value);
+        if (contextWindow > 0) {
+          return contextWindow;
+        }
+      }
+    }
+    return undefined;
+  } catch {
+    return undefined;
+  }
+}
+
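+/**
+ * Discover models from a local Ollama instance via `GET /api/tags`, then
+ * probe each model's context window through `/api/show` in batches of
+ * OLLAMA_SHOW_CONCURRENCY, capped at OLLAMA_SHOW_MAX_MODELS. Skipped in
+ * test environments; returns [] on any failure.
+ */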
+async function discoverOllamaModels(
+  baseUrl?: string,
+  opts?: { quiet?: boolean },
+): Promise<ModelDefinitionConfig[]> {
+  // Skip Ollama discovery in test environments
+  if (process.env.VITEST || process.env.NODE_ENV === "test") {
+    return [];
+  }
+  try {
+    const apiBase = resolveOllamaApiBase(baseUrl);
+    const response = await fetch(`${apiBase}/api/tags`, {
+      signal: AbortSignal.timeout(5000),
+    });
+    if (!response.ok) {
+      if (!opts?.quiet) {
+        log.warn(`Failed to discover Ollama models: ${response.status}`);
+      }
+      return [];
+    }
+    const data = (await response.json()) as OllamaTagsResponse;
+    if (!data.models || data.models.length === 0) {
+      log.debug("No Ollama models found on local instance");
+      return [];
+    }
+    const modelsToInspect = data.models.slice(0, OLLAMA_SHOW_MAX_MODELS);
+    if (modelsToInspect.length < data.models.length && !opts?.quiet) {
+      log.warn(
+        `Capping Ollama /api/show inspection to ${OLLAMA_SHOW_MAX_MODELS} models (received ${data.models.length})`,
+      );
+    }
+    const discovered: ModelDefinitionConfig[] = [];
+    for (let index = 0; index < modelsToInspect.length; index += OLLAMA_SHOW_CONCURRENCY) {
+      const batch = modelsToInspect.slice(index, index + OLLAMA_SHOW_CONCURRENCY);
+      const batchDiscovered = await Promise.all(
+        batch.map(async (model) => {
+          const modelId = model.name;
+          const contextWindow = await queryOllamaContextWindow(apiBase, modelId);
+          const isReasoning =
+            modelId.toLowerCase().includes("r1") || modelId.toLowerCase().includes("reasoning");
+          return {
+            id: modelId,
+            name: modelId,
+            reasoning: isReasoning,
+            input: ["text"],
+            cost: OLLAMA_DEFAULT_COST,
+            contextWindow: contextWindow ?? OLLAMA_DEFAULT_CONTEXT_WINDOW,
+            maxTokens: OLLAMA_DEFAULT_MAX_TOKENS,
+          } satisfies ModelDefinitionConfig;
+        }),
+      );
+      discovered.push(...batchDiscovered);
+    }
+    return discovered;
+  } catch (error) {
+    if (!opts?.quiet) {
+      log.warn(`Failed to discover Ollama models: ${String(error)}`);
+    }
+    return [];
+  }
+}
+
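+/**
+ * Discover models from a vLLM server via its OpenAI-compatible
+ * `GET {baseUrl}/models` endpoint; per VllmModelsResponse the body looks
+ * like `{ "data": [{ "id": "..." }] }`. Entries without an id are dropped.
+ */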
+async function discoverVllmModels(
+  baseUrl: string,
+  apiKey?: string,
+): Promise<ModelDefinitionConfig[]> {
+  // Skip vLLM discovery in test environments
+  if (process.env.VITEST || process.env.NODE_ENV === "test") {
+    return [];
+  }
+
+  const trimmedBaseUrl = baseUrl.trim().replace(/\/+$/, "");
+  const url = `${trimmedBaseUrl}/models`;
+
+  try {
+    const trimmedApiKey = apiKey?.trim();
+    const response = await fetch(url, {
+      headers: trimmedApiKey ? { Authorization: `Bearer ${trimmedApiKey}` } : undefined,
+      signal: AbortSignal.timeout(5000),
+    });
+    if (!response.ok) {
+      log.warn(`Failed to discover vLLM models: ${response.status}`);
+      return [];
+    }
+    const data = (await response.json()) as VllmModelsResponse;
+    const models = data.data ?? [];
+    if (models.length === 0) {
+      log.warn("No vLLM models found on local instance");
+      return [];
+    }
+
+    return models
+      .map((model) => ({ id: typeof model.id === "string" ? model.id.trim() : "" }))
+      .filter((model) => Boolean(model.id))
+      .map((model) => {
+        const modelId = model.id;
+        const lower = modelId.toLowerCase();
+        const isReasoning =
+          lower.includes("r1") || lower.includes("reasoning") || lower.includes("think");
+        return {
+          id: modelId,
+          name: modelId,
+          reasoning: isReasoning,
+          input: ["text"],
+          cost: VLLM_DEFAULT_COST,
+          contextWindow: VLLM_DEFAULT_CONTEXT_WINDOW,
+          maxTokens: VLLM_DEFAULT_MAX_TOKENS,
+        } satisfies ModelDefinitionConfig;
+      });
+  } catch (error) {
+    log.warn(`Failed to discover vLLM models: ${String(error)}`);
+    return [];
+  }
+}
+
+export async function buildVeniceProvider(): Promise<ProviderConfig> {
+  const models = await discoverVeniceModels();
+  return {
+    baseUrl: VENICE_BASE_URL,
+    api: "openai-completions",
+    models,
+  };
+}
+
+export async function buildOllamaProvider(
+  configuredBaseUrl?: string,
+  opts?: { quiet?: boolean },
+): Promise<ProviderConfig> {
+  const models = await discoverOllamaModels(configuredBaseUrl, opts);
+  return {
+    baseUrl: resolveOllamaApiBase(configuredBaseUrl),
+    api: "ollama",
+    models,
+  };
+}
+
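+/**
+ * Build the Hugging Face provider. With a discovery API key, models are
+ * discovered live; without one, the static HUGGINGFACE_MODEL_CATALOG is used.
+ */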
+export async function buildHuggingfaceProvider(discoveryApiKey?: string): Promise<ProviderConfig> {
+  const resolvedSecret = discoveryApiKey?.trim() ?? "";
+  const models =
+    resolvedSecret !== ""
+      ? await discoverHuggingfaceModels(resolvedSecret)
+      : HUGGINGFACE_MODEL_CATALOG.map(buildHuggingfaceModelDefinition);
+  return {
+    baseUrl: HUGGINGFACE_BASE_URL,
+    api: "openai-completions",
+    models,
+  };
+}
+
+export async function buildVercelAiGatewayProvider(): Promise<ProviderConfig> {
+  return {
+    baseUrl: VERCEL_AI_GATEWAY_BASE_URL,
+    api: "anthropic-messages",
+    models: await discoverVercelAiGatewayModels(),
+  };
+}
+
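+/**
+ * Build the vLLM provider. The configured base URL (default VLLM_BASE_URL)
+ * is trimmed of trailing slashes, e.g. (illustrative)
+ * `http://127.0.0.1:8000/v1/` becomes `http://127.0.0.1:8000/v1`.
+ */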
+export async function buildVllmProvider(params?: {
+  baseUrl?: string;
+  apiKey?: string;
+}): Promise<ProviderConfig> {
+  const baseUrl = (params?.baseUrl?.trim() || VLLM_BASE_URL).replace(/\/+$/, "");
+  const models = await discoverVllmModels(baseUrl, params?.apiKey);
+  return {
+    baseUrl,
+    api: "openai-completions",
+    models,
+  };
+}
+
+/**
+ * Build the Kilocode provider with dynamic model discovery from the gateway
+ * API. Falls back to the static catalog on failure.
+ */
+export async function buildKilocodeProviderWithDiscovery(): Promise<ProviderConfig> {
+  const models = await discoverKilocodeModels();
+  return {
+    baseUrl: KILOCODE_BASE_URL,
+    api: "openai-completions",
+    models,
+  };
+}
diff --git a/src/agents/models-config.providers.ts b/src/agents/models-config.providers.ts
index 48be848dcbe..8f8ffb9201c 100644
--- a/src/agents/models-config.providers.ts
+++ b/src/agents/models-config.providers.ts
@@ -1,12 +1,9 @@
 import type { OpenClawConfig } from "../config/config.js";
-import type { ModelDefinitionConfig } from "../config/types.models.js";
 import { coerceSecretRef, resolveSecretInputRef } from "../config/types.secrets.js";
-import { createSubsystemLogger } from "../logging/subsystem.js";
 import {
   DEFAULT_COPILOT_API_BASE_URL,
   resolveCopilotApiToken,
 } from "../providers/github-copilot-token.js";
-import { KILOCODE_BASE_URL } from "../providers/kilocode-shared.js";
 import { normalizeOptionalSecretInput } from "../utils/normalize-secret-input.js";
 import { ensureAuthProfileStore, listProfilesForProvider } from "./auth-profiles.js";
 import { discoverBedrockModels } from "./bedrock-discovery.js";
@@ -15,12 +12,14 @@ import {
   resolveCloudflareAiGatewayBaseUrl,
 } from "./cloudflare-ai-gateway.js";
 import {
-  discoverHuggingfaceModels,
-  HUGGINGFACE_BASE_URL,
-  HUGGINGFACE_MODEL_CATALOG,
-  buildHuggingfaceModelDefinition,
-} from "./huggingface-models.js";
-import { discoverKilocodeModels } from "./kilocode-models.js";
+  buildHuggingfaceProvider,
+  buildKilocodeProviderWithDiscovery,
+  buildOllamaProvider,
+  buildVeniceProvider,
+  buildVercelAiGatewayProvider,
+  buildVllmProvider,
+  resolveOllamaApiBase,
+} from "./models-config.providers.discovery.js";
 import {
   buildBytePlusCodingProvider,
   buildBytePlusProvider,
@@ -63,222 +62,11 @@ import {
   resolveEnvSecretRefHeaderValueMarker,
 } from "./model-auth-markers.js";
 import { resolveAwsSdkEnvVarName, resolveEnvApiKey } from "./model-auth.js";
-import { OLLAMA_NATIVE_BASE_URL } from "./ollama-stream.js";
-import { discoverVeniceModels, VENICE_BASE_URL } from "./venice-models.js";
-import { discoverVercelAiGatewayModels, VERCEL_AI_GATEWAY_BASE_URL } from "./vercel-ai-gateway.js";
+export { resolveOllamaApiBase } from "./models-config.providers.discovery.js";
 
 type ModelsConfig = NonNullable<OpenClawConfig["models"]>;
 export type ProviderConfig = NonNullable<ModelsConfig["providers"]>[string];
 
-const OLLAMA_BASE_URL = OLLAMA_NATIVE_BASE_URL;
-const OLLAMA_API_BASE_URL = OLLAMA_BASE_URL;
-const OLLAMA_SHOW_CONCURRENCY = 8;
-const OLLAMA_SHOW_MAX_MODELS = 200;
-const OLLAMA_DEFAULT_CONTEXT_WINDOW = 128000;
-const OLLAMA_DEFAULT_MAX_TOKENS = 8192;
-const OLLAMA_DEFAULT_COST = {
-  input: 0,
-  output: 0,
-  cacheRead: 0,
-  cacheWrite: 0,
-};
-
-const VLLM_BASE_URL = "http://127.0.0.1:8000/v1";
-const VLLM_DEFAULT_CONTEXT_WINDOW = 128000;
-const VLLM_DEFAULT_MAX_TOKENS = 8192;
-const VLLM_DEFAULT_COST = {
-  input: 0,
-  output: 0,
-  cacheRead: 0,
-  cacheWrite: 0,
-};
-
-const log = createSubsystemLogger("agents/model-providers");
-
-interface OllamaModel {
-  name: string;
-  modified_at: string;
-  size: number;
-  digest: string;
-  details?: {
-    family?: string;
-    parameter_size?: string;
-  };
-}
-
-interface OllamaTagsResponse {
-  models: OllamaModel[];
-}
-
-type VllmModelsResponse = {
-  data?: Array<{
-    id?: string;
-  }>;
-};
-
-/**
- * Derive the Ollama native API base URL from a configured base URL.
- *
- * Users typically configure `baseUrl` with a `/v1` suffix (e.g.
- * `http://192.168.20.14:11434/v1`) for the OpenAI-compatible endpoint.
- * The native Ollama API lives at the root (e.g. `/api/tags`), so we
- * strip the `/v1` suffix when present.
- */
-export function resolveOllamaApiBase(configuredBaseUrl?: string): string {
-  if (!configuredBaseUrl) {
-    return OLLAMA_API_BASE_URL;
-  }
-  // Strip trailing slash, then strip /v1 suffix if present
-  const trimmed = configuredBaseUrl.replace(/\/+$/, "");
-  return trimmed.replace(/\/v1$/i, "");
-}
-
-async function queryOllamaContextWindow(
-  apiBase: string,
-  modelName: string,
-): Promise<number | undefined> {
-  try {
-    const response = await fetch(`${apiBase}/api/show`, {
-      method: "POST",
-      headers: { "Content-Type": "application/json" },
-      body: JSON.stringify({ name: modelName }),
-      signal: AbortSignal.timeout(3000),
-    });
-    if (!response.ok) {
-      return undefined;
-    }
-    const data = (await response.json()) as { model_info?: Record<string, unknown> };
-    if (!data.model_info) {
-      return undefined;
-    }
-    for (const [key, value] of Object.entries(data.model_info)) {
-      if (key.endsWith(".context_length") && typeof value === "number" && Number.isFinite(value)) {
-        const contextWindow = Math.floor(value);
-        if (contextWindow > 0) {
-          return contextWindow;
-        }
-      }
-    }
-    return undefined;
-  } catch {
-    return undefined;
-  }
-}
-
-async function discoverOllamaModels(
-  baseUrl?: string,
-  opts?: { quiet?: boolean },
-): Promise<ModelDefinitionConfig[]> {
-  // Skip Ollama discovery in test environments
-  if (process.env.VITEST || process.env.NODE_ENV === "test") {
-    return [];
-  }
-  try {
-    const apiBase = resolveOllamaApiBase(baseUrl);
-    const response = await fetch(`${apiBase}/api/tags`, {
-      signal: AbortSignal.timeout(5000),
-    });
-    if (!response.ok) {
-      if (!opts?.quiet) {
-        log.warn(`Failed to discover Ollama models: ${response.status}`);
-      }
-      return [];
-    }
-    const data = (await response.json()) as OllamaTagsResponse;
-    if (!data.models || data.models.length === 0) {
-      log.debug("No Ollama models found on local instance");
-      return [];
-    }
-    const modelsToInspect = data.models.slice(0, OLLAMA_SHOW_MAX_MODELS);
-    if (modelsToInspect.length < data.models.length && !opts?.quiet) {
-      log.warn(
-        `Capping Ollama /api/show inspection to ${OLLAMA_SHOW_MAX_MODELS} models (received ${data.models.length})`,
-      );
-    }
-    const discovered: ModelDefinitionConfig[] = [];
-    for (let index = 0; index < modelsToInspect.length; index += OLLAMA_SHOW_CONCURRENCY) {
-      const batch = modelsToInspect.slice(index, index + OLLAMA_SHOW_CONCURRENCY);
-      const batchDiscovered = await Promise.all(
-        batch.map(async (model) => {
-          const modelId = model.name;
-          const contextWindow = await queryOllamaContextWindow(apiBase, modelId);
-          const isReasoning =
-            modelId.toLowerCase().includes("r1") || modelId.toLowerCase().includes("reasoning");
-          return {
-            id: modelId,
-            name: modelId,
-            reasoning: isReasoning,
-            input: ["text"],
-            cost: OLLAMA_DEFAULT_COST,
-            contextWindow: contextWindow ?? OLLAMA_DEFAULT_CONTEXT_WINDOW,
-            maxTokens: OLLAMA_DEFAULT_MAX_TOKENS,
-          } satisfies ModelDefinitionConfig;
-        }),
-      );
-      discovered.push(...batchDiscovered);
-    }
-    return discovered;
-  } catch (error) {
-    if (!opts?.quiet) {
-      log.warn(`Failed to discover Ollama models: ${String(error)}`);
-    }
-    return [];
-  }
-}
-
-async function discoverVllmModels(
-  baseUrl: string,
-  apiKey?: string,
-): Promise<ModelDefinitionConfig[]> {
-  // Skip vLLM discovery in test environments
-  if (process.env.VITEST || process.env.NODE_ENV === "test") {
-    return [];
-  }
-
-  const trimmedBaseUrl = baseUrl.trim().replace(/\/+$/, "");
-  const url = `${trimmedBaseUrl}/models`;
-
-  try {
-    const trimmedApiKey = apiKey?.trim();
-    const response = await fetch(url, {
-      headers: trimmedApiKey ? { Authorization: `Bearer ${trimmedApiKey}` } : undefined,
-      signal: AbortSignal.timeout(5000),
-    });
-    if (!response.ok) {
-      log.warn(`Failed to discover vLLM models: ${response.status}`);
-      return [];
-    }
-    const data = (await response.json()) as VllmModelsResponse;
-    const models = data.data ?? [];
-    if (models.length === 0) {
-      log.warn("No vLLM models found on local instance");
-      return [];
-    }
-
-    return models
-      .map((m) => ({ id: typeof m.id === "string" ? m.id.trim() : "" }))
-      .filter((m) => Boolean(m.id))
-      .map((m) => {
-        const modelId = m.id;
-        const lower = modelId.toLowerCase();
-        const isReasoning =
-          lower.includes("r1") || lower.includes("reasoning") || lower.includes("think");
-        return {
-          id: modelId,
-          name: modelId,
-          reasoning: isReasoning,
-          input: ["text"],
-          cost: VLLM_DEFAULT_COST,
-          contextWindow: VLLM_DEFAULT_CONTEXT_WINDOW,
-          maxTokens: VLLM_DEFAULT_MAX_TOKENS,
-        } satisfies ModelDefinitionConfig;
-      });
-  } catch (error) {
-    log.warn(`Failed to discover vLLM models: ${String(error)}`);
-    return [];
-  }
-}
-
 const ENV_VAR_NAME_RE = /^[A-Z_][A-Z0-9_]*$/;
 
 function normalizeApiKeyConfig(value: string): string {
@@ -641,78 +429,6 @@ export function normalizeProviders(params: {
   return mutated ? next : providers;
 }
 
-async function buildVeniceProvider(): Promise<ProviderConfig> {
-  const models = await discoverVeniceModels();
-  return {
-    baseUrl: VENICE_BASE_URL,
-    api: "openai-completions",
-    models,
-  };
-}
-
-async function buildOllamaProvider(
-  configuredBaseUrl?: string,
-  opts?: { quiet?: boolean },
-): Promise<ProviderConfig> {
-  const models = await discoverOllamaModels(configuredBaseUrl, opts);
-  return {
-    baseUrl: resolveOllamaApiBase(configuredBaseUrl),
-    api: "ollama",
-    models,
-  };
-}
-
-async function buildHuggingfaceProvider(discoveryApiKey?: string): Promise<ProviderConfig> {
-  const resolvedSecret = toDiscoveryApiKey(discoveryApiKey) ?? "";
-  const models =
-    resolvedSecret !== ""
-      ? await discoverHuggingfaceModels(resolvedSecret)
-      : HUGGINGFACE_MODEL_CATALOG.map(buildHuggingfaceModelDefinition);
-  return {
-    baseUrl: HUGGINGFACE_BASE_URL,
-    api: "openai-completions",
-    models,
-  };
-}
-
-async function buildVercelAiGatewayProvider(): Promise<ProviderConfig> {
-  return {
-    baseUrl: VERCEL_AI_GATEWAY_BASE_URL,
-    api: "anthropic-messages",
-    models: await discoverVercelAiGatewayModels(),
-  };
-}
-
-async function buildVllmProvider(params?: {
-  baseUrl?: string;
-  apiKey?: string;
-}): Promise<ProviderConfig> {
-  const baseUrl = (params?.baseUrl?.trim() || VLLM_BASE_URL).replace(/\/+$/, "");
-  const models = await discoverVllmModels(baseUrl, params?.apiKey);
-  return {
-    baseUrl,
-    api: "openai-completions",
-    models,
-  };
-}
-
-/**
- * Build the Kilocode provider with dynamic model discovery from the gateway
- * API. Falls back to the static catalog on failure.
- *
- * Used by {@link resolveImplicitProviders} (async context). The sync
- * {@link buildKilocodeProvider} is kept for the onboarding config path
- * which cannot await.
- */
-async function buildKilocodeProviderWithDiscovery(): Promise<ProviderConfig> {
-  const models = await discoverKilocodeModels();
-  return {
-    baseUrl: KILOCODE_BASE_URL,
-    api: "openai-completions",
-    models,
-  };
-}
-
 type ImplicitProviderParams = {
   agentDir: string;
   config?: OpenClawConfig;