fix(ollama): unify context window handling across discovery, merge, and OpenAI-compat transport (#29205)

* fix(ollama): inject num_ctx for OpenAI-compatible transport

* fix(ollama): discover per-model context and preserve higher limits

* fix(agents): prefer matching provider model for fallback limits

* fix(types): require numeric token limits in provider model merge

* fix(types): accept unknown payload in ollama num_ctx wrapper

* fix(types): simplify ollama settled-result extraction

* config(models): add provider flag for Ollama OpenAI num_ctx injection

* config(schema): allow provider num_ctx injection flag

* config(labels): label provider num_ctx injection flag

* config(help): document provider num_ctx injection flag

* agents(ollama): gate OpenAI num_ctx injection with provider config

* tests(ollama): cover provider num_ctx injection flag behavior

* docs(config): list provider num_ctx injection option

* docs(ollama): document OpenAI num_ctx injection toggle

* docs(config): clarify merge token-limit precedence

* config(help): note merge uses higher model token limits

* fix(ollama): cap /api/show discovery concurrency

* fix(ollama): restrict num_ctx injection to OpenAI compat

* tests(ollama): cover ipv6 and compat num_ctx gating

* fix(ollama): detect remote compat endpoints for ollama-labeled providers

* fix(ollama): cap per-model /api/show lookups to bound discovery load
Authored by Vincent Koc on 2026-02-27 17:20:47 -08:00; committed by GitHub
parent 70a4f25ab1
commit f16ecd1dac
14 changed files with 582 additions and 21 deletions
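At the transport level, the headline change is a single injected field. A minimal sketch of the OpenAI-compatible request body after injection — illustrative values, assuming a model with a 131072-token context window; only `options.num_ctx` is OpenClaw-specific, the rest is the standard chat-completions shape:

```ts
// Sketch of an Ollama OpenAI-compatible request body after injection.
// Values are illustrative; only options.num_ctx is added by OpenClaw.
const requestBody = {
  model: "qwen3:32b",
  messages: [{ role: "user", content: "hello" }],
  stream: true,
  // Without this field, Ollama's /v1 endpoint silently runs with a 4096 context.
  options: { num_ctx: 131072 },
};
```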

View File

@@ -1863,6 +1863,7 @@ OpenClaw uses the pi-coding-agent model catalog. Add custom providers via `model
 - Merge precedence for matching provider IDs:
   - Non-empty agent `models.json` `apiKey`/`baseUrl` win.
   - Empty or missing agent `apiKey`/`baseUrl` fall back to `models.providers` in config.
+  - Matching model `contextWindow`/`maxTokens` use the higher value between explicit config and implicit catalog values.
 - Use `models.mode: "replace"` when you want config to fully rewrite `models.json`.
 ### Provider field details
@@ -1872,6 +1873,7 @@ OpenClaw uses the pi-coding-agent model catalog. Add custom providers via `model
 - `models.providers.*.api`: request adapter (`openai-completions`, `openai-responses`, `anthropic-messages`, `google-generative-ai`, etc).
 - `models.providers.*.apiKey`: provider credential (prefer SecretRef/env substitution).
 - `models.providers.*.auth`: auth strategy (`api-key`, `token`, `oauth`, `aws-sdk`).
+- `models.providers.*.injectNumCtxForOpenAICompat`: for Ollama + `openai-completions`, inject `options.num_ctx` into requests (default: `true`).
 - `models.providers.*.authHeader`: force credential transport in the `Authorization` header when required.
 - `models.providers.*.baseUrl`: upstream API base URL.
 - `models.providers.*.headers`: extra static headers for proxy/tenant routing.
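To make the merge-precedence rules above concrete, here is a sketch of how a merged provider entry resolves, assuming an agent `models.json` entry with an empty `apiKey` (hypothetical values; the `||` fallback mirrors the stated "non-empty wins" rule):

```ts
// Hypothetical merge of an agent models.json entry with config models.providers.
const agentEntry = { apiKey: "", baseUrl: "http://10.0.0.5:11434/v1" };
const configEntry = { apiKey: "ollama-local", baseUrl: "http://127.0.0.1:11434/v1" };

const merged = {
  apiKey: agentEntry.apiKey || configEntry.apiKey, // "" falls back -> "ollama-local"
  baseUrl: agentEntry.baseUrl || configEntry.baseUrl, // non-empty agent value wins
};
```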

View File

@@ -199,6 +199,7 @@ If you need to use the OpenAI-compatible endpoint instead (e.g., behind a proxy
       ollama: {
         baseUrl: "http://ollama-host:11434/v1",
         api: "openai-completions",
+        injectNumCtxForOpenAICompat: true, // default: true
         apiKey: "ollama-local",
         models: [...]
       }
@@ -209,6 +210,24 @@ If you need to use the OpenAI-compatible endpoint instead (e.g., behind a proxy
 This mode may not support streaming + tool calling simultaneously. You may need to disable streaming with `params: { streaming: false }` in model config.
+
+When `api: "openai-completions"` is used with Ollama, OpenClaw injects `options.num_ctx` by default so Ollama does not silently fall back to a 4096 context window. If your proxy/upstream rejects unknown `options` fields, disable this behavior:
+
+```json5
+{
+  models: {
+    providers: {
+      ollama: {
+        baseUrl: "http://ollama-host:11434/v1",
+        api: "openai-completions",
+        injectNumCtxForOpenAICompat: false,
+        apiKey: "ollama-local",
+        models: [...]
+      }
+    }
+  }
+}
+```
 ### Context windows
 For auto-discovered models, OpenClaw uses the context window reported by Ollama when available, otherwise it defaults to `8192`. You can override `contextWindow` and `maxTokens` in explicit provider config.

View File

@@ -307,4 +307,57 @@ describe("models-config", () => {
       }
     });
   });
+  it("preserves explicit larger token limits when they exceed implicit catalog defaults", async () => {
+    await withTempHome(async () => {
+      const prevKey = process.env.MOONSHOT_API_KEY;
+      process.env.MOONSHOT_API_KEY = "sk-moonshot-test";
+      try {
+        const cfg: OpenClawConfig = {
+          models: {
+            providers: {
+              moonshot: {
+                baseUrl: "https://api.moonshot.ai/v1",
+                api: "openai-completions",
+                models: [
+                  {
+                    id: "kimi-k2.5",
+                    name: "Kimi K2.5",
+                    reasoning: false,
+                    input: ["text"],
+                    cost: { input: 123, output: 456, cacheRead: 0, cacheWrite: 0 },
+                    contextWindow: 350000,
+                    maxTokens: 16384,
+                  },
+                ],
+              },
+            },
+          },
+        };
+        await ensureOpenClawModelsJson(cfg);
+        const parsed = await readGeneratedModelsJson<{
+          providers: Record<
+            string,
+            {
+              models?: Array<{
+                id: string;
+                contextWindow?: number;
+                maxTokens?: number;
+              }>;
+            }
+          >;
+        }>();
+        const kimi = parsed.providers.moonshot?.models?.find((model) => model.id === "kimi-k2.5");
+        expect(kimi?.contextWindow).toBe(350000);
+        expect(kimi?.maxTokens).toBe(16384);
+      } finally {
+        if (prevKey === undefined) {
+          delete process.env.MOONSHOT_API_KEY;
+        } else {
+          process.env.MOONSHOT_API_KEY = prevKey;
+        }
+      }
+    });
+  });
 });

View File

@@ -1,9 +1,14 @@
 import { mkdtempSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
-import { describe, expect, it } from "vitest";
+import { afterEach, describe, expect, it, vi } from "vitest";
 import { resolveImplicitProviders, resolveOllamaApiBase } from "./models-config.providers.js";
+
+afterEach(() => {
+  vi.unstubAllEnvs();
+  vi.unstubAllGlobals();
+});
 describe("resolveOllamaApiBase", () => {
   it("returns default localhost base when no configured URL is provided", () => {
     expect(resolveOllamaApiBase()).toBe("http://127.0.0.1:11434");
@@ -71,6 +76,110 @@ describe("Ollama provider", () => {
     }
   });
+  it("discovers per-model context windows from /api/show", async () => {
+    const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-"));
+    process.env.OLLAMA_API_KEY = "test-key";
+    vi.stubEnv("VITEST", "");
+    vi.stubEnv("NODE_ENV", "development");
+    const fetchMock = vi
+      .fn()
+      .mockResolvedValueOnce({
+        ok: true,
+        json: async () => ({
+          models: [
+            { name: "qwen3:32b", modified_at: "", size: 1, digest: "" },
+            { name: "llama3.3:70b", modified_at: "", size: 1, digest: "" },
+          ],
+        }),
+      })
+      .mockResolvedValueOnce({
+        ok: true,
+        json: async () => ({ model_info: { "qwen3.context_length": 131072 } }),
+      })
+      .mockResolvedValueOnce({
+        ok: true,
+        json: async () => ({ model_info: { "llama.context_length": 65536 } }),
+      });
+    vi.stubGlobal("fetch", fetchMock);
+    try {
+      const providers = await resolveImplicitProviders({ agentDir });
+      const models = providers?.ollama?.models ?? [];
+      const qwen = models.find((model) => model.id === "qwen3:32b");
+      const llama = models.find((model) => model.id === "llama3.3:70b");
+      expect(qwen?.contextWindow).toBe(131072);
+      expect(llama?.contextWindow).toBe(65536);
+      expect(fetchMock).toHaveBeenCalledTimes(3);
+    } finally {
+      delete process.env.OLLAMA_API_KEY;
+    }
+  });
+  it("falls back to default context window when /api/show fails", async () => {
+    const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-"));
+    process.env.OLLAMA_API_KEY = "test-key";
+    vi.stubEnv("VITEST", "");
+    vi.stubEnv("NODE_ENV", "development");
+    const fetchMock = vi
+      .fn()
+      .mockResolvedValueOnce({
+        ok: true,
+        json: async () => ({
+          models: [{ name: "qwen3:32b", modified_at: "", size: 1, digest: "" }],
+        }),
+      })
+      .mockResolvedValueOnce({
+        ok: false,
+        status: 500,
+      });
+    vi.stubGlobal("fetch", fetchMock);
+    try {
+      const providers = await resolveImplicitProviders({ agentDir });
+      const model = providers?.ollama?.models?.find((entry) => entry.id === "qwen3:32b");
+      expect(model?.contextWindow).toBe(128000);
+      expect(fetchMock).toHaveBeenCalledTimes(2);
+    } finally {
+      delete process.env.OLLAMA_API_KEY;
+    }
+  });
+  it("caps /api/show requests when /api/tags returns a very large model list", async () => {
+    const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-"));
+    process.env.OLLAMA_API_KEY = "test-key";
+    vi.stubEnv("VITEST", "");
+    vi.stubEnv("NODE_ENV", "development");
+    const manyModels = Array.from({ length: 250 }, (_, idx) => ({
+      name: `model-${idx}`,
+      modified_at: "",
+      size: 1,
+      digest: "",
+    }));
+    const fetchMock = vi.fn(async (url: string) => {
+      if (url.endsWith("/api/tags")) {
+        return {
+          ok: true,
+          json: async () => ({ models: manyModels }),
+        };
+      }
+      return {
+        ok: true,
+        json: async () => ({ model_info: { "llama.context_length": 65536 } }),
+      };
+    });
+    vi.stubGlobal("fetch", fetchMock);
+    try {
+      const providers = await resolveImplicitProviders({ agentDir });
+      const models = providers?.ollama?.models ?? [];
+      // 1 call for /api/tags + 200 capped /api/show calls.
+      expect(fetchMock).toHaveBeenCalledTimes(201);
+      expect(models).toHaveLength(200);
+    } finally {
+      delete process.env.OLLAMA_API_KEY;
+    }
+  });
   it("should have correct model structure without streaming override", () => {
     const mockOllamaModel = {
       id: "llama3.3:latest",

View File

@@ -144,6 +144,8 @@ const QWEN_PORTAL_DEFAULT_COST = {
 const OLLAMA_BASE_URL = OLLAMA_NATIVE_BASE_URL;
 const OLLAMA_API_BASE_URL = OLLAMA_BASE_URL;
+const OLLAMA_SHOW_CONCURRENCY = 8;
+const OLLAMA_SHOW_MAX_MODELS = 200;
 const OLLAMA_DEFAULT_CONTEXT_WINDOW = 128000;
 const OLLAMA_DEFAULT_MAX_TOKENS = 8192;
 const OLLAMA_DEFAULT_COST = {
@@ -236,6 +238,38 @@ export function resolveOllamaApiBase(configuredBaseUrl?: string): string {
   return trimmed.replace(/\/v1$/i, "");
 }
+
+async function queryOllamaContextWindow(
+  apiBase: string,
+  modelName: string,
+): Promise<number | undefined> {
+  try {
+    const response = await fetch(`${apiBase}/api/show`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ name: modelName }),
+      signal: AbortSignal.timeout(3000),
+    });
+    if (!response.ok) {
+      return undefined;
+    }
+    const data = (await response.json()) as { model_info?: Record<string, unknown> };
+    if (!data.model_info) {
+      return undefined;
+    }
+    for (const [key, value] of Object.entries(data.model_info)) {
+      if (key.endsWith(".context_length") && typeof value === "number" && Number.isFinite(value)) {
+        const contextWindow = Math.floor(value);
+        if (contextWindow > 0) {
+          return contextWindow;
+        }
+      }
+    }
+    return undefined;
+  } catch {
+    return undefined;
+  }
+}
 async function discoverOllamaModels(
   baseUrl?: string,
   opts?: { quiet?: boolean },
@@ -260,20 +294,35 @@ async function discoverOllamaModels(
       log.debug("No Ollama models found on local instance");
       return [];
     }
-    return data.models.map((model) => {
-      const modelId = model.name;
-      const isReasoning =
-        modelId.toLowerCase().includes("r1") || modelId.toLowerCase().includes("reasoning");
-      return {
-        id: modelId,
-        name: modelId,
-        reasoning: isReasoning,
-        input: ["text"],
-        cost: OLLAMA_DEFAULT_COST,
-        contextWindow: OLLAMA_DEFAULT_CONTEXT_WINDOW,
-        maxTokens: OLLAMA_DEFAULT_MAX_TOKENS,
-      };
-    });
+    const modelsToInspect = data.models.slice(0, OLLAMA_SHOW_MAX_MODELS);
+    if (modelsToInspect.length < data.models.length && !opts?.quiet) {
+      log.warn(
+        `Capping Ollama /api/show inspection to ${OLLAMA_SHOW_MAX_MODELS} models (received ${data.models.length})`,
+      );
+    }
+    const discovered: ModelDefinitionConfig[] = [];
+    for (let index = 0; index < modelsToInspect.length; index += OLLAMA_SHOW_CONCURRENCY) {
+      const batch = modelsToInspect.slice(index, index + OLLAMA_SHOW_CONCURRENCY);
+      const batchDiscovered = await Promise.all(
+        batch.map(async (model) => {
+          const modelId = model.name;
+          const contextWindow = await queryOllamaContextWindow(apiBase, modelId);
+          const isReasoning =
+            modelId.toLowerCase().includes("r1") || modelId.toLowerCase().includes("reasoning");
+          return {
+            id: modelId,
+            name: modelId,
+            reasoning: isReasoning,
+            input: ["text"],
+            cost: OLLAMA_DEFAULT_COST,
+            contextWindow: contextWindow ?? OLLAMA_DEFAULT_CONTEXT_WINDOW,
+            maxTokens: OLLAMA_DEFAULT_MAX_TOKENS,
+          } satisfies ModelDefinitionConfig;
+        }),
+      );
+      discovered.push(...batchDiscovered);
+    }
+    return discovered;
   } catch (error) {
     if (!opts?.quiet) {
       log.warn(`Failed to discover Ollama models: ${String(error)}`);
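For context, `queryOllamaContextWindow` only needs the `model_info` map from the `/api/show` response, taking the first finite positive value whose key ends in `.context_length` (the key is prefixed by the model architecture, e.g. `qwen3.` or `llama.`, as seen in the tests above). A sketch of an abridged response that yields 131072 — the second key is hypothetical and shown only to illustrate the suffix filter:

```ts
// Abridged /api/show response shape as consumed by queryOllamaContextWindow.
const showResponse = {
  model_info: {
    "qwen3.context_length": 131072, // matched: ends with ".context_length"
    "qwen3.embedding_length": 5120, // skipped: different suffix (illustrative key)
  },
};
```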

View File

@@ -15,6 +15,12 @@ type ModelsConfig = NonNullable<OpenClawConfig["models"]>;
 const DEFAULT_MODE: NonNullable<ModelsConfig["mode"]> = "merge";
+
+function resolvePreferredTokenLimit(explicitValue: number, implicitValue: number): number {
+  // Keep catalog refresh behavior for stale low values while preserving
+  // intentional larger user overrides (for example Ollama >128k contexts).
+  return explicitValue > implicitValue ? explicitValue : implicitValue;
+}
 function mergeProviderModels(implicit: ProviderConfig, explicit: ProviderConfig): ProviderConfig {
   const implicitModels = Array.isArray(implicit.models) ? implicit.models : [];
   const explicitModels = Array.isArray(explicit.models) ? explicit.models : [];
@@ -55,8 +61,11 @@ function mergeProviderModels(implicit: ProviderConfig, explicit: ProviderConfig)
       ...explicitModel,
       input: implicitModel.input,
       reasoning: "reasoning" in explicitModel ? explicitModel.reasoning : implicitModel.reasoning,
-      contextWindow: implicitModel.contextWindow,
-      maxTokens: implicitModel.maxTokens,
+      contextWindow: resolvePreferredTokenLimit(
+        explicitModel.contextWindow,
+        implicitModel.contextWindow,
+      ),
+      maxTokens: resolvePreferredTokenLimit(explicitModel.maxTokens, implicitModel.maxTokens),
     };
   });
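Concretely, `resolvePreferredTokenLimit` keeps whichever side is larger; with illustrative values:

```ts
resolvePreferredTokenLimit(350_000, 262_144); // -> 350000: larger explicit override kept
resolvePreferredTokenLimit(4_096, 131_072); // -> 131072: stale explicit value refreshed
```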

View File

@@ -171,6 +171,35 @@ describe("resolveModel", () => {
     expect(result.model?.id).toBe("missing-model");
   });
+  it("prefers matching configured model metadata for fallback token limits", () => {
+    const cfg = {
+      models: {
+        providers: {
+          custom: {
+            baseUrl: "http://localhost:9000",
+            models: [
+              {
+                ...makeModel("model-a"),
+                contextWindow: 4096,
+                maxTokens: 1024,
+              },
+              {
+                ...makeModel("model-b"),
+                contextWindow: 262144,
+                maxTokens: 32768,
+              },
+            ],
+          },
+        },
+      },
+    } as OpenClawConfig;
+    const result = resolveModel("custom", "model-b", "/tmp/agent", cfg);
+    expect(result.model?.contextWindow).toBe(262144);
+    expect(result.model?.maxTokens).toBe(32768);
+  });
   it("builds an openai-codex fallback for gpt-5.3-codex", () => {
     mockOpenAICodexTemplateModel();

View File

@@ -96,6 +96,7 @@ export function resolveModel(
   }
   const providerCfg = providers[provider];
   if (providerCfg || modelId.startsWith("mock-")) {
+    const configuredModel = providerCfg?.models?.find((candidate) => candidate.id === modelId);
     const fallbackModel: Model<Api> = normalizeModelCompat({
       id: modelId,
       name: modelId,
@@ -105,8 +106,14 @@ export function resolveModel(
       reasoning: false,
       input: ["text"],
       cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
-      contextWindow: providerCfg?.models?.[0]?.contextWindow ?? DEFAULT_CONTEXT_TOKENS,
-      maxTokens: providerCfg?.models?.[0]?.maxTokens ?? DEFAULT_CONTEXT_TOKENS,
+      contextWindow:
+        configuredModel?.contextWindow ??
+        providerCfg?.models?.[0]?.contextWindow ??
+        DEFAULT_CONTEXT_TOKENS,
+      maxTokens:
+        configuredModel?.maxTokens ??
+        providerCfg?.models?.[0]?.maxTokens ??
+        DEFAULT_CONTEXT_TOKENS,
     } as Model<Api>);
     return { model: fallbackModel, authStorage, modelRegistry };
   }
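The practical effect of the new `configuredModel` lookup, using the values from the resolver test above: fallback limits now come from the entry whose id matches the requested model, not from whichever model happens to be listed first.

```ts
// Provider model list shaped like the resolver test above.
const models = [
  { id: "model-a", contextWindow: 4096, maxTokens: 1024 },
  { id: "model-b", contextWindow: 262144, maxTokens: 32768 },
];
// Before: fallback limits came from models[0] (4096/1024) even for "model-b".
// After: the id-matched entry wins, so "model-b" resolves to 262144/32768.
const configuredModel = models.find((candidate) => candidate.id === "model-b");
```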

View File

@@ -1,9 +1,13 @@
 import { describe, expect, it, vi } from "vitest";
 import type { OpenClawConfig } from "../../../config/config.js";
 import {
+  isOllamaCompatProvider,
   resolveAttemptFsWorkspaceOnly,
+  resolveOllamaCompatNumCtxEnabled,
   resolvePromptBuildHookResult,
   resolvePromptModeForSession,
+  shouldInjectOllamaCompatNumCtx,
+  wrapOllamaCompatNumCtx,
   wrapStreamFnTrimToolCallNames,
 } from "./attempt.js";
@@ -174,3 +178,159 @@ describe("wrapStreamFnTrimToolCallNames", () => {
     expect(baseFn).toHaveBeenCalledTimes(1);
   });
 });
+
+describe("isOllamaCompatProvider", () => {
+  it("detects native ollama provider id", () => {
+    expect(
+      isOllamaCompatProvider({
+        provider: "ollama",
+        api: "openai-completions",
+        baseUrl: "https://example.com/v1",
+      }),
+    ).toBe(true);
+  });
+  it("detects localhost Ollama OpenAI-compatible endpoint", () => {
+    expect(
+      isOllamaCompatProvider({
+        provider: "custom",
+        api: "openai-completions",
+        baseUrl: "http://127.0.0.1:11434/v1",
+      }),
+    ).toBe(true);
+  });
+  it("does not misclassify non-local OpenAI-compatible providers", () => {
+    expect(
+      isOllamaCompatProvider({
+        provider: "custom",
+        api: "openai-completions",
+        baseUrl: "https://api.openrouter.ai/v1",
+      }),
+    ).toBe(false);
+  });
+  it("detects remote Ollama-compatible endpoint when provider id hints ollama", () => {
+    expect(
+      isOllamaCompatProvider({
+        provider: "my-ollama",
+        api: "openai-completions",
+        baseUrl: "http://ollama-host:11434/v1",
+      }),
+    ).toBe(true);
+  });
+  it("detects IPv6 loopback Ollama OpenAI-compatible endpoint", () => {
+    expect(
+      isOllamaCompatProvider({
+        provider: "custom",
+        api: "openai-completions",
+        baseUrl: "http://[::1]:11434/v1",
+      }),
+    ).toBe(true);
+  });
+  it("does not classify arbitrary remote hosts on 11434 without ollama provider hint", () => {
+    expect(
+      isOllamaCompatProvider({
+        provider: "custom",
+        api: "openai-completions",
+        baseUrl: "http://example.com:11434/v1",
+      }),
+    ).toBe(false);
+  });
+});
+
+describe("wrapOllamaCompatNumCtx", () => {
+  it("injects num_ctx and preserves downstream onPayload hooks", () => {
+    let payloadSeen: Record<string, unknown> | undefined;
+    const baseFn = vi.fn((_model, _context, options) => {
+      const payload: Record<string, unknown> = { options: { temperature: 0.1 } };
+      options?.onPayload?.(payload);
+      payloadSeen = payload;
+      return {} as never;
+    });
+    const downstream = vi.fn();
+    const wrapped = wrapOllamaCompatNumCtx(baseFn as never, 202752);
+    void wrapped({} as never, {} as never, { onPayload: downstream } as never);
+    expect(baseFn).toHaveBeenCalledTimes(1);
+    expect((payloadSeen?.options as Record<string, unknown> | undefined)?.num_ctx).toBe(202752);
+    expect(downstream).toHaveBeenCalledTimes(1);
+  });
+});
+
+describe("resolveOllamaCompatNumCtxEnabled", () => {
+  it("defaults to true when config is missing", () => {
+    expect(resolveOllamaCompatNumCtxEnabled({ providerId: "ollama" })).toBe(true);
+  });
+  it("defaults to true when provider config is missing", () => {
+    expect(
+      resolveOllamaCompatNumCtxEnabled({
+        config: { models: { providers: {} } },
+        providerId: "ollama",
+      }),
+    ).toBe(true);
+  });
+  it("returns false when provider flag is explicitly disabled", () => {
+    expect(
+      resolveOllamaCompatNumCtxEnabled({
+        config: {
+          models: {
+            providers: {
+              ollama: {
+                baseUrl: "http://127.0.0.1:11434/v1",
+                api: "openai-completions",
+                injectNumCtxForOpenAICompat: false,
+                models: [],
+              },
+            },
+          },
+        },
+        providerId: "ollama",
+      }),
+    ).toBe(false);
+  });
+});
+
+describe("shouldInjectOllamaCompatNumCtx", () => {
+  it("requires openai-completions adapter", () => {
+    expect(
+      shouldInjectOllamaCompatNumCtx({
+        model: {
+          provider: "ollama",
+          api: "openai-responses",
+          baseUrl: "http://127.0.0.1:11434/v1",
+        },
+      }),
+    ).toBe(false);
+  });
+  it("respects provider flag disablement", () => {
+    expect(
+      shouldInjectOllamaCompatNumCtx({
+        model: {
+          provider: "ollama",
+          api: "openai-completions",
+          baseUrl: "http://127.0.0.1:11434/v1",
+        },
+        config: {
+          models: {
+            providers: {
+              ollama: {
+                baseUrl: "http://127.0.0.1:11434/v1",
+                api: "openai-completions",
+                injectNumCtxForOpenAICompat: false,
+                models: [],
+              },
+            },
+          },
+        },
+        providerId: "ollama",
+      }),
+    ).toBe(false);
+  });
+});

View File

@@ -40,7 +40,7 @@ import { resolveOpenClawDocsPath } from "../../docs-path.js";
 import { isTimeoutError } from "../../failover-error.js";
 import { resolveImageSanitizationLimits } from "../../image-sanitization.js";
 import { resolveModelAuthMode } from "../../model-auth.js";
-import { resolveDefaultModelForAgent } from "../../model-selection.js";
+import { normalizeProviderId, resolveDefaultModelForAgent } from "../../model-selection.js";
 import { createOllamaStreamFn, OLLAMA_NATIVE_BASE_URL } from "../../ollama-stream.js";
 import { resolveOwnerDisplaySetting } from "../../owner-display.js";
 import {
@@ -127,6 +127,104 @@ type PromptBuildHookRunner = {
   ) => Promise<PluginHookBeforeAgentStartResult | undefined>;
 };
+
+export function isOllamaCompatProvider(model: {
+  provider?: string;
+  baseUrl?: string;
+  api?: string;
+}): boolean {
+  const providerId = normalizeProviderId(model.provider ?? "");
+  if (providerId === "ollama") {
+    return true;
+  }
+  if (!model.baseUrl) {
+    return false;
+  }
+  try {
+    const parsed = new URL(model.baseUrl);
+    const hostname = parsed.hostname.toLowerCase();
+    const isLocalhost =
+      hostname === "localhost" ||
+      hostname === "127.0.0.1" ||
+      hostname === "::1" ||
+      hostname === "[::1]";
+    if (isLocalhost && parsed.port === "11434") {
+      return true;
+    }
+    // Allow remote/LAN Ollama OpenAI-compatible endpoints when the provider id
+    // itself indicates Ollama usage (e.g. "my-ollama").
+    const providerHintsOllama = providerId.includes("ollama");
+    const isOllamaPort = parsed.port === "11434";
+    const isOllamaCompatPath = parsed.pathname === "/" || /^\/v1\/?$/i.test(parsed.pathname);
+    return providerHintsOllama && isOllamaPort && isOllamaCompatPath;
+  } catch {
+    return false;
+  }
+}
+
+export function resolveOllamaCompatNumCtxEnabled(params: {
+  config?: OpenClawConfig;
+  providerId?: string;
+}): boolean {
+  const providerId = params.providerId?.trim();
+  if (!providerId) {
+    return true;
+  }
+  const providers = params.config?.models?.providers;
+  if (!providers) {
+    return true;
+  }
+  const direct = providers[providerId];
+  if (direct) {
+    return direct.injectNumCtxForOpenAICompat ?? true;
+  }
+  const normalized = normalizeProviderId(providerId);
+  for (const [candidateId, candidate] of Object.entries(providers)) {
+    if (normalizeProviderId(candidateId) === normalized) {
+      return candidate.injectNumCtxForOpenAICompat ?? true;
+    }
+  }
+  return true;
+}
+
+export function shouldInjectOllamaCompatNumCtx(params: {
+  model: { api?: string; provider?: string; baseUrl?: string };
+  config?: OpenClawConfig;
+  providerId?: string;
+}): boolean {
+  // Restrict to the OpenAI-compatible adapter path only.
+  if (params.model.api !== "openai-completions") {
+    return false;
+  }
+  if (!isOllamaCompatProvider(params.model)) {
+    return false;
+  }
+  return resolveOllamaCompatNumCtxEnabled({
+    config: params.config,
+    providerId: params.providerId,
+  });
+}
+
+export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: number): StreamFn {
+  const streamFn = baseFn ?? streamSimple;
+  return (model, context, options) =>
+    streamFn(model, context, {
+      ...options,
+      onPayload: (payload: unknown) => {
+        if (!payload || typeof payload !== "object") {
+          options?.onPayload?.(payload);
+          return;
+        }
+        const payloadRecord = payload as Record<string, unknown>;
+        if (!payloadRecord.options || typeof payloadRecord.options !== "object") {
+          payloadRecord.options = {};
+        }
+        (payloadRecord.options as Record<string, unknown>).num_ctx = numCtx;
+        options?.onPayload?.(payload);
+      },
+    });
+}
 function trimWhitespaceFromToolCallNamesInMessage(message: unknown): void {
   if (!message || typeof message !== "object") {
     return;
@@ -773,6 +871,27 @@ export async function runEmbeddedAttempt(
     activeSession.agent.streamFn = streamSimple;
   }
+
+  // Ollama with OpenAI-compatible API needs num_ctx in payload.options.
+  // Otherwise Ollama defaults to a 4096 context window.
+  const providerIdForNumCtx =
+    typeof params.model.provider === "string" && params.model.provider.trim().length > 0
+      ? params.model.provider
+      : params.provider;
+  const shouldInjectNumCtx = shouldInjectOllamaCompatNumCtx({
+    model: params.model,
+    config: params.config,
+    providerId: providerIdForNumCtx,
+  });
+  if (shouldInjectNumCtx) {
+    const numCtx = Math.max(
+      1,
+      Math.floor(
+        params.model.contextWindow ?? params.model.maxTokens ?? DEFAULT_CONTEXT_TOKENS,
+      ),
+    );
+    activeSession.agent.streamFn = wrapOllamaCompatNumCtx(activeSession.agent.streamFn, numCtx);
+  }
   applyExtraParamsToAgent(
     activeSession.agent,
     params.config,
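The payload mutation performed inside the wrapper's `onPayload` hook reduces to the self-contained sketch below; `injectNumCtx` is a hypothetical helper named here for illustration (the real code mutates the payload in place before forwarding it to any downstream hook):

```ts
// Self-contained sketch of the num_ctx injection applied to each outgoing payload.
type Payload = { options?: Record<string, unknown> } & Record<string, unknown>;

function injectNumCtx(payload: Payload, numCtx: number): Payload {
  // Create options if the adapter did not set one, then force num_ctx.
  payload.options = { ...(payload.options ?? {}), num_ctx: numCtx };
  return payload;
}

const sent = injectNumCtx({ model: "qwen3:32b", options: { temperature: 0.1 } }, 131072);
// sent.options -> { temperature: 0.1, num_ctx: 131072 }
```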

View File

@@ -630,7 +630,7 @@ export const FIELD_HELP: Record<string, string> = {
   models:
     "Model catalog root for provider definitions, merge/replace behavior, and optional Bedrock discovery integration. Keep provider definitions explicit and validated before relying on production failover paths.",
   "models.mode":
-    'Controls provider catalog behavior: "merge" keeps built-ins and overlays your custom providers, while "replace" uses only your configured providers. In "merge", matching provider IDs preserve non-empty agent models.json apiKey/baseUrl values and fall back to config when agent values are empty or missing.',
+    'Controls provider catalog behavior: "merge" keeps built-ins and overlays your custom providers, while "replace" uses only your configured providers. In "merge", matching provider IDs preserve non-empty agent models.json apiKey/baseUrl values and fall back to config when agent values are empty or missing; matching model contextWindow/maxTokens use the higher value between explicit and implicit entries.',
   "models.providers":
     "Provider map keyed by provider ID containing connection/auth settings and concrete model definitions. Use stable provider keys so references from agents and tooling remain portable across environments.",
   "models.providers.*.baseUrl":
@@ -641,6 +641,8 @@ export const FIELD_HELP: Record<string, string> = {
     'Selects provider auth style: "api-key" for API key auth, "token" for bearer token auth, "oauth" for OAuth credentials, and "aws-sdk" for AWS credential resolution. Match this to your provider requirements.',
   "models.providers.*.api":
     "Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.",
+  "models.providers.*.injectNumCtxForOpenAICompat":
+    "Controls whether OpenClaw injects `options.num_ctx` for Ollama providers configured with the OpenAI-compatible adapter (`openai-completions`). Default is true. Set false only if your proxy/upstream rejects unknown `options` payload fields.",
   "models.providers.*.headers":
     "Static HTTP headers merged into provider requests for tenant routing, proxy auth, or custom gateway requirements. Use this sparingly and keep sensitive header values in secrets.",
   "models.providers.*.authHeader":

View File

@@ -378,6 +378,7 @@ export const FIELD_LABELS: Record<string, string> = {
"models.providers.*.apiKey": "Model Provider API Key", "models.providers.*.apiKey": "Model Provider API Key",
"models.providers.*.auth": "Model Provider Auth Mode", "models.providers.*.auth": "Model Provider Auth Mode",
"models.providers.*.api": "Model Provider API Adapter", "models.providers.*.api": "Model Provider API Adapter",
"models.providers.*.injectNumCtxForOpenAICompat": "Model Provider Inject num_ctx (OpenAI Compat)",
"models.providers.*.headers": "Model Provider Headers", "models.providers.*.headers": "Model Provider Headers",
"models.providers.*.authHeader": "Model Provider Authorization Header", "models.providers.*.authHeader": "Model Provider Authorization Header",
"models.providers.*.models": "Model Provider Model List", "models.providers.*.models": "Model Provider Model List",

View File

@@ -52,6 +52,7 @@ export type ModelProviderConfig = {
   apiKey?: SecretInput;
   auth?: ModelProviderAuthMode;
   api?: ModelApi;
+  injectNumCtxForOpenAICompat?: boolean;
   headers?: Record<string, string>;
   authHeader?: boolean;
   models: ModelDefinitionConfig[];

View File

@@ -232,6 +232,7 @@ export const ModelProviderSchema = z
     .union([z.literal("api-key"), z.literal("aws-sdk"), z.literal("oauth"), z.literal("token")])
     .optional(),
   api: ModelApiSchema.optional(),
+  injectNumCtxForOpenAICompat: z.boolean().optional(),
   headers: z.record(z.string(), z.string()).optional(),
   authHeader: z.boolean().optional(),
   models: z.array(ModelDefinitionSchema),