fix(ollama): unify context window handling across discovery, merge, and OpenAI-compat transport (#29205)

* fix(ollama): inject num_ctx for OpenAI-compatible transport

* fix(ollama): discover per-model context and preserve higher limits

* fix(agents): prefer matching provider model for fallback limits

* fix(types): require numeric token limits in provider model merge

* fix(types): accept unknown payload in ollama num_ctx wrapper

* fix(types): simplify ollama settled-result extraction

* config(models): add provider flag for Ollama OpenAI num_ctx injection

* config(schema): allow provider num_ctx injection flag

* config(labels): label provider num_ctx injection flag

* config(help): document provider num_ctx injection flag

* agents(ollama): gate OpenAI num_ctx injection with provider config

* tests(ollama): cover provider num_ctx injection flag behavior

* docs(config): list provider num_ctx injection option

* docs(ollama): document OpenAI num_ctx injection toggle

* docs(config): clarify merge token-limit precedence

* config(help): note merge uses higher model token limits

* fix(ollama): cap /api/show discovery concurrency

* fix(ollama): restrict num_ctx injection to OpenAI compat

* tests(ollama): cover ipv6 and compat num_ctx gating

* fix(ollama): detect remote compat endpoints for ollama-labeled providers

* fix(ollama): cap per-model /api/show lookups to bound discovery load
This commit is contained in:
Vincent Koc
2026-02-27 17:20:47 -08:00
committed by GitHub
parent 70a4f25ab1
commit f16ecd1dac
14 changed files with 582 additions and 21 deletions

View File

@@ -1863,6 +1863,7 @@ OpenClaw uses the pi-coding-agent model catalog. Add custom providers via `model
- Merge precedence for matching provider IDs:
- Non-empty agent `models.json` `apiKey`/`baseUrl` win.
- Empty or missing agent `apiKey`/`baseUrl` fall back to `models.providers` in config.
- Matching model `contextWindow`/`maxTokens` use the higher value between explicit config and implicit catalog values.
- Use `models.mode: "replace"` when you want config to fully rewrite `models.json`.
### Provider field details
@@ -1872,6 +1873,7 @@ OpenClaw uses the pi-coding-agent model catalog. Add custom providers via `model
- `models.providers.*.api`: request adapter (`openai-completions`, `openai-responses`, `anthropic-messages`, `google-generative-ai`, etc).
- `models.providers.*.apiKey`: provider credential (prefer SecretRef/env substitution).
- `models.providers.*.auth`: auth strategy (`api-key`, `token`, `oauth`, `aws-sdk`).
- `models.providers.*.injectNumCtxForOpenAICompat`: for Ollama + `openai-completions`, inject `options.num_ctx` into requests (default: `true`).
- `models.providers.*.authHeader`: force credential transport in the `Authorization` header when required.
- `models.providers.*.baseUrl`: upstream API base URL.
- `models.providers.*.headers`: extra static headers for proxy/tenant routing.

View File

@@ -199,6 +199,7 @@ If you need to use the OpenAI-compatible endpoint instead (e.g., behind a proxy
ollama: {
baseUrl: "http://ollama-host:11434/v1",
api: "openai-completions",
injectNumCtxForOpenAICompat: true, // default: true
apiKey: "ollama-local",
models: [...]
}
@@ -209,6 +210,24 @@ If you need to use the OpenAI-compatible endpoint instead (e.g., behind a proxy
This mode may not support streaming + tool calling simultaneously. You may need to disable streaming with `params: { streaming: false }` in model config.
When `api: "openai-completions"` is used with Ollama, OpenClaw injects `options.num_ctx` by default so Ollama does not silently fall back to a 4096 context window. If your proxy/upstream rejects unknown `options` fields, disable this behavior:
```json5
{
models: {
providers: {
ollama: {
baseUrl: "http://ollama-host:11434/v1",
api: "openai-completions",
injectNumCtxForOpenAICompat: false,
apiKey: "ollama-local",
models: [...]
}
}
}
}
```
### Context windows
For auto-discovered models, OpenClaw uses the context window reported by Ollama when available, otherwise it defaults to `128000`. You can override `contextWindow` and `maxTokens` in explicit provider config.

View File

@@ -307,4 +307,57 @@ describe("models-config", () => {
}
});
});
// Regression test: merge precedence must keep explicit contextWindow/maxTokens
// when they exceed the implicit catalog entry (higher-value-wins rule).
it("preserves explicit larger token limits when they exceed implicit catalog defaults", async () => {
  await withTempHome(async () => {
    // Save and later restore the real env var so the test stays hermetic.
    const prevKey = process.env.MOONSHOT_API_KEY;
    process.env.MOONSHOT_API_KEY = "sk-moonshot-test";
    try {
      const cfg: OpenClawConfig = {
        models: {
          providers: {
            moonshot: {
              baseUrl: "https://api.moonshot.ai/v1",
              api: "openai-completions",
              models: [
                {
                  id: "kimi-k2.5",
                  name: "Kimi K2.5",
                  reasoning: false,
                  input: ["text"],
                  cost: { input: 123, output: 456, cacheRead: 0, cacheWrite: 0 },
                  // Deliberately larger than the implicit catalog defaults.
                  contextWindow: 350000,
                  maxTokens: 16384,
                },
              ],
            },
          },
        },
      };
      await ensureOpenClawModelsJson(cfg);
      const parsed = await readGeneratedModelsJson<{
        providers: Record<
          string,
          {
            models?: Array<{
              id: string;
              contextWindow?: number;
              maxTokens?: number;
            }>;
          }
        >;
      }>();
      const kimi = parsed.providers.moonshot?.models?.find((model) => model.id === "kimi-k2.5");
      // Explicit values exceed the catalog's, so they must survive the merge.
      expect(kimi?.contextWindow).toBe(350000);
      expect(kimi?.maxTokens).toBe(16384);
    } finally {
      // Restore prior env state, including the "previously unset" case.
      if (prevKey === undefined) {
        delete process.env.MOONSHOT_API_KEY;
      } else {
        process.env.MOONSHOT_API_KEY = prevKey;
      }
    }
  });
});
});

View File

@@ -1,9 +1,14 @@
import { mkdtempSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { describe, expect, it } from "vitest";
import { afterEach, describe, expect, it, vi } from "vitest";
import { resolveImplicitProviders, resolveOllamaApiBase } from "./models-config.providers.js";
// Undo all env and global (e.g. fetch) stubs after every test so mocks
// installed by one case never leak into the next.
afterEach(() => {
  vi.unstubAllEnvs();
  vi.unstubAllGlobals();
});
describe("resolveOllamaApiBase", () => {
it("returns default localhost base when no configured URL is provided", () => {
expect(resolveOllamaApiBase()).toBe("http://127.0.0.1:11434");
@@ -71,6 +76,110 @@ describe("Ollama provider", () => {
}
});
// Happy path: /api/tags lists models, then one /api/show call per model
// supplies its context window (key ends with ".context_length").
it("discovers per-model context windows from /api/show", async () => {
  const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-"));
  process.env.OLLAMA_API_KEY = "test-key";
  // Clear test-mode guards so discovery actually exercises the fetch path.
  vi.stubEnv("VITEST", "");
  vi.stubEnv("NODE_ENV", "development");
  const fetchMock = vi
    .fn()
    // 1st response: /api/tags model listing.
    .mockResolvedValueOnce({
      ok: true,
      json: async () => ({
        models: [
          { name: "qwen3:32b", modified_at: "", size: 1, digest: "" },
          { name: "llama3.3:70b", modified_at: "", size: 1, digest: "" },
        ],
      }),
    })
    // 2nd and 3rd responses: per-model /api/show lookups.
    .mockResolvedValueOnce({
      ok: true,
      json: async () => ({ model_info: { "qwen3.context_length": 131072 } }),
    })
    .mockResolvedValueOnce({
      ok: true,
      json: async () => ({ model_info: { "llama.context_length": 65536 } }),
    });
  vi.stubGlobal("fetch", fetchMock);
  try {
    const providers = await resolveImplicitProviders({ agentDir });
    const models = providers?.ollama?.models ?? [];
    const qwen = models.find((model) => model.id === "qwen3:32b");
    const llama = models.find((model) => model.id === "llama3.3:70b");
    expect(qwen?.contextWindow).toBe(131072);
    expect(llama?.contextWindow).toBe(65536);
    // Exactly one /api/tags call plus one /api/show per discovered model.
    expect(fetchMock).toHaveBeenCalledTimes(3);
  } finally {
    delete process.env.OLLAMA_API_KEY;
  }
});
// Failure path: when /api/show errors, the model is still discovered but
// gets the default 128000-token context window instead of being dropped.
it("falls back to default context window when /api/show fails", async () => {
  const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-"));
  process.env.OLLAMA_API_KEY = "test-key";
  // Clear test-mode guards so discovery actually exercises the fetch path.
  vi.stubEnv("VITEST", "");
  vi.stubEnv("NODE_ENV", "development");
  const fetchMock = vi
    .fn()
    // /api/tags succeeds with a single model...
    .mockResolvedValueOnce({
      ok: true,
      json: async () => ({
        models: [{ name: "qwen3:32b", modified_at: "", size: 1, digest: "" }],
      }),
    })
    // ...but the follow-up /api/show returns a server error.
    .mockResolvedValueOnce({
      ok: false,
      status: 500,
    });
  vi.stubGlobal("fetch", fetchMock);
  try {
    const providers = await resolveImplicitProviders({ agentDir });
    const model = providers?.ollama?.models?.find((entry) => entry.id === "qwen3:32b");
    expect(model?.contextWindow).toBe(128000);
    expect(fetchMock).toHaveBeenCalledTimes(2);
  } finally {
    delete process.env.OLLAMA_API_KEY;
  }
});
// Load bounding: with 250 tagged models, discovery inspects at most 200 via
// /api/show and only those inspected models are returned.
it("caps /api/show requests when /api/tags returns a very large model list", async () => {
  const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-"));
  process.env.OLLAMA_API_KEY = "test-key";
  // Clear test-mode guards so discovery actually exercises the fetch path.
  vi.stubEnv("VITEST", "");
  vi.stubEnv("NODE_ENV", "development");
  const manyModels = Array.from({ length: 250 }, (_, idx) => ({
    name: `model-${idx}`,
    modified_at: "",
    size: 1,
    digest: "",
  }));
  // Route by URL: tags listing vs. per-model show lookups.
  const fetchMock = vi.fn(async (url: string) => {
    if (url.endsWith("/api/tags")) {
      return {
        ok: true,
        json: async () => ({ models: manyModels }),
      };
    }
    return {
      ok: true,
      json: async () => ({ model_info: { "llama.context_length": 65536 } }),
    };
  });
  vi.stubGlobal("fetch", fetchMock);
  try {
    const providers = await resolveImplicitProviders({ agentDir });
    const models = providers?.ollama?.models ?? [];
    // 1 call for /api/tags + 200 capped /api/show calls.
    expect(fetchMock).toHaveBeenCalledTimes(201);
    expect(models).toHaveLength(200);
  } finally {
    delete process.env.OLLAMA_API_KEY;
  }
});
it("should have correct model structure without streaming override", () => {
const mockOllamaModel = {
id: "llama3.3:latest",

View File

@@ -144,6 +144,8 @@ const QWEN_PORTAL_DEFAULT_COST = {
const OLLAMA_BASE_URL = OLLAMA_NATIVE_BASE_URL;
const OLLAMA_API_BASE_URL = OLLAMA_BASE_URL;
const OLLAMA_SHOW_CONCURRENCY = 8;
const OLLAMA_SHOW_MAX_MODELS = 200;
const OLLAMA_DEFAULT_CONTEXT_WINDOW = 128000;
const OLLAMA_DEFAULT_MAX_TOKENS = 8192;
const OLLAMA_DEFAULT_COST = {
@@ -236,6 +238,38 @@ export function resolveOllamaApiBase(configuredBaseUrl?: string): string {
return trimmed.replace(/\/v1$/i, "");
}
/**
 * Ask an Ollama daemon for a model's context window via POST /api/show.
 *
 * Scans the returned `model_info` map for the first key ending in
 * ".context_length" whose value is a finite positive number, and returns it
 * floored to an integer. Returns undefined on HTTP failure, timeout (3s),
 * malformed JSON, or when no usable key is present — callers are expected to
 * fall back to a default context window.
 */
async function queryOllamaContextWindow(
  apiBase: string,
  modelName: string,
): Promise<number | undefined> {
  let data: { model_info?: Record<string, unknown> };
  try {
    const response = await fetch(`${apiBase}/api/show`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ name: modelName }),
      // Keep discovery snappy; a slow daemon must not stall startup.
      signal: AbortSignal.timeout(3000),
    });
    if (!response.ok) {
      return undefined;
    }
    data = (await response.json()) as { model_info?: Record<string, unknown> };
  } catch {
    // Network errors, timeouts, and JSON parse failures all degrade to "unknown".
    return undefined;
  }
  const info = data.model_info;
  if (!info) {
    return undefined;
  }
  for (const key of Object.keys(info)) {
    if (!key.endsWith(".context_length")) {
      continue;
    }
    const value = info[key];
    if (typeof value !== "number" || !Number.isFinite(value)) {
      continue;
    }
    const contextWindow = Math.floor(value);
    if (contextWindow > 0) {
      return contextWindow;
    }
  }
  return undefined;
}
async function discoverOllamaModels(
baseUrl?: string,
opts?: { quiet?: boolean },
@@ -260,20 +294,35 @@ async function discoverOllamaModels(
log.debug("No Ollama models found on local instance");
return [];
}
return data.models.map((model) => {
const modelId = model.name;
const isReasoning =
modelId.toLowerCase().includes("r1") || modelId.toLowerCase().includes("reasoning");
return {
id: modelId,
name: modelId,
reasoning: isReasoning,
input: ["text"],
cost: OLLAMA_DEFAULT_COST,
contextWindow: OLLAMA_DEFAULT_CONTEXT_WINDOW,
maxTokens: OLLAMA_DEFAULT_MAX_TOKENS,
};
});
const modelsToInspect = data.models.slice(0, OLLAMA_SHOW_MAX_MODELS);
if (modelsToInspect.length < data.models.length && !opts?.quiet) {
log.warn(
`Capping Ollama /api/show inspection to ${OLLAMA_SHOW_MAX_MODELS} models (received ${data.models.length})`,
);
}
const discovered: ModelDefinitionConfig[] = [];
for (let index = 0; index < modelsToInspect.length; index += OLLAMA_SHOW_CONCURRENCY) {
const batch = modelsToInspect.slice(index, index + OLLAMA_SHOW_CONCURRENCY);
const batchDiscovered = await Promise.all(
batch.map(async (model) => {
const modelId = model.name;
const contextWindow = await queryOllamaContextWindow(apiBase, modelId);
const isReasoning =
modelId.toLowerCase().includes("r1") || modelId.toLowerCase().includes("reasoning");
return {
id: modelId,
name: modelId,
reasoning: isReasoning,
input: ["text"],
cost: OLLAMA_DEFAULT_COST,
contextWindow: contextWindow ?? OLLAMA_DEFAULT_CONTEXT_WINDOW,
maxTokens: OLLAMA_DEFAULT_MAX_TOKENS,
} satisfies ModelDefinitionConfig;
}),
);
discovered.push(...batchDiscovered);
}
return discovered;
} catch (error) {
if (!opts?.quiet) {
log.warn(`Failed to discover Ollama models: ${String(error)}`);

View File

@@ -15,6 +15,12 @@ type ModelsConfig = NonNullable<OpenClawConfig["models"]>;
const DEFAULT_MODE: NonNullable<ModelsConfig["mode"]> = "merge";
/**
 * Pick the token limit to keep when merging an explicit (user config) model
 * entry with an implicit (catalog) one. The larger value wins: catalog
 * refreshes may raise stale low limits, while intentional user overrides
 * (for example Ollama >128k contexts) must not be clamped back down.
 */
function resolvePreferredTokenLimit(explicitValue: number, implicitValue: number): number {
  return Math.max(explicitValue, implicitValue);
}
function mergeProviderModels(implicit: ProviderConfig, explicit: ProviderConfig): ProviderConfig {
const implicitModels = Array.isArray(implicit.models) ? implicit.models : [];
const explicitModels = Array.isArray(explicit.models) ? explicit.models : [];
@@ -55,8 +61,11 @@ function mergeProviderModels(implicit: ProviderConfig, explicit: ProviderConfig)
...explicitModel,
input: implicitModel.input,
reasoning: "reasoning" in explicitModel ? explicitModel.reasoning : implicitModel.reasoning,
contextWindow: implicitModel.contextWindow,
maxTokens: implicitModel.maxTokens,
contextWindow: resolvePreferredTokenLimit(
explicitModel.contextWindow,
implicitModel.contextWindow,
),
maxTokens: resolvePreferredTokenLimit(explicitModel.maxTokens, implicitModel.maxTokens),
};
});

View File

@@ -171,6 +171,35 @@ describe("resolveModel", () => {
expect(result.model?.id).toBe("missing-model");
});
// Fallback metadata must come from the configured entry matching the
// requested model id, not blindly from the provider's first listed model.
it("prefers matching configured model metadata for fallback token limits", () => {
  const cfg = {
    models: {
      providers: {
        custom: {
          baseUrl: "http://localhost:9000",
          models: [
            // First entry has deliberately small limits...
            {
              ...makeModel("model-a"),
              contextWindow: 4096,
              maxTokens: 1024,
            },
            // ...second entry (the one requested) has much larger ones.
            {
              ...makeModel("model-b"),
              contextWindow: 262144,
              maxTokens: 32768,
            },
          ],
        },
      },
    },
  } as OpenClawConfig;
  const result = resolveModel("custom", "model-b", "/tmp/agent", cfg);
  // model-b's own limits win over model-a's (the first entry).
  expect(result.model?.contextWindow).toBe(262144);
  expect(result.model?.maxTokens).toBe(32768);
});
it("builds an openai-codex fallback for gpt-5.3-codex", () => {
mockOpenAICodexTemplateModel();

View File

@@ -96,6 +96,7 @@ export function resolveModel(
}
const providerCfg = providers[provider];
if (providerCfg || modelId.startsWith("mock-")) {
const configuredModel = providerCfg?.models?.find((candidate) => candidate.id === modelId);
const fallbackModel: Model<Api> = normalizeModelCompat({
id: modelId,
name: modelId,
@@ -105,8 +106,14 @@ export function resolveModel(
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: providerCfg?.models?.[0]?.contextWindow ?? DEFAULT_CONTEXT_TOKENS,
maxTokens: providerCfg?.models?.[0]?.maxTokens ?? DEFAULT_CONTEXT_TOKENS,
contextWindow:
configuredModel?.contextWindow ??
providerCfg?.models?.[0]?.contextWindow ??
DEFAULT_CONTEXT_TOKENS,
maxTokens:
configuredModel?.maxTokens ??
providerCfg?.models?.[0]?.maxTokens ??
DEFAULT_CONTEXT_TOKENS,
} as Model<Api>);
return { model: fallbackModel, authStorage, modelRegistry };
}

View File

@@ -1,9 +1,13 @@
import { describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../../../config/config.js";
import {
isOllamaCompatProvider,
resolveAttemptFsWorkspaceOnly,
resolveOllamaCompatNumCtxEnabled,
resolvePromptBuildHookResult,
resolvePromptModeForSession,
shouldInjectOllamaCompatNumCtx,
wrapOllamaCompatNumCtx,
wrapStreamFnTrimToolCallNames,
} from "./attempt.js";
@@ -174,3 +178,159 @@ describe("wrapStreamFnTrimToolCallNames", () => {
expect(baseFn).toHaveBeenCalledTimes(1);
});
});
// Endpoint classification rules: provider id "ollama" always matches;
// loopback hosts on port 11434 match; remote hosts on 11434 match only when
// the provider id itself hints at Ollama.
describe("isOllamaCompatProvider", () => {
  it("detects native ollama provider id", () => {
    expect(
      isOllamaCompatProvider({
        provider: "ollama",
        api: "openai-completions",
        baseUrl: "https://example.com/v1",
      }),
    ).toBe(true);
  });
  it("detects localhost Ollama OpenAI-compatible endpoint", () => {
    expect(
      isOllamaCompatProvider({
        provider: "custom",
        api: "openai-completions",
        baseUrl: "http://127.0.0.1:11434/v1",
      }),
    ).toBe(true);
  });
  it("does not misclassify non-local OpenAI-compatible providers", () => {
    expect(
      isOllamaCompatProvider({
        provider: "custom",
        api: "openai-completions",
        baseUrl: "https://api.openrouter.ai/v1",
      }),
    ).toBe(false);
  });
  it("detects remote Ollama-compatible endpoint when provider id hints ollama", () => {
    expect(
      isOllamaCompatProvider({
        provider: "my-ollama",
        api: "openai-completions",
        baseUrl: "http://ollama-host:11434/v1",
      }),
    ).toBe(true);
  });
  it("detects IPv6 loopback Ollama OpenAI-compatible endpoint", () => {
    expect(
      isOllamaCompatProvider({
        provider: "custom",
        api: "openai-completions",
        baseUrl: "http://[::1]:11434/v1",
      }),
    ).toBe(true);
  });
  it("does not classify arbitrary remote hosts on 11434 without ollama provider hint", () => {
    expect(
      isOllamaCompatProvider({
        provider: "custom",
        api: "openai-completions",
        baseUrl: "http://example.com:11434/v1",
      }),
    ).toBe(false);
  });
});
// The wrapper must set payload.options.num_ctx and still invoke the
// caller-supplied onPayload hook exactly once with the mutated payload.
describe("wrapOllamaCompatNumCtx", () => {
  it("injects num_ctx and preserves downstream onPayload hooks", () => {
    let payloadSeen: Record<string, unknown> | undefined;
    // Fake stream function that emits one payload through onPayload.
    const baseFn = vi.fn((_model, _context, options) => {
      const payload: Record<string, unknown> = { options: { temperature: 0.1 } };
      options?.onPayload?.(payload);
      payloadSeen = payload;
      return {} as never;
    });
    const downstream = vi.fn();
    const wrapped = wrapOllamaCompatNumCtx(baseFn as never, 202752);
    void wrapped({} as never, {} as never, { onPayload: downstream } as never);
    expect(baseFn).toHaveBeenCalledTimes(1);
    // num_ctx was injected into the existing options object.
    expect((payloadSeen?.options as Record<string, unknown> | undefined)?.num_ctx).toBe(202752);
    expect(downstream).toHaveBeenCalledTimes(1);
  });
});
// Config resolution defaults to enabled; only an explicit false on the
// provider's injectNumCtxForOpenAICompat flag disables injection.
describe("resolveOllamaCompatNumCtxEnabled", () => {
  it("defaults to true when config is missing", () => {
    expect(resolveOllamaCompatNumCtxEnabled({ providerId: "ollama" })).toBe(true);
  });
  it("defaults to true when provider config is missing", () => {
    expect(
      resolveOllamaCompatNumCtxEnabled({
        config: { models: { providers: {} } },
        providerId: "ollama",
      }),
    ).toBe(true);
  });
  it("returns false when provider flag is explicitly disabled", () => {
    expect(
      resolveOllamaCompatNumCtxEnabled({
        config: {
          models: {
            providers: {
              ollama: {
                baseUrl: "http://127.0.0.1:11434/v1",
                api: "openai-completions",
                injectNumCtxForOpenAICompat: false,
                models: [],
              },
            },
          },
        },
        providerId: "ollama",
      }),
    ).toBe(false);
  });
});
// The gate combines three conditions: the openai-completions adapter, an
// Ollama-classified endpoint, and the provider flag not being disabled.
describe("shouldInjectOllamaCompatNumCtx", () => {
  it("requires openai-completions adapter", () => {
    expect(
      shouldInjectOllamaCompatNumCtx({
        model: {
          provider: "ollama",
          // Wrong adapter: injection must be skipped even for ollama.
          api: "openai-responses",
          baseUrl: "http://127.0.0.1:11434/v1",
        },
      }),
    ).toBe(false);
  });
  it("respects provider flag disablement", () => {
    expect(
      shouldInjectOllamaCompatNumCtx({
        model: {
          provider: "ollama",
          api: "openai-completions",
          baseUrl: "http://127.0.0.1:11434/v1",
        },
        config: {
          models: {
            providers: {
              ollama: {
                baseUrl: "http://127.0.0.1:11434/v1",
                api: "openai-completions",
                injectNumCtxForOpenAICompat: false,
                models: [],
              },
            },
          },
        },
        providerId: "ollama",
      }),
    ).toBe(false);
  });
});

View File

@@ -40,7 +40,7 @@ import { resolveOpenClawDocsPath } from "../../docs-path.js";
import { isTimeoutError } from "../../failover-error.js";
import { resolveImageSanitizationLimits } from "../../image-sanitization.js";
import { resolveModelAuthMode } from "../../model-auth.js";
import { resolveDefaultModelForAgent } from "../../model-selection.js";
import { normalizeProviderId, resolveDefaultModelForAgent } from "../../model-selection.js";
import { createOllamaStreamFn, OLLAMA_NATIVE_BASE_URL } from "../../ollama-stream.js";
import { resolveOwnerDisplaySetting } from "../../owner-display.js";
import {
@@ -127,6 +127,104 @@ type PromptBuildHookRunner = {
) => Promise<PluginHookBeforeAgentStartResult | undefined>;
};
/**
 * Decide whether a model targets an Ollama endpoint exposed through the
 * OpenAI-compatible surface. Returns true when:
 *  - the normalized provider id is exactly "ollama"; or
 *  - the base URL points at a loopback host on Ollama's default port 11434; or
 *  - the base URL uses port 11434 with a root or /v1 path AND the provider id
 *    itself hints at Ollama (e.g. "my-ollama") — this guards against
 *    classifying arbitrary remote hosts that merely share the port.
 */
export function isOllamaCompatProvider(model: {
  provider?: string;
  baseUrl?: string;
  api?: string;
}): boolean {
  const providerId = normalizeProviderId(model.provider ?? "");
  if (providerId === "ollama") {
    return true;
  }
  if (!model.baseUrl) {
    return false;
  }
  let parsed: URL;
  try {
    parsed = new URL(model.baseUrl);
  } catch {
    return false;
  }
  // Every remaining positive case requires Ollama's default port.
  if (parsed.port !== "11434") {
    return false;
  }
  const hostname = parsed.hostname.toLowerCase();
  // URL implementations may report the IPv6 loopback with or without brackets.
  if (["localhost", "127.0.0.1", "::1", "[::1]"].includes(hostname)) {
    return true;
  }
  // Remote/LAN endpoints only qualify when the provider id hints at Ollama
  // (e.g. "my-ollama") and the path looks like an OpenAI-compat base.
  if (!providerId.includes("ollama")) {
    return false;
  }
  return parsed.pathname === "/" || /^\/v1\/?$/i.test(parsed.pathname);
}
/**
 * Read the per-provider `injectNumCtxForOpenAICompat` flag from config.
 * Defaults to true whenever the provider id, the config, or the flag itself
 * is absent. An exact provider key match wins; otherwise the provider map is
 * scanned for a normalized-id match.
 */
export function resolveOllamaCompatNumCtxEnabled(params: {
  config?: OpenClawConfig;
  providerId?: string;
}): boolean {
  const providerId = params.providerId?.trim();
  const providers = params.config?.models?.providers;
  if (!providerId || !providers) {
    return true;
  }
  // Exact key lookup takes precedence over any normalization.
  const direct = providers[providerId];
  if (direct) {
    return direct.injectNumCtxForOpenAICompat ?? true;
  }
  const wanted = normalizeProviderId(providerId);
  const match = Object.entries(providers).find(
    ([candidateId]) => normalizeProviderId(candidateId) === wanted,
  );
  return match?.[1].injectNumCtxForOpenAICompat ?? true;
}
/**
 * Gate for injecting `options.num_ctx` into outgoing requests. Injection
 * happens only when all three hold: the model uses the `openai-completions`
 * adapter, the endpoint classifies as Ollama, and the provider config does
 * not explicitly disable the behavior.
 */
export function shouldInjectOllamaCompatNumCtx(params: {
  model: { api?: string; provider?: string; baseUrl?: string };
  config?: OpenClawConfig;
  providerId?: string;
}): boolean {
  const { model, config, providerId } = params;
  // Injection is restricted to the OpenAI-compatible adapter path.
  if (model.api !== "openai-completions" || !isOllamaCompatProvider(model)) {
    return false;
  }
  return resolveOllamaCompatNumCtxEnabled({ config, providerId });
}
/**
 * Wrap a stream function so every outgoing request payload carries
 * `options.num_ctx = numCtx` — without it, Ollama's OpenAI-compatible
 * endpoint silently falls back to a 4096-token context window. Any
 * caller-supplied `onPayload` hook still runs, observing the mutated payload.
 * Falls back to `streamSimple` when no base stream function is given.
 */
export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: number): StreamFn {
  const inner = baseFn ?? streamSimple;
  return (model, context, options) => {
    const onPayload = (payload: unknown) => {
      if (payload && typeof payload === "object") {
        const record = payload as Record<string, unknown>;
        // Create the options bag when it is missing or not an object.
        if (!record.options || typeof record.options !== "object") {
          record.options = {};
        }
        (record.options as Record<string, unknown>).num_ctx = numCtx;
      }
      // Always forward to the downstream hook, even for non-object payloads.
      options?.onPayload?.(payload);
    };
    return inner(model, context, { ...options, onPayload });
  };
}
function trimWhitespaceFromToolCallNamesInMessage(message: unknown): void {
if (!message || typeof message !== "object") {
return;
@@ -773,6 +871,27 @@ export async function runEmbeddedAttempt(
activeSession.agent.streamFn = streamSimple;
}
// Ollama with OpenAI-compatible API needs num_ctx in payload.options.
// Otherwise Ollama defaults to a 4096 context window.
const providerIdForNumCtx =
typeof params.model.provider === "string" && params.model.provider.trim().length > 0
? params.model.provider
: params.provider;
const shouldInjectNumCtx = shouldInjectOllamaCompatNumCtx({
model: params.model,
config: params.config,
providerId: providerIdForNumCtx,
});
if (shouldInjectNumCtx) {
const numCtx = Math.max(
1,
Math.floor(
params.model.contextWindow ?? params.model.maxTokens ?? DEFAULT_CONTEXT_TOKENS,
),
);
activeSession.agent.streamFn = wrapOllamaCompatNumCtx(activeSession.agent.streamFn, numCtx);
}
applyExtraParamsToAgent(
activeSession.agent,
params.config,

View File

@@ -630,7 +630,7 @@ export const FIELD_HELP: Record<string, string> = {
models:
"Model catalog root for provider definitions, merge/replace behavior, and optional Bedrock discovery integration. Keep provider definitions explicit and validated before relying on production failover paths.",
"models.mode":
'Controls provider catalog behavior: "merge" keeps built-ins and overlays your custom providers, while "replace" uses only your configured providers. In "merge", matching provider IDs preserve non-empty agent models.json apiKey/baseUrl values and fall back to config when agent values are empty or missing.',
'Controls provider catalog behavior: "merge" keeps built-ins and overlays your custom providers, while "replace" uses only your configured providers. In "merge", matching provider IDs preserve non-empty agent models.json apiKey/baseUrl values and fall back to config when agent values are empty or missing; matching model contextWindow/maxTokens use the higher value between explicit and implicit entries.',
"models.providers":
"Provider map keyed by provider ID containing connection/auth settings and concrete model definitions. Use stable provider keys so references from agents and tooling remain portable across environments.",
"models.providers.*.baseUrl":
@@ -641,6 +641,8 @@ export const FIELD_HELP: Record<string, string> = {
'Selects provider auth style: "api-key" for API key auth, "token" for bearer token auth, "oauth" for OAuth credentials, and "aws-sdk" for AWS credential resolution. Match this to your provider requirements.',
"models.providers.*.api":
"Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.",
"models.providers.*.injectNumCtxForOpenAICompat":
"Controls whether OpenClaw injects `options.num_ctx` for Ollama providers configured with the OpenAI-compatible adapter (`openai-completions`). Default is true. Set false only if your proxy/upstream rejects unknown `options` payload fields.",
"models.providers.*.headers":
"Static HTTP headers merged into provider requests for tenant routing, proxy auth, or custom gateway requirements. Use this sparingly and keep sensitive header values in secrets.",
"models.providers.*.authHeader":

View File

@@ -378,6 +378,7 @@ export const FIELD_LABELS: Record<string, string> = {
"models.providers.*.apiKey": "Model Provider API Key",
"models.providers.*.auth": "Model Provider Auth Mode",
"models.providers.*.api": "Model Provider API Adapter",
"models.providers.*.injectNumCtxForOpenAICompat": "Model Provider Inject num_ctx (OpenAI Compat)",
"models.providers.*.headers": "Model Provider Headers",
"models.providers.*.authHeader": "Model Provider Authorization Header",
"models.providers.*.models": "Model Provider Model List",

View File

@@ -52,6 +52,7 @@ export type ModelProviderConfig = {
apiKey?: SecretInput;
auth?: ModelProviderAuthMode;
api?: ModelApi;
injectNumCtxForOpenAICompat?: boolean;
headers?: Record<string, string>;
authHeader?: boolean;
models: ModelDefinitionConfig[];

View File

@@ -232,6 +232,7 @@ export const ModelProviderSchema = z
.union([z.literal("api-key"), z.literal("aws-sdk"), z.literal("oauth"), z.literal("token")])
.optional(),
api: ModelApiSchema.optional(),
injectNumCtxForOpenAICompat: z.boolean().optional(),
headers: z.record(z.string(), z.string()).optional(),
authHeader: z.boolean().optional(),
models: z.array(ModelDefinitionSchema),