fix(ollama): restore catalog-driven num_ctx for native /api/chat

openperf
2026-05-03 09:24:06 +08:00
parent c97552a04c
commit ac404b2648
2 changed files with 89 additions and 5 deletions
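
Without a num_ctx in the request, Ollama's native /api/chat sizes the context from the Modelfile default (typically 2048 tokens), which is too small for a system prompt plus tool definitions; this change forwards the catalog context window instead. A minimal before/after sketch of the request body this produces; the model tag and num_predict value are invented for illustration:

// Hypothetical request bodies; only options.num_ctx changes with this fix.
const before = {
  model: "llama3.1:8b",
  messages: [{ role: "user", content: "hi" }],
  options: { num_predict: 123 }, // num_ctx absent: Modelfile default applies
};
const after = {
  model: "llama3.1:8b",
  messages: [{ role: "user", content: "hi" }],
  options: { num_predict: 123, num_ctx: 131072 }, // catalog contextWindow forwarded
};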

View File

@@ -208,7 +208,7 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
         };
         expect(requestBody.think).toBe(false);
         expect(requestBody.options?.think).toBeUndefined();
-        expect(requestBody.options?.num_ctx).toBeUndefined();
+        expect(requestBody.options?.num_ctx).toBe(131072);
       },
     );
   });
@@ -310,7 +310,7 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
         };
         expect(requestBody.think).toBe("low");
         expect(requestBody.options?.think).toBeUndefined();
-        expect(requestBody.options?.num_ctx).toBeUndefined();
+        expect(requestBody.options?.num_ctx).toBe(131072);
       },
     );
   });
@@ -405,7 +405,7 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
         };
         expect(requestBody.think).toBe("high");
         expect(requestBody.options?.think).toBeUndefined();
-        expect(requestBody.options?.num_ctx).toBeUndefined();
+        expect(requestBody.options?.num_ctx).toBe(131072);
       },
     );
   });
@@ -1602,7 +1602,9 @@ describe("createOllamaStreamFn", () => {
         if (!requestBody.options) {
           throw new Error("Expected Ollama request options");
         }
-        expect(requestBody.options?.num_ctx).toBeUndefined();
+        // Catalog `contextWindow` flows through as `num_ctx` so the request
+        // does not silently truncate to Ollama's small Modelfile default.
+        expect(requestBody.options?.num_ctx).toBe(131072);
         expect(requestBody.options.num_predict).toBe(123);
       },
     );
@@ -1657,6 +1659,60 @@ describe("createOllamaStreamFn", () => {
     );
   });
+
+  it("omits num_ctx when the model has no params.num_ctx and no catalog window", async () => {
+    await withMockNdjsonFetch(
+      [
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
+      ],
+      async (fetchMock) => {
+        const stream = await createOllamaTestStream({
+          baseUrl: "http://ollama-host:11434",
+          // Override the helper's default contextWindow back to undefined so
+          // the request leaves num_ctx unset and Ollama's Modelfile decides.
+          model: { contextWindow: undefined },
+        });
+        await collectStreamEvents(stream);
+        const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
+        if (typeof requestInit.body !== "string") {
+          throw new Error("Expected string request body");
+        }
+        const requestBody = JSON.parse(requestInit.body) as {
+          options?: { num_ctx?: number };
+        };
+        expect(requestBody.options?.num_ctx).toBeUndefined();
+      },
+    );
+  });
+
+  it("falls back to catalog contextWindow as num_ctx when params.num_ctx is unset", async () => {
+    await withMockNdjsonFetch(
+      [
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
+      ],
+      async (fetchMock) => {
+        const stream = await createOllamaTestStream({
+          baseUrl: "http://ollama-host:11434",
+          model: { contextWindow: 32768 },
+        });
+        await collectStreamEvents(stream);
+        const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
+        if (typeof requestInit.body !== "string") {
+          throw new Error("Expected string request body");
+        }
+        const requestBody = JSON.parse(requestInit.body) as {
+          options?: { num_ctx?: number };
+        };
+        expect(requestBody.options?.num_ctx).toBe(32768);
+      },
+    );
+  });
+
   it("maps configured native Ollama params.thinking=max to the stable top-level think value", async () => {
     await withMockNdjsonFetch(
       [

View File

@@ -290,6 +290,34 @@ function resolveOllamaNumCtx(model: ProviderRuntimeModel): number {
   );
 }
+
+/**
+ * Resolves num_ctx for native /api/chat requests:
+ * 1. an explicit `params.num_ctx` set on the model wins,
+ * 2. otherwise the catalog `contextWindow` / `maxTokens` is forwarded so
+ *    OpenClaw's known model windows survive the trip and `/api/chat` does
+ *    not silently truncate to Ollama's small Modelfile default (typically
+ *    2048 tokens), which is too small for a system prompt plus tool
+ *    definitions and produces "model picks wrong tools / says nonsense"
+ *    symptoms on agent turns,
+ * 3. when neither is known, return undefined so the Modelfile decides.
+ *
+ * This intentionally differs from `resolveOllamaNumCtx` by not falling back
+ * to `DEFAULT_CONTEXT_TOKENS`: that constant is a sane wrapper-side guess
+ * for the OpenAI-compat path, but on the native path we prefer to leave
+ * num_ctx absent rather than guess a window for an unknown model.
+ */
+function resolveOllamaNativeNumCtx(model: ProviderRuntimeModel): number | undefined {
+  const configured = resolveOllamaConfiguredNumCtx(model);
+  if (configured !== undefined) {
+    return configured;
+  }
+  const catalog = model.contextWindow ?? model.maxTokens;
+  if (typeof catalog === "number" && Number.isFinite(catalog) && catalog > 0) {
+    return Math.floor(catalog);
+  }
+  return undefined;
+}
+
 function resolveOllamaModelOptions(model: ProviderRuntimeModel): Record<string, unknown> {
   const options: Record<string, unknown> = {};
   const params = model.params;
@@ -303,7 +331,7 @@ function resolveOllamaModelOptions(model: ProviderRuntimeModel): Record<string,
       }
     }
   }
-  const numCtx = resolveOllamaConfiguredNumCtx(model);
+  const numCtx = resolveOllamaNativeNumCtx(model);
   if (numCtx !== undefined) {
     options.num_ctx = numCtx;
   }
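
Restated outside the diff as a self-contained sketch, this is the resolution order the swap enables. FakeModel is a hypothetical stand-in for ProviderRuntimeModel, and the first branch assumes resolveOllamaConfiguredNumCtx simply validates and returns params.num_ctx:

type FakeModel = {
  params?: { num_ctx?: number };
  contextWindow?: number;
  maxTokens?: number;
};

// Mirrors resolveOllamaNativeNumCtx above, with the configured lookup inlined.
function nativeNumCtx(model: FakeModel): number | undefined {
  const configured = model.params?.num_ctx;
  if (typeof configured === "number" && Number.isFinite(configured) && configured > 0) {
    return Math.floor(configured); // 1. explicit params.num_ctx wins
  }
  const catalog = model.contextWindow ?? model.maxTokens;
  if (typeof catalog === "number" && Number.isFinite(catalog) && catalog > 0) {
    return Math.floor(catalog); // 2. catalog window forwarded
  }
  return undefined; // 3. leave num_ctx to the Modelfile
}

console.log(nativeNumCtx({ params: { num_ctx: 8192 }, contextWindow: 131072 })); // 8192
console.log(nativeNumCtx({ contextWindow: 131072 })); // 131072
console.log(nativeNumCtx({ maxTokens: 32768 })); // 32768
console.log(nativeNumCtx({})); // undefined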