mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-20 04:44:46 +00:00
fix(providers): read nested llama cpp props context
This commit is contained in:
committed by
Peter Steinberger
parent
7c7d19ec84
commit
f4be39c4f4
@@ -146,7 +146,129 @@ describe("discoverOpenAICompatibleLocalModels", () => {
|
||||
expect(propsRelease).toHaveBeenCalledOnce();
|
||||
});
|
||||
|
||||
it("uses llama.cpp /props n_ctx as the runtime context cap", async () => {
|
||||
it("uses llama.cpp nested /props n_ctx as the runtime context cap", async () => {
|
||||
const modelsRelease = vi.fn(async () => undefined);
|
||||
const propsRelease = vi.fn(async () => undefined);
|
||||
fetchWithSsrFGuardMock.mockResolvedValueOnce({
|
||||
response: new Response(
|
||||
JSON.stringify({
|
||||
data: [
|
||||
{
|
||||
id: "qwen3.6-mxfp4-moe",
|
||||
meta: { n_ctx_train: 262_144 },
|
||||
},
|
||||
],
|
||||
}),
|
||||
{ status: 200 },
|
||||
),
|
||||
finalUrl: "http://127.0.0.1:8080/v1/models",
|
||||
release: modelsRelease,
|
||||
});
|
||||
fetchWithSsrFGuardMock.mockResolvedValueOnce({
|
||||
response: new Response(JSON.stringify({ default_generation_settings: { n_ctx: 65_536 } }), {
|
||||
status: 200,
|
||||
}),
|
||||
finalUrl: "http://127.0.0.1:8080/props",
|
||||
release: propsRelease,
|
||||
});
|
||||
|
||||
const models = await discoverOpenAICompatibleLocalModels({
|
||||
baseUrl: "http://127.0.0.1:8080/v1",
|
||||
label: "llama.cpp",
|
||||
env: {},
|
||||
});
|
||||
|
||||
expect(models).toEqual([
|
||||
expect.objectContaining({
|
||||
id: "qwen3.6-mxfp4-moe",
|
||||
contextWindow: 262_144,
|
||||
contextTokens: 65_536,
|
||||
}),
|
||||
]);
|
||||
expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
expect.objectContaining({
|
||||
url: "http://127.0.0.1:8080/props",
|
||||
}),
|
||||
);
|
||||
expect(modelsRelease).toHaveBeenCalledOnce();
|
||||
expect(propsRelease).toHaveBeenCalledOnce();
|
||||
});
|
||||
|
||||
it("scopes llama.cpp /props runtime caps to each discovered model", async () => {
|
||||
const modelsRelease = vi.fn(async () => undefined);
|
||||
const firstPropsRelease = vi.fn(async () => undefined);
|
||||
const secondPropsRelease = vi.fn(async () => undefined);
|
||||
fetchWithSsrFGuardMock.mockResolvedValueOnce({
|
||||
response: new Response(
|
||||
JSON.stringify({
|
||||
data: [
|
||||
{
|
||||
id: "qwen/router-a",
|
||||
meta: { n_ctx_train: 262_144 },
|
||||
},
|
||||
{
|
||||
id: "qwen/router-b",
|
||||
meta: { n_ctx_train: 131_072 },
|
||||
},
|
||||
],
|
||||
}),
|
||||
{ status: 200 },
|
||||
),
|
||||
finalUrl: "http://127.0.0.1:8080/v1/models",
|
||||
release: modelsRelease,
|
||||
});
|
||||
fetchWithSsrFGuardMock.mockResolvedValueOnce({
|
||||
response: new Response(JSON.stringify({ default_generation_settings: { n_ctx: 65_536 } }), {
|
||||
status: 200,
|
||||
}),
|
||||
finalUrl: "http://127.0.0.1:8080/props?model=qwen%2Frouter-a",
|
||||
release: firstPropsRelease,
|
||||
});
|
||||
fetchWithSsrFGuardMock.mockResolvedValueOnce({
|
||||
response: new Response(JSON.stringify({ default_generation_settings: { n_ctx: 32_768 } }), {
|
||||
status: 200,
|
||||
}),
|
||||
finalUrl: "http://127.0.0.1:8080/props?model=qwen%2Frouter-b",
|
||||
release: secondPropsRelease,
|
||||
});
|
||||
|
||||
const models = await discoverOpenAICompatibleLocalModels({
|
||||
baseUrl: "http://127.0.0.1:8080/v1",
|
||||
label: "llama.cpp",
|
||||
env: {},
|
||||
});
|
||||
|
||||
expect(models).toEqual([
|
||||
expect.objectContaining({
|
||||
id: "qwen/router-a",
|
||||
contextWindow: 262_144,
|
||||
contextTokens: 65_536,
|
||||
}),
|
||||
expect.objectContaining({
|
||||
id: "qwen/router-b",
|
||||
contextWindow: 131_072,
|
||||
contextTokens: 32_768,
|
||||
}),
|
||||
]);
|
||||
expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
expect.objectContaining({
|
||||
url: "http://127.0.0.1:8080/props?model=qwen%2Frouter-a",
|
||||
}),
|
||||
);
|
||||
expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith(
|
||||
3,
|
||||
expect.objectContaining({
|
||||
url: "http://127.0.0.1:8080/props?model=qwen%2Frouter-b",
|
||||
}),
|
||||
);
|
||||
expect(modelsRelease).toHaveBeenCalledOnce();
|
||||
expect(firstPropsRelease).toHaveBeenCalledOnce();
|
||||
expect(secondPropsRelease).toHaveBeenCalledOnce();
|
||||
});
|
||||
|
||||
it("keeps top-level llama.cpp /props n_ctx as a compatibility fallback", async () => {
|
||||
const modelsRelease = vi.fn(async () => undefined);
|
||||
const propsRelease = vi.fn(async () => undefined);
|
||||
fetchWithSsrFGuardMock.mockResolvedValueOnce({
|
||||
@@ -183,12 +305,6 @@ describe("discoverOpenAICompatibleLocalModels", () => {
|
||||
contextTokens: 65_536,
|
||||
}),
|
||||
]);
|
||||
expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
expect.objectContaining({
|
||||
url: "http://127.0.0.1:8080/props",
|
||||
}),
|
||||
);
|
||||
expect(modelsRelease).toHaveBeenCalledOnce();
|
||||
expect(propsRelease).toHaveBeenCalledOnce();
|
||||
});
|
||||
|
||||
@@ -42,6 +42,9 @@ type OpenAICompatModelsResponse = {
|
||||
};
|
||||
|
||||
type LlamaCppPropsResponse = {
|
||||
default_generation_settings?: {
|
||||
n_ctx?: unknown;
|
||||
};
|
||||
n_ctx?: unknown;
|
||||
};
|
||||
|
||||
@@ -76,23 +79,28 @@ function readPositiveInteger(value: unknown): number | undefined {
|
||||
return Math.trunc(value);
|
||||
}
|
||||
|
||||
function resolveLlamaCppPropsUrl(baseUrl: string): string {
|
||||
function resolveLlamaCppPropsUrl(baseUrl: string, modelId?: string): string {
|
||||
const parsed = new URL(baseUrl);
|
||||
const pathname = parsed.pathname.replace(/\/+$/, "");
|
||||
parsed.pathname = pathname.endsWith("/v1") ? pathname.slice(0, -3) || "/" : pathname;
|
||||
const rootPathname = pathname.endsWith("/v1") ? pathname.slice(0, -3) || "/" : pathname;
|
||||
parsed.pathname = `${rootPathname.replace(/\/+$/, "")}/props`;
|
||||
parsed.search = "";
|
||||
parsed.hash = "";
|
||||
const root = parsed.toString().replace(/\/+$/, "");
|
||||
return `${root}/props`;
|
||||
const normalizedModelId = normalizeOptionalString(modelId);
|
||||
if (normalizedModelId) {
|
||||
parsed.searchParams.set("model", normalizedModelId);
|
||||
}
|
||||
return parsed.toString();
|
||||
}
|
||||
|
||||
async function discoverLlamaCppRuntimeContextTokens(params: {
|
||||
baseUrl: string;
|
||||
apiKey?: string;
|
||||
modelId?: string;
|
||||
}): Promise<number | undefined> {
|
||||
let url: string;
|
||||
try {
|
||||
url = resolveLlamaCppPropsUrl(params.baseUrl);
|
||||
url = resolveLlamaCppPropsUrl(params.baseUrl, params.modelId);
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
@@ -111,7 +119,10 @@ async function discoverLlamaCppRuntimeContextTokens(params: {
|
||||
return undefined;
|
||||
}
|
||||
const data = (await response.json()) as LlamaCppPropsResponse;
|
||||
return readPositiveInteger(data.n_ctx);
|
||||
return (
|
||||
readPositiveInteger(data.default_generation_settings?.n_ctx) ??
|
||||
readPositiveInteger(data.n_ctx)
|
||||
);
|
||||
} finally {
|
||||
await release();
|
||||
}
|
||||
@@ -158,23 +169,41 @@ export async function discoverOpenAICompatibleLocalModels(params: {
|
||||
return [];
|
||||
}
|
||||
|
||||
const runtimeContextTokens =
|
||||
params.contextWindow === undefined
|
||||
? await discoverLlamaCppRuntimeContextTokens({
|
||||
baseUrl: trimmedBaseUrl,
|
||||
apiKey: params.apiKey,
|
||||
})
|
||||
: undefined;
|
||||
|
||||
return models.flatMap((model) => {
|
||||
const discoveredModels = models.flatMap((model) => {
|
||||
const modelId = normalizeOptionalString(model.id);
|
||||
if (!modelId) {
|
||||
return [];
|
||||
}
|
||||
return [{ id: modelId, meta: model.meta }];
|
||||
});
|
||||
const runtimeContextTokensByModelId = new Map<string, number>();
|
||||
if (params.contextWindow === undefined) {
|
||||
const uniqueModelIds = [...new Set(discoveredModels.map((model) => model.id))];
|
||||
const runtimeContextTokenResults = await Promise.all(
|
||||
uniqueModelIds.map(
|
||||
async (modelId) =>
|
||||
[
|
||||
modelId,
|
||||
await discoverLlamaCppRuntimeContextTokens({
|
||||
baseUrl: trimmedBaseUrl,
|
||||
apiKey: params.apiKey,
|
||||
modelId: uniqueModelIds.length > 1 ? modelId : undefined,
|
||||
}),
|
||||
] as const,
|
||||
),
|
||||
);
|
||||
for (const [modelId, runtimeContextTokens] of runtimeContextTokenResults) {
|
||||
if (runtimeContextTokens) {
|
||||
runtimeContextTokensByModelId.set(modelId, runtimeContextTokens);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return discoveredModels.map((model) => {
|
||||
const modelConfig: ModelDefinitionConfig = {
|
||||
id: modelId,
|
||||
name: modelId,
|
||||
reasoning: isReasoningModelHeuristic(modelId),
|
||||
id: model.id,
|
||||
name: model.id,
|
||||
reasoning: isReasoningModelHeuristic(model.id),
|
||||
input: ["text"],
|
||||
cost: SELF_HOSTED_DEFAULT_COST,
|
||||
contextWindow:
|
||||
@@ -183,10 +212,11 @@ export async function discoverOpenAICompatibleLocalModels(params: {
|
||||
SELF_HOSTED_DEFAULT_CONTEXT_WINDOW,
|
||||
maxTokens: params.maxTokens ?? SELF_HOSTED_DEFAULT_MAX_TOKENS,
|
||||
};
|
||||
const runtimeContextTokens = runtimeContextTokensByModelId.get(model.id);
|
||||
if (runtimeContextTokens) {
|
||||
modelConfig.contextTokens = runtimeContextTokens;
|
||||
}
|
||||
return [modelConfig];
|
||||
return modelConfig;
|
||||
});
|
||||
} finally {
|
||||
await release();
|
||||
|
||||
Reference in New Issue
Block a user