openclaw/extensions/lmstudio/src/models.test.ts

import {
  SELF_HOSTED_DEFAULT_CONTEXT_WINDOW,
  SELF_HOSTED_DEFAULT_MAX_TOKENS,
} from "openclaw/plugin-sdk/provider-setup";
import { afterEach, describe, expect, it, vi } from "vitest";
import { LMSTUDIO_DEFAULT_LOAD_CONTEXT_LENGTH } from "./defaults.js";
import { discoverLmstudioModels, ensureLmstudioModelLoaded } from "./models.fetch.js";
import {
  normalizeLmstudioProviderConfig,
  resolveLmstudioInferenceBase,
  resolveLmstudioReasoningCompat,
  resolveLmstudioReasoningCapability,
  resolveLmstudioServerBase,
} from "./models.js";

const fetchWithSsrFGuardMock = vi.hoisted(() => vi.fn());

vi.mock("openclaw/plugin-sdk/ssrf-runtime", async (importOriginal) => {
  const actual = await importOriginal<typeof import("openclaw/plugin-sdk/ssrf-runtime")>();
  return {
    ...actual,
    fetchWithSsrFGuard: (...args: unknown[]) => fetchWithSsrFGuardMock(...args),
  };
});

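// Covers base-URL resolution, provider-config normalization, reasoning
// capability/compat mapping, model discovery, and just-in-time model loading.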
describe("lmstudio-models", () => {
const asFetch = (mock: unknown) => mock as typeof fetch;
const parseJsonRequestBody = (init: RequestInit | undefined): unknown => {
if (typeof init?.body !== "string") {
throw new Error("Expected request body to be a JSON string");
}
return JSON.parse(init.body) as unknown;
};
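  // Fetch mock serving a one-model /api/v1/models listing and accepting
  // /api/v1/models/load; `loadedContextLength` simulates an already-loaded instance.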
  const createModelLoadFetchMock = (params?: {
    loadedContextLength?: number;
    maxContextLength?: number;
  }) =>
    vi.fn(async (url: string | URL, init?: RequestInit) => {
      if (String(url).endsWith("/api/v1/models")) {
        return {
          ok: true,
          json: async () => ({
            models: [
              {
                type: "llm",
                key: "qwen3-8b-instruct",
                max_context_length: params?.maxContextLength,
                loaded_instances: params?.loadedContextLength
                  ? [{ id: "inst-1", config: { context_length: params.loadedContextLength } }]
                  : [],
              },
            ],
          }),
        };
      }
      if (String(url).endsWith("/api/v1/models/load")) {
        return {
          ok: true,
          json: async () => ({ status: "loaded" }),
          requestInit: init,
        };
      }
      throw new Error(`Unexpected fetch URL: ${String(url)}`);
    });

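  // Helpers for locating the /models/load request among recorded fetch calls and
  // asserting on the context_length carried in its JSON body.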
  const findModelLoadCall = (fetchMock: ReturnType<typeof createModelLoadFetchMock>) =>
    fetchMock.mock.calls.find((call) => String(call[0]).endsWith("/models/load"));

  const expectLoadContextLength = (
    fetchMock: ReturnType<typeof createModelLoadFetchMock>,
    contextLength: number,
  ) => {
    const loadCall = findModelLoadCall(fetchMock);
    expect(loadCall).toBeDefined();
    const loadInit = loadCall?.[1] as RequestInit;
    const loadBody = parseJsonRequestBody(loadInit) as { context_length: number };
    expect(loadBody.context_length).toBe(contextLength);
  };

  afterEach(() => {
    fetchWithSsrFGuardMock.mockReset();
    vi.unstubAllGlobals();
  });

it("normalizes LM Studio base URLs", () => {
expect(resolveLmstudioServerBase()).toBe("http://localhost:1234");
expect(resolveLmstudioInferenceBase()).toBe("http://localhost:1234/v1");
expect(resolveLmstudioServerBase("http://localhost:1234/api/v1")).toBe("http://localhost:1234");
expect(resolveLmstudioInferenceBase("http://localhost:1234/api/v1")).toBe(
"http://localhost:1234/v1",
);
expect(resolveLmstudioServerBase("localhost:1234/api/v1")).toBe("http://localhost:1234");
expect(resolveLmstudioInferenceBase("localhost:1234/api/v1")).toBe("http://localhost:1234/v1");
});
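  // allowPrivateNetwork is defaulted on for bare endpoints, while an explicit
  // opt-out (and any custom headers) must survive normalization untouched.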
it("marks configured LM Studio endpoints as trusted private-network model targets", () => {
expect(
normalizeLmstudioProviderConfig({
baseUrl: "http://192.168.1.10:1234",
models: [],
}),
).toEqual({
baseUrl: "http://192.168.1.10:1234/v1",
request: { allowPrivateNetwork: true },
models: [],
});
expect(
normalizeLmstudioProviderConfig({
baseUrl: "http://gpu-box.local:1234/v1",
request: {
allowPrivateNetwork: false,
headers: { "X-Proxy-Auth": "token" },
},
models: [],
}),
).toEqual({
baseUrl: "http://gpu-box.local:1234/v1",
request: {
allowPrivateNetwork: false,
headers: { "X-Proxy-Auth": "token" },
},
models: [],
});
});
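  // A model with no reasoning metadata, or whose only allowed option is "off",
  // should not count as a reasoning-capable model.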
it("resolves reasoning capability for supported and unsupported options", () => {
expect(resolveLmstudioReasoningCapability({ capabilities: undefined })).toBe(false);
expect(
resolveLmstudioReasoningCapability({
capabilities: {
reasoning: {
allowed_options: ["low", "medium", "high"],
default: "low",
},
},
}),
).toBe(true);
expect(
resolveLmstudioReasoningCapability({
capabilities: {
reasoning: {
allowed_options: ["off"],
default: "off",
},
},
}),
).toBe(false);
});
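  // OpenAI-style efforts (none/low/medium/high) are mapped onto LM Studio's native
  // options; a model whose only option is "off" yields no compat entry at all.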
it("maps LM Studio native reasoning options into OpenAI-compatible effort compat", () => {
expect(
resolveLmstudioReasoningCompat({
capabilities: {
reasoning: {
allowed_options: ["off", "on"],
default: "on",
},
},
}),
).toEqual({
supportsReasoningEffort: true,
supportedReasoningEfforts: ["off", "on"],
reasoningEffortMap: expect.objectContaining({
off: "off",
none: "off",
low: "on",
medium: "on",
high: "on",
}),
});
expect(
resolveLmstudioReasoningCompat({
capabilities: {
reasoning: {
allowed_options: ["off"],
default: "off",
},
},
}),
).toBeUndefined();
});
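  // Discovery should surface only "llm" entries with non-blank keys: the embedding
  // model and the whitespace-only key below are expected to be filtered out.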
it("discovers llm models and maps metadata", async () => {
const fetchMock = vi.fn(async (_url: string | URL) => ({
ok: true,
json: async () => ({
models: [
{
type: "llm",
key: "qwen3-8b-instruct",
display_name: "Qwen3 8B",
max_context_length: 262144,
format: "mlx",
capabilities: {
vision: true,
trained_for_tool_use: true,
reasoning: {
allowed_options: ["off", "on"],
default: "on",
},
},
loaded_instances: [{ id: "inst-1", config: { context_length: 64000 } }],
},
{
type: "llm",
key: "deepseek-r1",
},
{
type: "embedding",
key: "text-embedding-nomic-embed-text-v1.5",
},
{
type: "llm",
key: " ",
},
],
}),
}));
const models = await discoverLmstudioModels({
baseUrl: "http://localhost:1234/v1",
apiKey: "lm-token",
quiet: false,
fetchImpl: asFetch(fetchMock),
});
expect(fetchMock).toHaveBeenCalledWith(
"http://localhost:1234/api/v1/models",
expect.objectContaining({
headers: {
Authorization: "Bearer lm-token",
},
}),
);
expect(models).toHaveLength(2);
expect(models[0]).toEqual({
id: "qwen3-8b-instruct",
name: "Qwen3 8B (MLX, vision, tool-use, loaded)",
reasoning: true,
input: ["text", "image"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
compat: {
supportsUsageInStreaming: true,
supportsReasoningEffort: true,
supportedReasoningEfforts: ["off", "on"],
reasoningEffortMap: expect.objectContaining({
off: "off",
none: "off",
medium: "on",
high: "on",
}),
},
contextWindow: 262144,
contextTokens: LMSTUDIO_DEFAULT_LOAD_CONTEXT_LENGTH,
maxTokens: SELF_HOSTED_DEFAULT_MAX_TOKENS,
});
expect(models[1]).toEqual({
id: "deepseek-r1",
name: "deepseek-r1",
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
compat: { supportsUsageInStreaming: true },
contextWindow: SELF_HOSTED_DEFAULT_CONTEXT_WINDOW,
contextTokens: LMSTUDIO_DEFAULT_LOAD_CONTEXT_LENGTH,
maxTokens: SELF_HOSTED_DEFAULT_MAX_TOKENS,
});
});
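  // An instance already loaded with a sufficient context window should be reused
  // without ever hitting /api/v1/models/load.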
it("skips model load when already loaded", async () => {
const fetchMock = createModelLoadFetchMock({ loadedContextLength: 64000 });
vi.stubGlobal("fetch", asFetch(fetchMock));
await expect(
ensureLmstudioModelLoaded({
baseUrl: "http://localhost:1234/v1",
modelKey: "qwen3-8b-instruct",
}),
).resolves.toBeUndefined();
expect(fetchMock).toHaveBeenCalledTimes(1);
const calledUrls = fetchMock.mock.calls.map((call) => String(call[0]));
expect(calledUrls).not.toContain("http://localhost:1234/api/v1/models/load");
});
it("reloads model when requested context length exceeds the loaded window", async () => {
const fetchMock = createModelLoadFetchMock({
loadedContextLength: 4096,
maxContextLength: 32768,
});
vi.stubGlobal("fetch", asFetch(fetchMock));
await expect(
ensureLmstudioModelLoaded({
baseUrl: "http://localhost:1234/v1",
modelKey: "qwen3-8b-instruct",
requestedContextLength: 8192,
}),
).resolves.toBeUndefined();
expect(fetchMock).toHaveBeenCalledTimes(2);
expectLoadContextLength(fetchMock, 8192);
});
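  // With no requested length, the default load target is clamped down to the
  // model's max_context_length (32768 here).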
it("reloads model to the clamped default target when already loaded below the default window", async () => {
const fetchMock = createModelLoadFetchMock({
loadedContextLength: 4096,
maxContextLength: 32768,
});
vi.stubGlobal("fetch", asFetch(fetchMock));
await expect(
ensureLmstudioModelLoaded({
baseUrl: "http://localhost:1234/v1",
modelKey: "qwen3-8b-instruct",
}),
).resolves.toBeUndefined();
expect(fetchMock).toHaveBeenCalledTimes(2);
expectLoadContextLength(fetchMock, 32768);
});
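  // The load request should trim the model key, merge caller headers, and let the
  // apiKey-derived Authorization header win over a caller-supplied one.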
it("loads model with clamped context length and merged headers", async () => {
const fetchMock = createModelLoadFetchMock({ maxContextLength: 32768 });
vi.stubGlobal("fetch", asFetch(fetchMock));
await expect(
ensureLmstudioModelLoaded({
baseUrl: "http://localhost:1234/v1",
apiKey: "lm-token",
headers: {
"X-Proxy-Auth": "required",
Authorization: "Bearer override",
},
modelKey: " qwen3-8b-instruct ",
}),
).resolves.toBeUndefined();
expect(fetchMock).toHaveBeenCalledTimes(2);
const loadCall = findModelLoadCall(fetchMock);
expect(loadCall).toBeDefined();
expect(loadCall?.[1]).toMatchObject({
method: "POST",
headers: {
"X-Proxy-Auth": "required",
Authorization: "Bearer lm-token",
"Content-Type": "application/json",
},
body: JSON.stringify({
model: "qwen3-8b-instruct",
context_length: 32768,
}),
});
const loadInit = loadCall![1] as RequestInit;
const loadBody = parseJsonRequestBody(loadInit) as { context_length: number };
expect(loadBody.context_length).not.toBe(LMSTUDIO_DEFAULT_LOAD_CONTEXT_LENGTH);
});
it("uses requested context length when provided for model load", async () => {
const fetchMock = createModelLoadFetchMock({ maxContextLength: 32768 });
vi.stubGlobal("fetch", asFetch(fetchMock));
await expect(
ensureLmstudioModelLoaded({
baseUrl: "http://localhost:1234/v1",
modelKey: "qwen3-8b-instruct",
requestedContextLength: 8192,
}),
).resolves.toBeUndefined();
expectLoadContextLength(fetchMock, 8192);
});
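  // A failed discovery response should surface the HTTP status in the error and
  // abort before any load attempt is made.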
it("throws when model discovery fails", async () => {
const fetchMock = vi.fn(async () => ({
ok: false,
status: 401,
}));
vi.stubGlobal("fetch", asFetch(fetchMock));
await expect(
ensureLmstudioModelLoaded({
baseUrl: "http://localhost:1234/v1",
modelKey: "qwen3-8b-instruct",
}),
).rejects.toThrow("LM Studio model discovery failed (401)");
expect(fetchMock).toHaveBeenCalledTimes(1);
});
});