fix: let lmstudio skip native preload

Author: Peter Steinberger
Date: 2026-05-02 06:09:05 +01:00
parent cbec76c198
commit 0b3d260285

11 changed files with 110 additions and 13 deletions

@@ -199,6 +199,49 @@ describe("lmstudio stream wrapper", () => {
     expect(baseStream).toHaveBeenCalledTimes(1);
   });
 
+  it("skips native model preload when provider params disable it", async () => {
+    const baseStream = buildDoneStreamFn();
+    const wrapped = wrapLmstudioInferencePreload({
+      provider: "lmstudio",
+      modelId: "qwen3-8b-instruct",
+      config: {
+        models: {
+          providers: {
+            lmstudio: {
+              baseUrl: "http://localhost:1234",
+              params: { preload: false },
+              models: [],
+            },
+          },
+        },
+      },
+      streamFn: baseStream,
+    } as never);
+
+    const events = await collectEvents(
+      wrapped(
+        {
+          provider: "lmstudio",
+          api: "openai-completions",
+          id: "qwen3-8b-instruct",
+        } as never,
+        { messages: [] } as never,
+        undefined as never,
+      ),
+    );
+
+    expect(events).toEqual([expect.objectContaining({ type: "done" })]);
+    expect(ensureLmstudioModelLoadedMock).not.toHaveBeenCalled();
+    expect(baseStream).toHaveBeenCalledTimes(1);
+    expect(baseStream).toHaveBeenCalledWith(
+      expect.objectContaining({
+        compat: expect.objectContaining({ supportsUsageInStreaming: true }),
+      }),
+      expect.anything(),
+      undefined,
+    );
+  });
+
   it("dedupes concurrent preload requests for the same model and context", async () => {
     let resolvePreload: (() => void) | undefined;
     ensureLmstudioModelLoadedMock.mockImplementationOnce(

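Note: a provider config opts out of native preload with nothing more than `params.preload: false`. A minimal TypeScript sketch of such an entry, mirroring the test fixture above (field names come from the test; where this config lives is host-specific):

// Sketch of an lmstudio provider entry with native preload disabled.
// Omitting `preload` keeps the default behaviour (preload enabled).
const config = {
  models: {
    providers: {
      lmstudio: {
        baseUrl: "http://localhost:1234",
        params: { preload: false },
        models: [],
      },
    },
  },
};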
@@ -121,6 +121,22 @@ function toRecord(value: unknown): Record<string, unknown> | undefined {
   return value && typeof value === "object" ? (value as Record<string, unknown>) : undefined;
 }
 
+function shouldPreloadLmstudioModels(value: unknown): boolean {
+  const providerConfig = toRecord(value);
+  const params = toRecord(providerConfig?.params);
+  return params?.preload !== false;
+}
+
+function withLmstudioUsageCompat(model: StreamModel): StreamModel {
+  return {
+    ...model,
+    compat: {
+      ...(model.compat && typeof model.compat === "object" ? model.compat : {}),
+      supportsUsageInStreaming: true,
+    },
+  };
+}
+
 function resolveContextToolNames(context: StreamContext): Set<string> {
   const tools = (context as { tools?: unknown }).tools;
   if (!Array.isArray(tools)) {
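`shouldPreloadLmstudioModels` treats preload as opt-out: anything other than an explicit `preload: false` leaves it enabled. A quick sketch of the resulting truth table (calls against the helper added above):

// Sketch: expected results for shouldPreloadLmstudioModels.
shouldPreloadLmstudioModels(undefined);                      // true: no provider config
shouldPreloadLmstudioModels({});                             // true: no params
shouldPreloadLmstudioModels({ params: {} });                 // true: preload omitted
shouldPreloadLmstudioModels({ params: { preload: true } });  // true: explicit opt-in
shouldPreloadLmstudioModels({ params: { preload: false } }); // false: the only opt-out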
@@ -381,7 +397,15 @@ export function wrapLmstudioInferencePreload(ctx: ProviderWrapStreamFnContext):
     if (!modelKey) {
       return underlying(model, context, options);
     }
-    const providerBaseUrl = ctx.config?.models?.providers?.[LMSTUDIO_PROVIDER_ID]?.baseUrl;
+    const providerConfig = ctx.config?.models?.providers?.[LMSTUDIO_PROVIDER_ID];
+    if (!shouldPreloadLmstudioModels(providerConfig)) {
+      const stream = underlying(withLmstudioUsageCompat(model), context, options);
+      return (async () => {
+        const resolvedStream = stream instanceof Promise ? await stream : stream;
+        return wrapLmstudioPlainTextToolCalls(resolvedStream, context);
+      })();
+    }
+    const providerBaseUrl = providerConfig?.baseUrl;
     const resolvedBaseUrl = resolveLmstudioInferenceBase(
       typeof model.baseUrl === "string" ? model.baseUrl : providerBaseUrl,
     );
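The skip path keeps the two guarantees of the main path, forced usage compat and plain-text tool-call wrapping, and bypasses only the native preload round-trip. The maybe-promise normalization it reuses can be spelled more tersely, since `await` in an async function also resolves plain values. A functionally equivalent sketch:

// Sketch: `await` handles both T and Promise<T>, so this matches
// `stream instanceof Promise ? await stream : stream`.
async function resolveStream<T>(stream: T | Promise<T>): Promise<T> {
  return await stream;
}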
@@ -454,14 +478,7 @@ export function wrapLmstudioInferencePreload(ctx: ProviderWrapStreamFnContext):
     // LM Studio uses OpenAI-compatible streaming usage payloads when requested via
     // `stream_options.include_usage`. Force this compat flag at call time so usage
     // reporting remains enabled even when catalog entries omitted compat metadata.
-    const modelWithUsageCompat = {
-      ...model,
-      compat: {
-        ...(model.compat && typeof model.compat === "object" ? model.compat : {}),
-        supportsUsageInStreaming: true,
-      },
-    };
-    const stream = underlying(modelWithUsageCompat, context, options);
+    const stream = underlying(withLmstudioUsageCompat(model), context, options);
     const resolvedStream = stream instanceof Promise ? await stream : stream;
     return wrapLmstudioPlainTextToolCalls(resolvedStream, context);
   })();
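For clarity, the extracted `withLmstudioUsageCompat` helper merges rather than replaces compat metadata: flags already on the model survive the spread, and only `supportsUsageInStreaming` is forced on. A sketch of the merge semantics (the `supportsParallelToolCalls` flag is hypothetical, for illustration only):

// Sketch: pre-existing compat flags are preserved by the spread merge.
const patched = withLmstudioUsageCompat({
  id: "qwen3-8b-instruct",
  compat: { supportsParallelToolCalls: true }, // hypothetical flag
} as never);
// patched.compat: { supportsParallelToolCalls: true, supportsUsageInStreaming: true }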