fix(model): preserve LM Studio '@' quant suffixes in model name resolution

stripModelProfileSuffix() in providers.ts naively truncated model names at
the first '@', discarding quant variants like @iq3_xxs, @iq4_xs, @q4_k_xl
that LM Studio uses to distinguish quantization levels.

This caused two user-facing bugs (fixes #71474):
1. /model lmstudio/qwen3.6-27b@iq3_xxs → 'model not allowed: lmstudio/qwen3.6-27b'
2. API requests sent truncated model name → LM Studio picked a random quant

Changes:
- Replace the naive indexOf('@') strip in providers.ts with
  splitTrailingAuthProfile() which already handles quant suffixes
- Extend the quant-suffix regex (q\d+...) to also match importance-
  quantization tags (iq3_xxs, iq4_xs, ...) via an i?q\d+ pattern
- Add tests for @iq* quant suffixes and auth-profile-after-iq combos
This commit is contained in:
Bartok9
2026-04-25 04:40:45 -04:00
committed by Peter Steinberger
parent 94ceb2bbe9
commit 5bb78ea7ed
3 changed files with 23 additions and 5 deletions

View File

@@ -80,6 +80,22 @@ describe("splitTrailingAuthProfile", () => {
});
});
// Importance-quantization tags (@iq*) are part of the model id, so no
// profile is split off and the reference comes back unchanged.
it("keeps @iq* importance-quantization suffixes in model ids", () => {
  const iq3Ref = "lmstudio/qwen3.6-27b@iq3_xxs";
  const iq4Ref = "lmstudio/qwen3.6-27b@iq4_xs";

  expect(splitTrailingAuthProfile(iq3Ref)).toEqual({ model: iq3Ref });
  expect(splitTrailingAuthProfile(iq4Ref)).toEqual({ model: iq4Ref });
});
// A second '@' following an @iq* quant tag still delimits the auth profile.
it("supports auth profiles after @iq* quant suffixes", () => {
  const parsed = splitTrailingAuthProfile("lmstudio/qwen3.6-27b@iq3_xxs@work");

  expect(parsed).toEqual({ model: "lmstudio/qwen3.6-27b@iq3_xxs", profile: "work" });
});
it("keeps @4bit/@8bit quant suffixes in model ids", () => {
expect(splitTrailingAuthProfile("lmstudio-mb-pro/gemma-4-31b@4bit")).toEqual({
model: "lmstudio-mb-pro/gemma-4-31b@4bit",

View File

@@ -29,9 +29,12 @@ export function splitTrailingAuthProfile(raw: string): {
// of the model id. These often use '@' (ex: gemma-4-31b-it@q8_0) which would
// otherwise be misinterpreted as an auth profile delimiter.
//
// Covers standard GGUF quant tags (q4_0, q8_0, q4_k_xl, …) and importance-
// quantization variants (iq3_xxs, iq4_xs, …) used by llama.cpp / LM Studio.
//
// If an auth profile is needed, it can still be specified as a second suffix:
// lmstudio/foo@q8_0@work
if (/^(?:q\d+(?:_[a-z0-9]+)*|\d+bit)(?:@|$)/i.test(suffixAfterDelimiter())) {
// lmstudio/foo@q8_0@work lmstudio/foo@iq3_xxs@work
if (/^(?:i?q\d+(?:_[a-z0-9]+)*|\d+bit)(?:@|$)/i.test(suffixAfterDelimiter())) {
const nextDelimiter = trimmed.indexOf("@", profileDelimiter + 1);
if (nextDelimiter < 0) {
return { model: trimmed };

View File

@@ -1,3 +1,4 @@
import { splitTrailingAuthProfile } from "../agents/model-ref-profile.js";
import { normalizeProviderId } from "../agents/provider-id.js";
import { withBundledPluginVitestCompat } from "./bundled-compat.js";
import { resolveEffectivePluginActivationState } from "./config-state.js";
@@ -353,9 +354,7 @@ function resolveManifestRegistry(params: {
}
/**
 * Returns the model part of a model reference, dropping any trailing
 * auth-profile suffix.
 *
 * Delegates to splitTrailingAuthProfile() instead of cutting at the first
 * '@': quant suffixes such as `@q8_0` or `@iq3_xxs` are part of the model id
 * and must be preserved, while a profile given as a further suffix
 * (e.g. `lmstudio/foo@iq3_xxs@work`) is still removed.
 *
 * @param value - Raw model reference, optionally carrying an `@profile` suffix.
 * @returns The model reference without the auth-profile suffix.
 */
function stripModelProfileSuffix(value: string): string {
  return splitTrailingAuthProfile(value).model;
}
function splitExplicitModelRef(rawModel: string): { provider?: string; modelId: string } | null {