fix(model): preserve LM Studio '@' quant suffixes in model name resolution

stripModelProfileSuffix() in providers.ts naively truncated model names at
the first '@', discarding quant variants like @iq3_xxs, @iq4_xs, @q4_k_xl
that LM Studio uses to distinguish quantization levels.

This caused two user-facing bugs (fixes #71474):

1. /model lmstudio/qwen3.6-27b@iq3_xxs → 'model not allowed: lmstudio/qwen3.6-27b'
2. API requests sent the truncated model name → LM Studio picked a random quant

Changes:

- Replace the naive indexOf('@') strip in providers.ts with
  splitTrailingAuthProfile(), which already handles quant suffixes
- Extend the quant-suffix regex (q\d+...) to also match
  importance-quantization tags (iq3_xxs, iq4_xs, ...) via the i?q\d+ pattern
- Add tests for @iq* quant suffixes and auth-profile-after-iq combos
2026-07-13 20:06:07 +00:00 · 2026-04-25 04:40:45 -04:00
parent 94ceb2bbe9
commit 5bb78ea7ed
3 changed files with 23 additions and 5 deletions
--- a/src/agents/model-ref-profile.test.ts
+++ b/src/agents/model-ref-profile.test.ts
@@ -80,6 +80,22 @@ describe("splitTrailingAuthProfile", () => {
    });
  });

+  it("keeps @iq* importance-quantization suffixes in model ids", () => {
+    expect(splitTrailingAuthProfile("lmstudio/qwen3.6-27b@iq3_xxs")).toEqual({
+      model: "lmstudio/qwen3.6-27b@iq3_xxs",
+    });
+    expect(splitTrailingAuthProfile("lmstudio/qwen3.6-27b@iq4_xs")).toEqual({
+      model: "lmstudio/qwen3.6-27b@iq4_xs",
+    });
+  });
+
+  it("supports auth profiles after @iq* quant suffixes", () => {
+    expect(splitTrailingAuthProfile("lmstudio/qwen3.6-27b@iq3_xxs@work")).toEqual({
+      model: "lmstudio/qwen3.6-27b@iq3_xxs",
+      profile: "work",
+    });
+  });
+
  it("keeps @4bit/@8bit quant suffixes in model ids", () => {
    expect(splitTrailingAuthProfile("lmstudio-mb-pro/gemma-4-31b@4bit")).toEqual({
      model: "lmstudio-mb-pro/gemma-4-31b@4bit",
--- a/src/agents/model-ref-profile.ts
+++ b/src/agents/model-ref-profile.ts
@@ -29,9 +29,12 @@ export function splitTrailingAuthProfile(raw: string): {
  // of the model id. These often use '@' (ex: gemma-4-31b-it@q8_0) which would
  // otherwise be misinterpreted as an auth profile delimiter.
  //
+  // Covers standard GGUF quant tags (q4_0, q8_0, q4_k_xl, …) and importance-
+  // quantization variants (iq3_xxs, iq4_xs, …) used by llama.cpp / LM Studio.
+  //
  // If an auth profile is needed, it can still be specified as a second suffix:
-  //   lmstudio/foo@q8_0@work
-  if (/^(?:q\d+(?:_[a-z0-9]+)*|\d+bit)(?:@|$)/i.test(suffixAfterDelimiter())) {
+  //   lmstudio/foo@q8_0@work   lmstudio/foo@iq3_xxs@work
+  if (/^(?:i?q\d+(?:_[a-z0-9]+)*|\d+bit)(?:@|$)/i.test(suffixAfterDelimiter())) {
    const nextDelimiter = trimmed.indexOf("@", profileDelimiter + 1);
    if (nextDelimiter < 0) {
      return { model: trimmed };
--- a/src/plugins/providers.ts
+++ b/src/plugins/providers.ts
@@ -1,3 +1,4 @@
+import { splitTrailingAuthProfile } from "../agents/model-ref-profile.js";
 import { normalizeProviderId } from "../agents/provider-id.js";
 import { withBundledPluginVitestCompat } from "./bundled-compat.js";
 import { resolveEffectivePluginActivationState } from "./config-state.js";
@@ -353,9 +354,7 @@ function resolveManifestRegistry(params: {
 }

 function stripModelProfileSuffix(value: string): string {
-  const trimmed = value.trim();
-  const at = trimmed.indexOf("@");
-  return at <= 0 ? trimmed : trimmed.slice(0, at).trim();
+  return splitTrailingAuthProfile(value).model;
 }

 function splitExplicitModelRef(rawModel: string): { provider?: string; modelId: string } | null {