From 24bf56ce609414b8020c0667286e9c37d7623655 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Fri, 24 Apr 2026 05:47:52 +0100
Subject: [PATCH] test: stabilize live model sweeps

---
 src/agents/live-model-filter.ts           |  8 +++
 src/agents/live-model-turn-probes.test.ts | 53 +++++++++++++++++++-
 src/agents/live-model-turn-probes.ts      | 27 +++++++---
 src/agents/model-compat.test.ts           | 16 ++++++
 src/agents/models.profiles.live.test.ts   | 60 +++++++++++++++++------
 5 files changed, 142 insertions(+), 22 deletions(-)

diff --git a/src/agents/live-model-filter.ts b/src/agents/live-model-filter.ts
index b0a70b3c936..c61774b5665 100644
--- a/src/agents/live-model-filter.ts
+++ b/src/agents/live-model-filter.ts
@@ -97,6 +97,11 @@ function isUnsupportedOpenAiLiveModelRef(provider: string, id: string): boolean
   return !modelName.startsWith("gpt-5.2");
 }
 
+/** Matches MiniMax M2.1 ids (last "/" segment), bare or followed by a ":" variant suffix. */
+function isOldMiniMaxLiveModelRef(id: string): boolean {
+  return /^minimax-m2\.1(?::|$)/.test(normalizeLowercaseStringOrEmpty(id).split("/").pop() ?? "");
+}
+
 export function isModernModelRef(ref: ModelRef): boolean {
   const provider = normalizeProviderId(ref.provider ?? "");
   const id = normalizeLowercaseStringOrEmpty(ref.id);
@@ -129,6 +134,9 @@ export function isHighSignalLiveModelRef(ref: ModelRef): boolean {
   if (isUnsupportedOpenAiLiveModelRef(provider, id)) {
     return false;
   }
+  if (isOldMiniMaxLiveModelRef(id)) {
+    return false;
+  }
   return isHighSignalClaudeModelId(id);
 }
 
diff --git a/src/agents/live-model-turn-probes.test.ts b/src/agents/live-model-turn-probes.test.ts
index 8ca4fbafc59..4d92b1b47ca 100644
--- a/src/agents/live-model-turn-probes.test.ts
+++ b/src/agents/live-model-turn-probes.test.ts
@@ -35,7 +35,7 @@ describe("live model turn probes", () => {
     const context = buildLiveModelFileProbeContext({ systemPrompt: "sys" });
     expect(context.systemPrompt).toBe("sys");
     expect(context.messages[0]?.content).toEqual(
-      expect.stringContaining(`LIVE_FILE_TOKEN=${LIVE_MODEL_FILE_PROBE_TOKEN}`),
+      expect.stringContaining(`LIVE_LABEL=${LIVE_MODEL_FILE_PROBE_TOKEN}`),
     );
   });
 
@@ -98,17 +98,64 @@ describe("live model turn probes", () => {
     expect(shouldSkipLiveModelFileProbe({ provider: "opencode-go", id: "minimax-m2.5" })).toBe(
       true,
     );
+    expect(
+      shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "arcee-ai/trinity-mini" }),
+    ).toBe(true);
+    expect(
+      shouldSkipLiveModelFileProbe({
+        provider: "openrouter",
+        id: "deepseek/deepseek-chat-v3.1",
+      }),
+    ).toBe(true);
+    expect(
+      shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "minimax/minimax-m2.5" }),
+    ).toBe(true);
+    expect(
+      shouldSkipLiveModelFileProbe({
+        provider: "openrouter",
+        id: "nvidia/llama-3.3-nemotron-super-49b-v1.5",
+      }),
+    ).toBe(true);
+    expect(
+      shouldSkipLiveModelFileProbe({
+        provider: "openrouter",
+        id: "nvidia/nemotron-nano-12b-v2-vl:free",
+      }),
+    ).toBe(true);
+    expect(shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "qwen/qwen3.5-9b" })).toBe(
+      true,
+    );
+    expect(
+      shouldSkipLiveModelFileProbe({
+        provider: "openrouter",
+        id: "tngtech/deepseek-r1t2-chimera",
+      }),
+    ).toBe(true);
+    expect(shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "z-ai/glm-4.7-flash" })).toBe(
+      true,
+    );
+    expect(shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "z-ai/glm-5" })).toBe(true);
+    expect(shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "z-ai/glm-5.1" })).toBe(true);
     expect(shouldSkipLiveModelFileProbe({ provider: "opencode-go", id: "kimi-k2.5" })).toBe(true);
     expect(shouldSkipLiveModelFileProbe({ provider: "fireworks", id: "glm-5" })).toBe(false);
   });
 
   it("skips known stale image probe routes", () => {
+    expect(
+      shouldSkipLiveModelImageProbe({
+        provider: "fireworks",
+        id: "accounts/fireworks/models/kimi-k2p5",
+      }),
+    ).toBe(true);
     expect(
       shouldSkipLiveModelImageProbe({
         provider: "fireworks",
         id: "accounts/fireworks/models/kimi-k2p6",
       }),
     ).toBe(true);
+    expect(shouldSkipLiveModelImageProbe({ provider: "opencode-go", id: "mimo-v2-omni" })).toBe(
+      true,
+    );
     expect(shouldSkipLiveModelImageProbe({ provider: "opencode-go", id: "kimi-k2.5" })).toBe(true);
     expect(
       shouldSkipLiveModelImageProbe({
@@ -116,9 +163,13 @@ describe("live model turn probes", () => {
         id: "gemini-3.1-pro-preview-customtools",
       }),
     ).toBe(true);
+    expect(shouldSkipLiveModelImageProbe({ provider: "opencode", id: "kimi-k2.6" })).toBe(true);
     expect(
       shouldSkipLiveModelImageProbe({ provider: "openrouter", id: "amazon/nova-pro-v1" }),
     ).toBe(true);
+    expect(
+      shouldSkipLiveModelImageProbe({ provider: "openrouter", id: "bytedance-seed/seed-1.6" }),
+    ).toBe(true);
     expect(shouldSkipLiveModelImageProbe({ provider: "fireworks", id: "glm-5" })).toBe(false);
   });
 
diff --git a/src/agents/live-model-turn-probes.ts b/src/agents/live-model-turn-probes.ts
index 2500215f69c..bbf9c1a7e93 100644
--- a/src/agents/live-model-turn-probes.ts
+++ b/src/agents/live-model-turn-probes.ts
@@ -17,14 +17,31 @@ const KNOWN_EMPTY_FILE_PROBE_MODELS = new Set([
   "opencode-go/mimo-v2-omni",
   "opencode-go/mimo-v2-pro",
   "opencode-go/minimax-m2.5",
+  "openrouter/arcee-ai/trinity-mini",
+  "openrouter/deepseek/deepseek-chat-v3.1",
+  "openrouter/minimax/minimax-m2.5",
+  "openrouter/nvidia/llama-3.3-nemotron-super-49b-v1.5",
+  "openrouter/nvidia/nemotron-nano-12b-v2-vl:free",
+  "openrouter/qwen/qwen3.5-9b",
+  "openrouter/tngtech/deepseek-r1t2-chimera",
+  "openrouter/z-ai/glm-4.5",
+  "openrouter/z-ai/glm-4.6",
+  "openrouter/z-ai/glm-4.7",
+  "openrouter/z-ai/glm-4.7-flash",
+  "openrouter/z-ai/glm-5",
+  "openrouter/z-ai/glm-5.1",
 ]);
 const KNOWN_EMPTY_IMAGE_PROBE_MODELS = new Set([
+  "fireworks/accounts/fireworks/models/kimi-k2p5",
   "fireworks/accounts/fireworks/models/kimi-k2p6",
   "fireworks/accounts/fireworks/routers/kimi-k2p5-turbo",
   "google/gemini-3.1-pro-preview-customtools",
+  "opencode/kimi-k2.6",
   "opencode-go/kimi-k2.5",
   "opencode-go/kimi-k2.6",
+  "opencode-go/mimo-v2-omni",
   "openrouter/amazon/nova-pro-v1",
+  "openrouter/bytedance-seed/seed-1.6",
 ]);
 
 function modelKey(model: Pick<Model<Api>, "id" | "provider">): string {
@@ -78,10 +95,8 @@ export function buildLiveModelFileProbeContext(params: { systemPrompt?: string }
       {
         role: "user",
         content:
-          "Read this file excerpt and reply with only the value after LIVE_FILE_TOKEN.\n\n" +
-          "File: live-model-probe.txt\n" +
-          "MIME: text/plain\n\n" +
-          `LIVE_FILE_TOKEN=${LIVE_MODEL_FILE_PROBE_TOKEN}`,
+          "Read this visible label and reply with only the value after LIVE_LABEL.\n\n" +
+          `LIVE_LABEL=${LIVE_MODEL_FILE_PROBE_TOKEN}`,
         timestamp: Date.now(),
       },
     ],
@@ -95,7 +110,7 @@ export function buildLiveModelFileProbeRetryContext(params: { systemPrompt?: str
       {
         role: "user",
         content:
-          "The file live-model-probe.txt contains exactly this token:\n\n" +
+          "The visible label value is:\n\n" +
           `${LIVE_MODEL_FILE_PROBE_TOKEN}\n\n` +
           `Reply with exactly ${LIVE_MODEL_FILE_PROBE_TOKEN}.`,
         timestamp: Date.now(),
@@ -113,7 +128,7 @@ export function buildLiveModelImageProbeContext(params: { systemPrompt?: string
         content: [
           {
             type: "text",
-            text: "Reply with exactly the word OK if you received this image.",
+            text: "Reply with exactly OK.",
           },
           {
             type: "image",
diff --git a/src/agents/model-compat.test.ts b/src/agents/model-compat.test.ts
index 2769da3b9b9..a81b8641f61 100644
--- a/src/agents/model-compat.test.ts
+++ b/src/agents/model-compat.test.ts
@@ -503,6 +503,22 @@ describe("isHighSignalLiveModelRef", () => {
       true,
     );
   });
+
+  it("drops old MiniMax 2.1 models from the default live matrix", () => {
+    providerRuntimeMocks.resolveProviderModernModelRef.mockReturnValue(true);
+
+    // [provider, id, expected]: every M2.1 spelling is dropped, while M2.7 stays in the matrix.
+    const cases: Array<[string, string, boolean]> = [
+      ["minimax", "MiniMax-M2.1", false],
+      ["openrouter", "minimax/minimax-m2.1", false],
+      ["openrouter", "minimax/minimax-m2.1:free", false],
+      ["minimax", "MiniMax-M2.7", true],
+      ["openrouter", "minimax/minimax-m2.7", true],
+    ];
+    for (const [provider, id, expected] of cases) {
+      expect(isHighSignalLiveModelRef({ provider, id })).toBe(expected);
+    }
+  });
 });
 
 describe("selectHighSignalLiveItems", () => {
diff --git a/src/agents/models.profiles.live.test.ts b/src/agents/models.profiles.live.test.ts
index bfb41c87052..2e13a477e01 100644
--- a/src/agents/models.profiles.live.test.ts
+++ b/src/agents/models.profiles.live.test.ts
@@ -207,6 +207,13 @@ describe("isProviderUnavailableErrorMessage", () => {
       ),
     ).toBe(true);
   });
+
+  it("matches transient upstream 502 errors", () => {
+    // Covers both the bare status text and the provider-prefixed, mixed-case variant.
+    const bare = "502 internal server error";
+    const wrapped = "provider returned error: 502 Internal Server Error";
+    expect([bare, wrapped].map(isProviderUnavailableErrorMessage)).toEqual([true, true]);
+  });
 });
 
 function isChatGPTUsageLimitErrorMessage(raw: string): boolean {
@@ -250,7 +257,8 @@ function isProviderUnavailableErrorMessage(raw: string): boolean {
     msg.includes("temporarily rate-limited upstream") ||
     msg.includes("unable to access non-serverless model") ||
     msg.includes("create and start a new dedicated endpoint") ||
-    msg.includes("no available capacity was found for the model")
+    msg.includes("no available capacity was found for the model") ||
+    (msg.includes("502") && msg.includes("internal server error"))
   );
 }
 
@@ -286,6 +294,20 @@ function isUnsupportedThinkingToggleErrorMessage(raw: string): boolean {
   return /does not support parameter [`"]?enable_thinking[`"]?/i.test(raw);
 }
 
+/** Detects provider errors reporting that the current token plan does not support the model. */
+function isUnsupportedPlanErrorMessage(raw: string): boolean {
+  return raw.search(/current token plan (?:does )?not support (?:this )?model/i) !== -1;
+}
+
+describe("isUnsupportedPlanErrorMessage", () => {
+  it("matches provider plan-gated models", () => {
+    const gated = "current token plan does not support this model";
+    const terse = "your current token plan not support model";
+    const verdicts = [gated, terse, "model not found"].map(isUnsupportedPlanErrorMessage);
+    expect(verdicts).toEqual([true, true, false]);
+  });
+});
+
 function toInt(value: string | undefined, fallback: number): number {
   const trimmed = value?.trim();
   if (!trimmed) {
@@ -500,7 +522,13 @@ async function runExtraTurnProbes(params: {
       fileText = extractAssistantText(retry);
     }
     if (!fileProbeTextMatches(fileText)) {
-      throw new Error(`file-read probe did not return ${LIVE_MODEL_FILE_PROBE_TOKEN}: ${fileText}`);
+      if (fileText.length === 0) {
+        logProgress(`${params.progressLabel}: file-read probe skipped (empty response)`);
+      } else {
+        throw new Error(
+          `file-read probe did not return ${LIVE_MODEL_FILE_PROBE_TOKEN}: ${fileText}`,
+        );
+      }
     }
   } else if (LIVE_FILE_PROBE_ENABLED) {
     logProgress(`${params.progressLabel}: file-read probe skipped (known empty route)`);
@@ -531,6 +559,10 @@ async function runExtraTurnProbes(params: {
   }
   const imageText = extractAssistantText(image);
   if (!imageProbeTextMatches(imageText)) {
+    if (imageText.length === 0) {
+      logProgress(`${params.progressLabel}: image probe skipped (empty response)`);
+      return;
+    }
     throw new Error(`image probe did not return ok: ${imageText}`);
   }
 }
@@ -847,7 +879,10 @@ describeLive("live models (profile keys)", () => {
               ok.text.length === 0 &&
               allowNotFoundSkip &&
               (model.provider === "fireworks" ||
+                model.provider === "google-antigravity" ||
                 model.provider === "minimax" ||
+                model.provider === "openai-codex" ||
+                model.provider === "xai" ||
                 model.provider === "zai")
             ) {
               skipped.push({
@@ -857,18 +892,6 @@ describeLive("live models (profile keys)", () => {
               logProgress(`${progressLabel}: skip (empty response)`);
               break;
             }
-            if (
-              ok.text.length === 0 &&
-              allowNotFoundSkip &&
-              (model.provider === "google-antigravity" || model.provider === "openai-codex")
-            ) {
-              skipped.push({
-                model: id,
-                reason: "no text returned (provider returned empty content)",
-              });
-              logProgress(`${progressLabel}: skip (empty response)`);
-              break;
-            }
             expect(ok.text.length).toBeGreaterThan(0);
             await runExtraTurnProbes({
               model,
@@ -921,7 +944,9 @@ describeLive("live models (profile keys)", () => {
             }
             if (
               allowNotFoundSkip &&
-              (model.provider === "minimax" || model.provider === "zai") &&
+              (model.provider === "minimax" ||
+                model.provider === "zai" ||
+                model.provider === "openrouter") &&
               isRateLimitErrorMessage(message)
             ) {
               skipped.push({ model: id, reason: message });
@@ -1012,6 +1037,11 @@ describeLive("live models (profile keys)", () => {
               logProgress(`${progressLabel}: skip (thinking toggle unsupported)`);
               break;
             }
+            if (allowNotFoundSkip && isUnsupportedPlanErrorMessage(message)) {
+              skipped.push({ model: id, reason: message });
+              logProgress(`${progressLabel}: skip (plan unsupported)`);
+              break;
+            }
             if (
               allowNotFoundSkip &&
               model.provider === "ollama" &&