test: stabilize live model sweeps

This commit is contained in:
Peter Steinberger
2026-04-24 05:47:52 +01:00
parent 9d445f4d68
commit 24bf56ce60
5 changed files with 142 additions and 22 deletions

View File

@@ -97,6 +97,11 @@ function isUnsupportedOpenAiLiveModelRef(provider: string, id: string): boolean
return !modelName.startsWith("gpt-5.2");
}
/**
 * True when the ref points at the retired MiniMax M2.1 model, which is
 * dropped from the default live sweep matrix.
 */
function isOldMiniMaxLiveModelRef(id: string): boolean {
  // Provider-prefixed ids ("minimax/minimax-m2.1") and bare ids both
  // reduce to the final path segment before comparison.
  const tail = normalizeLowercaseStringOrEmpty(id).split("/").at(-1) ?? "";
  // Match the exact 2.1 id or any suffixed variant ("minimax-m2.1:free").
  return tail.startsWith("minimax-m2.1:") || tail === "minimax-m2.1";
}
export function isModernModelRef(ref: ModelRef): boolean {
const provider = normalizeProviderId(ref.provider ?? "");
const id = normalizeLowercaseStringOrEmpty(ref.id);
@@ -129,6 +134,9 @@ export function isHighSignalLiveModelRef(ref: ModelRef): boolean {
if (isUnsupportedOpenAiLiveModelRef(provider, id)) {
return false;
}
if (isOldMiniMaxLiveModelRef(id)) {
return false;
}
return isHighSignalClaudeModelId(id);
}

View File

@@ -35,7 +35,7 @@ describe("live model turn probes", () => {
const context = buildLiveModelFileProbeContext({ systemPrompt: "sys" });
expect(context.systemPrompt).toBe("sys");
expect(context.messages[0]?.content).toEqual(
expect.stringContaining(`LIVE_FILE_TOKEN=${LIVE_MODEL_FILE_PROBE_TOKEN}`),
expect.stringContaining(`LIVE_LABEL=${LIVE_MODEL_FILE_PROBE_TOKEN}`),
);
});
@@ -98,17 +98,64 @@ describe("live model turn probes", () => {
expect(shouldSkipLiveModelFileProbe({ provider: "opencode-go", id: "minimax-m2.5" })).toBe(
true,
);
expect(
shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "arcee-ai/trinity-mini" }),
).toBe(true);
expect(
shouldSkipLiveModelFileProbe({
provider: "openrouter",
id: "deepseek/deepseek-chat-v3.1",
}),
).toBe(true);
expect(
shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "minimax/minimax-m2.5" }),
).toBe(true);
expect(
shouldSkipLiveModelFileProbe({
provider: "openrouter",
id: "nvidia/llama-3.3-nemotron-super-49b-v1.5",
}),
).toBe(true);
expect(
shouldSkipLiveModelFileProbe({
provider: "openrouter",
id: "nvidia/nemotron-nano-12b-v2-vl:free",
}),
).toBe(true);
expect(shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "qwen/qwen3.5-9b" })).toBe(
true,
);
expect(
shouldSkipLiveModelFileProbe({
provider: "openrouter",
id: "tngtech/deepseek-r1t2-chimera",
}),
).toBe(true);
expect(shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "z-ai/glm-4.7-flash" })).toBe(
true,
);
expect(shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "z-ai/glm-5" })).toBe(true);
expect(shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "z-ai/glm-5.1" })).toBe(true);
expect(shouldSkipLiveModelFileProbe({ provider: "opencode-go", id: "kimi-k2.5" })).toBe(true);
expect(shouldSkipLiveModelFileProbe({ provider: "fireworks", id: "glm-5" })).toBe(false);
});
it("skips known stale image probe routes", () => {
expect(
shouldSkipLiveModelImageProbe({
provider: "fireworks",
id: "accounts/fireworks/models/kimi-k2p5",
}),
).toBe(true);
expect(
shouldSkipLiveModelImageProbe({
provider: "fireworks",
id: "accounts/fireworks/models/kimi-k2p6",
}),
).toBe(true);
expect(shouldSkipLiveModelImageProbe({ provider: "opencode-go", id: "mimo-v2-omni" })).toBe(
true,
);
expect(shouldSkipLiveModelImageProbe({ provider: "opencode-go", id: "kimi-k2.5" })).toBe(true);
expect(
shouldSkipLiveModelImageProbe({
@@ -116,9 +163,13 @@ describe("live model turn probes", () => {
id: "gemini-3.1-pro-preview-customtools",
}),
).toBe(true);
expect(shouldSkipLiveModelImageProbe({ provider: "opencode", id: "kimi-k2.6" })).toBe(true);
expect(
shouldSkipLiveModelImageProbe({ provider: "openrouter", id: "amazon/nova-pro-v1" }),
).toBe(true);
expect(
shouldSkipLiveModelImageProbe({ provider: "openrouter", id: "bytedance-seed/seed-1.6" }),
).toBe(true);
expect(shouldSkipLiveModelImageProbe({ provider: "fireworks", id: "glm-5" })).toBe(false);
});

View File

@@ -17,14 +17,31 @@ const KNOWN_EMPTY_FILE_PROBE_MODELS = new Set([
"opencode-go/mimo-v2-omni",
"opencode-go/mimo-v2-pro",
"opencode-go/minimax-m2.5",
"openrouter/arcee-ai/trinity-mini",
"openrouter/deepseek/deepseek-chat-v3.1",
"openrouter/minimax/minimax-m2.5",
"openrouter/nvidia/llama-3.3-nemotron-super-49b-v1.5",
"openrouter/nvidia/nemotron-nano-12b-v2-vl:free",
"openrouter/qwen/qwen3.5-9b",
"openrouter/tngtech/deepseek-r1t2-chimera",
"openrouter/z-ai/glm-4.5",
"openrouter/z-ai/glm-4.6",
"openrouter/z-ai/glm-4.7",
"openrouter/z-ai/glm-4.7-flash",
"openrouter/z-ai/glm-5",
"openrouter/z-ai/glm-5.1",
]);
// Routes whose image probe is known to come back with an empty assistant
// response; the sweep skips these instead of reporting them as failures.
// NOTE(review): entries look like "<provider>/<model id>" — presumably the
// same shape produced by modelKey() below; confirm before adding entries.
const KNOWN_EMPTY_IMAGE_PROBE_MODELS = new Set([
"fireworks/accounts/fireworks/models/kimi-k2p5",
"fireworks/accounts/fireworks/models/kimi-k2p6",
"fireworks/accounts/fireworks/routers/kimi-k2p5-turbo",
"google/gemini-3.1-pro-preview-customtools",
"opencode/kimi-k2.6",
"opencode-go/mimo-v2-omni",
"opencode-go/kimi-k2.5",
"opencode-go/kimi-k2.6",
"openrouter/amazon/nova-pro-v1",
"openrouter/bytedance-seed/seed-1.6",
]);
function modelKey(model: Pick<Model<Api>, "id" | "provider">): string {
@@ -78,10 +95,8 @@ export function buildLiveModelFileProbeContext(params: { systemPrompt?: string }
{
role: "user",
content:
"Read this file excerpt and reply with only the value after LIVE_FILE_TOKEN.\n\n" +
"File: live-model-probe.txt\n" +
"MIME: text/plain\n\n" +
`LIVE_FILE_TOKEN=${LIVE_MODEL_FILE_PROBE_TOKEN}`,
"Read this visible label and reply with only the value after LIVE_LABEL.\n\n" +
`LIVE_LABEL=${LIVE_MODEL_FILE_PROBE_TOKEN}`,
timestamp: Date.now(),
},
],
@@ -95,7 +110,7 @@ export function buildLiveModelFileProbeRetryContext(params: { systemPrompt?: str
{
role: "user",
content:
"The file live-model-probe.txt contains exactly this token:\n\n" +
"The visible label value is:\n\n" +
`${LIVE_MODEL_FILE_PROBE_TOKEN}\n\n` +
`Reply with exactly ${LIVE_MODEL_FILE_PROBE_TOKEN}.`,
timestamp: Date.now(),
@@ -113,7 +128,7 @@ export function buildLiveModelImageProbeContext(params: { systemPrompt?: string
content: [
{
type: "text",
text: "Reply with exactly the word OK if you received this image.",
text: "Reply with exactly OK.",
},
{
type: "image",

View File

@@ -503,6 +503,22 @@ describe("isHighSignalLiveModelRef", () => {
true,
);
});
it("drops old MiniMax 2.1 models from the default live matrix", () => {
providerRuntimeMocks.resolveProviderModernModelRef.mockReturnValue(true);
expect(isHighSignalLiveModelRef({ provider: "minimax", id: "MiniMax-M2.1" })).toBe(false);
expect(isHighSignalLiveModelRef({ provider: "openrouter", id: "minimax/minimax-m2.1" })).toBe(
false,
);
expect(
isHighSignalLiveModelRef({ provider: "openrouter", id: "minimax/minimax-m2.1:free" }),
).toBe(false);
expect(isHighSignalLiveModelRef({ provider: "minimax", id: "MiniMax-M2.7" })).toBe(true);
expect(isHighSignalLiveModelRef({ provider: "openrouter", id: "minimax/minimax-m2.7" })).toBe(
true,
);
});
});
describe("selectHighSignalLiveItems", () => {

View File

@@ -207,6 +207,13 @@ describe("isProviderUnavailableErrorMessage", () => {
),
).toBe(true);
});
it("matches transient upstream 502 errors", () => {
expect(isProviderUnavailableErrorMessage("502 internal server error")).toBe(true);
expect(
isProviderUnavailableErrorMessage("provider returned error: 502 Internal Server Error"),
).toBe(true);
});
});
function isChatGPTUsageLimitErrorMessage(raw: string): boolean {
@@ -250,7 +257,8 @@ function isProviderUnavailableErrorMessage(raw: string): boolean {
msg.includes("temporarily rate-limited upstream") ||
msg.includes("unable to access non-serverless model") ||
msg.includes("create and start a new dedicated endpoint") ||
msg.includes("no available capacity was found for the model")
msg.includes("no available capacity was found for the model") ||
(msg.includes("502") && msg.includes("internal server error"))
);
}
@@ -286,6 +294,20 @@ function isUnsupportedThinkingToggleErrorMessage(raw: string): boolean {
return /does not support parameter [`"]?enable_thinking[`"]?/i.test(raw);
}
/**
 * Detects provider errors reporting that the account's token plan does not
 * unlock the requested model. Wording varies between providers ("does not
 * support this model", "not support model"), so the optional groups absorb
 * both forms. Case-insensitive, unanchored substring match.
 */
function isUnsupportedPlanErrorMessage(raw: string): boolean {
  const planGatedModel = /current token plan (?:does )?not support (?:this )?model/i;
  return planGatedModel.test(raw);
}
// Unit coverage for the plan-gate matcher: both the full provider wording and
// the truncated "not support model" variant must match; an unrelated error
// string must not.
describe("isUnsupportedPlanErrorMessage", () => {
it("matches provider plan-gated models", () => {
expect(isUnsupportedPlanErrorMessage("current token plan does not support this model")).toBe(
true,
);
expect(isUnsupportedPlanErrorMessage("your current token plan not support model")).toBe(true);
expect(isUnsupportedPlanErrorMessage("model not found")).toBe(false);
});
});
function toInt(value: string | undefined, fallback: number): number {
const trimmed = value?.trim();
if (!trimmed) {
@@ -500,7 +522,13 @@ async function runExtraTurnProbes(params: {
fileText = extractAssistantText(retry);
}
if (!fileProbeTextMatches(fileText)) {
throw new Error(`file-read probe did not return ${LIVE_MODEL_FILE_PROBE_TOKEN}: ${fileText}`);
if (fileText.length === 0) {
logProgress(`${params.progressLabel}: file-read probe skipped (empty response)`);
} else {
throw new Error(
`file-read probe did not return ${LIVE_MODEL_FILE_PROBE_TOKEN}: ${fileText}`,
);
}
}
} else if (LIVE_FILE_PROBE_ENABLED) {
logProgress(`${params.progressLabel}: file-read probe skipped (known empty route)`);
@@ -531,6 +559,10 @@ async function runExtraTurnProbes(params: {
}
const imageText = extractAssistantText(image);
if (!imageProbeTextMatches(imageText)) {
if (imageText.length === 0) {
logProgress(`${params.progressLabel}: image probe skipped (empty response)`);
return;
}
throw new Error(`image probe did not return ok: ${imageText}`);
}
}
@@ -847,7 +879,10 @@ describeLive("live models (profile keys)", () => {
ok.text.length === 0 &&
allowNotFoundSkip &&
(model.provider === "fireworks" ||
model.provider === "google-antigravity" ||
model.provider === "minimax" ||
model.provider === "openai-codex" ||
model.provider === "xai" ||
model.provider === "zai")
) {
skipped.push({
@@ -857,18 +892,6 @@ describeLive("live models (profile keys)", () => {
logProgress(`${progressLabel}: skip (empty response)`);
break;
}
if (
ok.text.length === 0 &&
allowNotFoundSkip &&
(model.provider === "google-antigravity" || model.provider === "openai-codex")
) {
skipped.push({
model: id,
reason: "no text returned (provider returned empty content)",
});
logProgress(`${progressLabel}: skip (empty response)`);
break;
}
expect(ok.text.length).toBeGreaterThan(0);
await runExtraTurnProbes({
model,
@@ -921,7 +944,9 @@ describeLive("live models (profile keys)", () => {
}
if (
allowNotFoundSkip &&
(model.provider === "minimax" || model.provider === "zai") &&
(model.provider === "minimax" ||
model.provider === "zai" ||
model.provider === "openrouter") &&
isRateLimitErrorMessage(message)
) {
skipped.push({ model: id, reason: message });
@@ -1012,6 +1037,11 @@ describeLive("live models (profile keys)", () => {
logProgress(`${progressLabel}: skip (thinking toggle unsupported)`);
break;
}
if (allowNotFoundSkip && isUnsupportedPlanErrorMessage(message)) {
skipped.push({ model: id, reason: message });
logProgress(`${progressLabel}: skip (plan unsupported)`);
break;
}
if (
allowNotFoundSkip &&
model.provider === "ollama" &&