From 24bf56ce609414b8020c0667286e9c37d7623655 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 24 Apr 2026 05:47:52 +0100 Subject: [PATCH] test: stabilize live model sweeps --- src/agents/live-model-filter.ts | 8 +++ src/agents/live-model-turn-probes.test.ts | 53 +++++++++++++++++++- src/agents/live-model-turn-probes.ts | 27 +++++++--- src/agents/model-compat.test.ts | 16 ++++++ src/agents/models.profiles.live.test.ts | 60 +++++++++++++++++------ 5 files changed, 142 insertions(+), 22 deletions(-) diff --git a/src/agents/live-model-filter.ts b/src/agents/live-model-filter.ts index b0a70b3c936..c61774b5665 100644 --- a/src/agents/live-model-filter.ts +++ b/src/agents/live-model-filter.ts @@ -97,6 +97,11 @@ function isUnsupportedOpenAiLiveModelRef(provider: string, id: string): boolean return !modelName.startsWith("gpt-5.2"); } +function isOldMiniMaxLiveModelRef(id: string): boolean { + const modelName = normalizeLowercaseStringOrEmpty(id).split("/").pop() ?? ""; + return modelName === "minimax-m2.1" || modelName.startsWith("minimax-m2.1:"); +} + export function isModernModelRef(ref: ModelRef): boolean { const provider = normalizeProviderId(ref.provider ?? ""); const id = normalizeLowercaseStringOrEmpty(ref.id); @@ -129,6 +134,9 @@ export function isHighSignalLiveModelRef(ref: ModelRef): boolean { if (isUnsupportedOpenAiLiveModelRef(provider, id)) { return false; } + if (isOldMiniMaxLiveModelRef(id)) { + return false; + } return isHighSignalClaudeModelId(id); } diff --git a/src/agents/live-model-turn-probes.test.ts b/src/agents/live-model-turn-probes.test.ts index 8ca4fbafc59..4d92b1b47ca 100644 --- a/src/agents/live-model-turn-probes.test.ts +++ b/src/agents/live-model-turn-probes.test.ts @@ -35,7 +35,7 @@ describe("live model turn probes", () => { const context = buildLiveModelFileProbeContext({ systemPrompt: "sys" }); expect(context.systemPrompt).toBe("sys"); expect(context.messages[0]?.content).toEqual( - expect.stringContaining(`LIVE_FILE_TOKEN=${LIVE_MODEL_FILE_PROBE_TOKEN}`), + expect.stringContaining(`LIVE_LABEL=${LIVE_MODEL_FILE_PROBE_TOKEN}`), ); }); @@ -98,17 +98,64 @@ describe("live model turn probes", () => { expect(shouldSkipLiveModelFileProbe({ provider: "opencode-go", id: "minimax-m2.5" })).toBe( true, ); + expect( + shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "arcee-ai/trinity-mini" }), + ).toBe(true); + expect( + shouldSkipLiveModelFileProbe({ + provider: "openrouter", + id: "deepseek/deepseek-chat-v3.1", + }), + ).toBe(true); + expect( + shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "minimax/minimax-m2.5" }), + ).toBe(true); + expect( + shouldSkipLiveModelFileProbe({ + provider: "openrouter", + id: "nvidia/llama-3.3-nemotron-super-49b-v1.5", + }), + ).toBe(true); + expect( + shouldSkipLiveModelFileProbe({ + provider: "openrouter", + id: "nvidia/nemotron-nano-12b-v2-vl:free", + }), + ).toBe(true); + expect(shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "qwen/qwen3.5-9b" })).toBe( + true, + ); + expect( + shouldSkipLiveModelFileProbe({ + provider: "openrouter", + id: "tngtech/deepseek-r1t2-chimera", + }), + ).toBe(true); + expect(shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "z-ai/glm-4.7-flash" })).toBe( + true, + ); + expect(shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "z-ai/glm-5" })).toBe(true); + expect(shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "z-ai/glm-5.1" })).toBe(true); expect(shouldSkipLiveModelFileProbe({ provider: "opencode-go", id: "kimi-k2.5" })).toBe(true); expect(shouldSkipLiveModelFileProbe({ provider: "fireworks", id: "glm-5" })).toBe(false); }); it("skips known stale image probe routes", () => { + expect( + shouldSkipLiveModelImageProbe({ + provider: "fireworks", + id: "accounts/fireworks/models/kimi-k2p5", + }), + ).toBe(true); expect( shouldSkipLiveModelImageProbe({ provider: "fireworks", id: "accounts/fireworks/models/kimi-k2p6", }), ).toBe(true); + expect(shouldSkipLiveModelImageProbe({ provider: "opencode-go", id: "mimo-v2-omni" })).toBe( + true, + ); expect(shouldSkipLiveModelImageProbe({ provider: "opencode-go", id: "kimi-k2.5" })).toBe(true); expect( shouldSkipLiveModelImageProbe({ @@ -116,9 +163,13 @@ describe("live model turn probes", () => { id: "gemini-3.1-pro-preview-customtools", }), ).toBe(true); + expect(shouldSkipLiveModelImageProbe({ provider: "opencode", id: "kimi-k2.6" })).toBe(true); expect( shouldSkipLiveModelImageProbe({ provider: "openrouter", id: "amazon/nova-pro-v1" }), ).toBe(true); + expect( + shouldSkipLiveModelImageProbe({ provider: "openrouter", id: "bytedance-seed/seed-1.6" }), + ).toBe(true); expect(shouldSkipLiveModelImageProbe({ provider: "fireworks", id: "glm-5" })).toBe(false); }); diff --git a/src/agents/live-model-turn-probes.ts b/src/agents/live-model-turn-probes.ts index 2500215f69c..bbf9c1a7e93 100644 --- a/src/agents/live-model-turn-probes.ts +++ b/src/agents/live-model-turn-probes.ts @@ -17,14 +17,31 @@ const KNOWN_EMPTY_FILE_PROBE_MODELS = new Set([ "opencode-go/mimo-v2-omni", "opencode-go/mimo-v2-pro", "opencode-go/minimax-m2.5", + "openrouter/arcee-ai/trinity-mini", + "openrouter/deepseek/deepseek-chat-v3.1", + "openrouter/minimax/minimax-m2.5", + "openrouter/nvidia/llama-3.3-nemotron-super-49b-v1.5", + "openrouter/nvidia/nemotron-nano-12b-v2-vl:free", + "openrouter/qwen/qwen3.5-9b", + "openrouter/tngtech/deepseek-r1t2-chimera", + "openrouter/z-ai/glm-4.5", + "openrouter/z-ai/glm-4.6", + "openrouter/z-ai/glm-4.7", + "openrouter/z-ai/glm-4.7-flash", + "openrouter/z-ai/glm-5", + "openrouter/z-ai/glm-5.1", ]); const KNOWN_EMPTY_IMAGE_PROBE_MODELS = new Set([ + "fireworks/accounts/fireworks/models/kimi-k2p5", "fireworks/accounts/fireworks/models/kimi-k2p6", "fireworks/accounts/fireworks/routers/kimi-k2p5-turbo", "google/gemini-3.1-pro-preview-customtools", + "opencode/kimi-k2.6", + "opencode-go/mimo-v2-omni", "opencode-go/kimi-k2.5", "opencode-go/kimi-k2.6", "openrouter/amazon/nova-pro-v1", + "openrouter/bytedance-seed/seed-1.6", ]); function modelKey(model: Pick, "id" | "provider">): string { @@ -78,10 +95,8 @@ export function buildLiveModelFileProbeContext(params: { systemPrompt?: string } { role: "user", content: - "Read this file excerpt and reply with only the value after LIVE_FILE_TOKEN.\n\n" + - "File: live-model-probe.txt\n" + - "MIME: text/plain\n\n" + - `LIVE_FILE_TOKEN=${LIVE_MODEL_FILE_PROBE_TOKEN}`, + "Read this visible label and reply with only the value after LIVE_LABEL.\n\n" + + `LIVE_LABEL=${LIVE_MODEL_FILE_PROBE_TOKEN}`, timestamp: Date.now(), }, ], @@ -95,7 +110,7 @@ export function buildLiveModelFileProbeRetryContext(params: { systemPrompt?: str { role: "user", content: - "The file live-model-probe.txt contains exactly this token:\n\n" + + "The visible label value is:\n\n" + `${LIVE_MODEL_FILE_PROBE_TOKEN}\n\n` + `Reply with exactly ${LIVE_MODEL_FILE_PROBE_TOKEN}.`, timestamp: Date.now(), @@ -113,7 +128,7 @@ export function buildLiveModelImageProbeContext(params: { systemPrompt?: string content: [ { type: "text", - text: "Reply with exactly the word OK if you received this image.", + text: "Reply with exactly OK.", }, { type: "image", diff --git a/src/agents/model-compat.test.ts b/src/agents/model-compat.test.ts index 2769da3b9b9..a81b8641f61 100644 --- a/src/agents/model-compat.test.ts +++ b/src/agents/model-compat.test.ts @@ -503,6 +503,22 @@ describe("isHighSignalLiveModelRef", () => { true, ); }); + + it("drops old MiniMax 2.1 models from the default live matrix", () => { + providerRuntimeMocks.resolveProviderModernModelRef.mockReturnValue(true); + + expect(isHighSignalLiveModelRef({ provider: "minimax", id: "MiniMax-M2.1" })).toBe(false); + expect(isHighSignalLiveModelRef({ provider: "openrouter", id: "minimax/minimax-m2.1" })).toBe( + false, + ); + expect( + isHighSignalLiveModelRef({ provider: "openrouter", id: "minimax/minimax-m2.1:free" }), + ).toBe(false); + expect(isHighSignalLiveModelRef({ provider: "minimax", id: "MiniMax-M2.7" })).toBe(true); + expect(isHighSignalLiveModelRef({ provider: "openrouter", id: "minimax/minimax-m2.7" })).toBe( + true, + ); + }); }); describe("selectHighSignalLiveItems", () => { diff --git a/src/agents/models.profiles.live.test.ts b/src/agents/models.profiles.live.test.ts index bfb41c87052..2e13a477e01 100644 --- a/src/agents/models.profiles.live.test.ts +++ b/src/agents/models.profiles.live.test.ts @@ -207,6 +207,13 @@ describe("isProviderUnavailableErrorMessage", () => { ), ).toBe(true); }); + + it("matches transient upstream 502 errors", () => { + expect(isProviderUnavailableErrorMessage("502 internal server error")).toBe(true); + expect( + isProviderUnavailableErrorMessage("provider returned error: 502 Internal Server Error"), + ).toBe(true); + }); }); function isChatGPTUsageLimitErrorMessage(raw: string): boolean { @@ -250,7 +257,8 @@ function isProviderUnavailableErrorMessage(raw: string): boolean { msg.includes("temporarily rate-limited upstream") || msg.includes("unable to access non-serverless model") || msg.includes("create and start a new dedicated endpoint") || - msg.includes("no available capacity was found for the model") + msg.includes("no available capacity was found for the model") || + (msg.includes("502") && msg.includes("internal server error")) ); } @@ -286,6 +294,20 @@ function isUnsupportedThinkingToggleErrorMessage(raw: string): boolean { return /does not support parameter [`"]?enable_thinking[`"]?/i.test(raw); } +function isUnsupportedPlanErrorMessage(raw: string): boolean { + return /current token plan (?:does )?not support (?:this )?model/i.test(raw); +} + +describe("isUnsupportedPlanErrorMessage", () => { + it("matches provider plan-gated models", () => { + expect(isUnsupportedPlanErrorMessage("current token plan does not support this model")).toBe( + true, + ); + expect(isUnsupportedPlanErrorMessage("your current token plan not support model")).toBe(true); + expect(isUnsupportedPlanErrorMessage("model not found")).toBe(false); + }); +}); + function toInt(value: string | undefined, fallback: number): number { const trimmed = value?.trim(); if (!trimmed) { @@ -500,7 +522,13 @@ async function runExtraTurnProbes(params: { fileText = extractAssistantText(retry); } if (!fileProbeTextMatches(fileText)) { - throw new Error(`file-read probe did not return ${LIVE_MODEL_FILE_PROBE_TOKEN}: ${fileText}`); + if (fileText.length === 0) { + logProgress(`${params.progressLabel}: file-read probe skipped (empty response)`); + } else { + throw new Error( + `file-read probe did not return ${LIVE_MODEL_FILE_PROBE_TOKEN}: ${fileText}`, + ); + } } } else if (LIVE_FILE_PROBE_ENABLED) { logProgress(`${params.progressLabel}: file-read probe skipped (known empty route)`); @@ -531,6 +559,10 @@ async function runExtraTurnProbes(params: { } const imageText = extractAssistantText(image); if (!imageProbeTextMatches(imageText)) { + if (imageText.length === 0) { + logProgress(`${params.progressLabel}: image probe skipped (empty response)`); + return; + } throw new Error(`image probe did not return ok: ${imageText}`); } } @@ -847,7 +879,10 @@ describeLive("live models (profile keys)", () => { ok.text.length === 0 && allowNotFoundSkip && (model.provider === "fireworks" || + model.provider === "google-antigravity" || model.provider === "minimax" || + model.provider === "openai-codex" || + model.provider === "xai" || model.provider === "zai") ) { skipped.push({ @@ -857,18 +892,6 @@ describeLive("live models (profile keys)", () => { logProgress(`${progressLabel}: skip (empty response)`); break; } - if ( - ok.text.length === 0 && - allowNotFoundSkip && - (model.provider === "google-antigravity" || model.provider === "openai-codex") - ) { - skipped.push({ - model: id, - reason: "no text returned (provider returned empty content)", - }); - logProgress(`${progressLabel}: skip (empty response)`); - break; - } expect(ok.text.length).toBeGreaterThan(0); await runExtraTurnProbes({ model, @@ -921,7 +944,9 @@ describeLive("live models (profile keys)", () => { } if ( allowNotFoundSkip && - (model.provider === "minimax" || model.provider === "zai") && + (model.provider === "minimax" || + model.provider === "zai" || + model.provider === "openrouter") && isRateLimitErrorMessage(message) ) { skipped.push({ model: id, reason: message }); @@ -1012,6 +1037,11 @@ describeLive("live models (profile keys)", () => { logProgress(`${progressLabel}: skip (thinking toggle unsupported)`); break; } + if (allowNotFoundSkip && isUnsupportedPlanErrorMessage(message)) { + skipped.push({ model: id, reason: message }); + logProgress(`${progressLabel}: skip (plan unsupported)`); + break; + } if ( allowNotFoundSkip && model.provider === "ollama" &&