test: stabilize live model sweeps

This commit is contained in:
Peter Steinberger
2026-04-24 05:47:52 +01:00
parent 9d445f4d68
commit 24bf56ce60
5 changed files with 142 additions and 22 deletions

View File

@@ -97,6 +97,11 @@ function isUnsupportedOpenAiLiveModelRef(provider: string, id: string): boolean
return !modelName.startsWith("gpt-5.2");
}
/**
 * True when the ref points at the retired MiniMax M2.1 model, which is
 * dropped from the default live sweep matrix.
 */
function isOldMiniMaxLiveModelRef(id: string): boolean {
  // Provider-prefixed ids ("minimax/minimax-m2.1") and bare ids both
  // reduce to the final path segment before comparison.
  const tail = normalizeLowercaseStringOrEmpty(id).split("/").at(-1) ?? "";
  // Match the exact 2.1 id or any suffixed variant ("minimax-m2.1:free").
  return tail.startsWith("minimax-m2.1:") || tail === "minimax-m2.1";
}
export function isModernModelRef(ref: ModelRef): boolean {
const provider = normalizeProviderId(ref.provider ?? "");
const id = normalizeLowercaseStringOrEmpty(ref.id);
@@ -129,6 +134,9 @@ export function isHighSignalLiveModelRef(ref: ModelRef): boolean {
if (isUnsupportedOpenAiLiveModelRef(provider, id)) {
return false;
}
if (isOldMiniMaxLiveModelRef(id)) {
return false;
}
return isHighSignalClaudeModelId(id);
}

View File

@@ -35,7 +35,7 @@ describe("live model turn probes", () => {
const context = buildLiveModelFileProbeContext({ systemPrompt: "sys" });
expect(context.systemPrompt).toBe("sys");
expect(context.messages[0]?.content).toEqual(
expect.stringContaining(`LIVE_FILE_TOKEN=${LIVE_MODEL_FILE_PROBE_TOKEN}`),
expect.stringContaining(`LIVE_LABEL=${LIVE_MODEL_FILE_PROBE_TOKEN}`),
);
});
@@ -98,17 +98,64 @@ describe("live model turn probes", () => {
expect(shouldSkipLiveModelFileProbe({ provider: "opencode-go", id: "minimax-m2.5" })).toBe(
true,
);
expect(
shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "arcee-ai/trinity-mini" }),
).toBe(true);
expect(
shouldSkipLiveModelFileProbe({
provider: "openrouter",
id: "deepseek/deepseek-chat-v3.1",
}),
).toBe(true);
expect(
shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "minimax/minimax-m2.5" }),
).toBe(true);
expect(
shouldSkipLiveModelFileProbe({
provider: "openrouter",
id: "nvidia/llama-3.3-nemotron-super-49b-v1.5",
}),
).toBe(true);
expect(
shouldSkipLiveModelFileProbe({
provider: "openrouter",
id: "nvidia/nemotron-nano-12b-v2-vl:free",
}),
).toBe(true);
expect(shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "qwen/qwen3.5-9b" })).toBe(
true,
);
expect(
shouldSkipLiveModelFileProbe({
provider: "openrouter",
id: "tngtech/deepseek-r1t2-chimera",
}),
).toBe(true);
expect(shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "z-ai/glm-4.7-flash" })).toBe(
true,
);
expect(shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "z-ai/glm-5" })).toBe(true);
expect(shouldSkipLiveModelFileProbe({ provider: "openrouter", id: "z-ai/glm-5.1" })).toBe(true);
expect(shouldSkipLiveModelFileProbe({ provider: "opencode-go", id: "kimi-k2.5" })).toBe(true);
expect(shouldSkipLiveModelFileProbe({ provider: "fireworks", id: "glm-5" })).toBe(false);
});
it("skips known stale image probe routes", () => {
expect(
shouldSkipLiveModelImageProbe({
provider: "fireworks",
id: "accounts/fireworks/models/kimi-k2p5",
}),
).toBe(true);
expect(
shouldSkipLiveModelImageProbe({
provider: "fireworks",
id: "accounts/fireworks/models/kimi-k2p6",
}),
).toBe(true);
expect(shouldSkipLiveModelImageProbe({ provider: "opencode-go", id: "mimo-v2-omni" })).toBe(
true,
);
expect(shouldSkipLiveModelImageProbe({ provider: "opencode-go", id: "kimi-k2.5" })).toBe(true);
expect(
shouldSkipLiveModelImageProbe({
@@ -116,9 +163,13 @@ describe("live model turn probes", () => {
id: "gemini-3.1-pro-preview-customtools",
}),
).toBe(true);
expect(shouldSkipLiveModelImageProbe({ provider: "opencode", id: "kimi-k2.6" })).toBe(true);
expect(
shouldSkipLiveModelImageProbe({ provider: "openrouter", id: "amazon/nova-pro-v1" }),
).toBe(true);
expect(
shouldSkipLiveModelImageProbe({ provider: "openrouter", id: "bytedance-seed/seed-1.6" }),
).toBe(true);
expect(shouldSkipLiveModelImageProbe({ provider: "fireworks", id: "glm-5" })).toBe(false);
});

View File

@@ -17,14 +17,31 @@ const KNOWN_EMPTY_FILE_PROBE_MODELS = new Set([
"opencode-go/mimo-v2-omni",
"opencode-go/mimo-v2-pro",
"opencode-go/minimax-m2.5",
"openrouter/arcee-ai/trinity-mini",
"openrouter/deepseek/deepseek-chat-v3.1",
"openrouter/minimax/minimax-m2.5",
"openrouter/nvidia/llama-3.3-nemotron-super-49b-v1.5",
"openrouter/nvidia/nemotron-nano-12b-v2-vl:free",
"openrouter/qwen/qwen3.5-9b",
"openrouter/tngtech/deepseek-r1t2-chimera",
"openrouter/z-ai/glm-4.5",
"openrouter/z-ai/glm-4.6",
"openrouter/z-ai/glm-4.7",
"openrouter/z-ai/glm-4.7-flash",
"openrouter/z-ai/glm-5",
"openrouter/z-ai/glm-5.1",
]);
// Routes whose image probe is known to come back with an empty assistant
// response; the sweep skips these instead of reporting them as failures.
// NOTE(review): entries look like "<provider>/<model id>" — presumably the
// same shape produced by modelKey() below; confirm before adding entries.
const KNOWN_EMPTY_IMAGE_PROBE_MODELS = new Set([
"fireworks/accounts/fireworks/models/kimi-k2p5",
"fireworks/accounts/fireworks/models/kimi-k2p6",
"fireworks/accounts/fireworks/routers/kimi-k2p5-turbo",
"google/gemini-3.1-pro-preview-customtools",
"opencode/kimi-k2.6",
"opencode-go/mimo-v2-omni",
"opencode-go/kimi-k2.5",
"opencode-go/kimi-k2.6",
"openrouter/amazon/nova-pro-v1",
"openrouter/bytedance-seed/seed-1.6",
]);
function modelKey(model: Pick<Model<Api>, "id" | "provider">): string {
@@ -78,10 +95,8 @@ export function buildLiveModelFileProbeContext(params: { systemPrompt?: string }
{
role: "user",
content:
"Read this file excerpt and reply with only the value after LIVE_FILE_TOKEN.\n\n" +
"File: live-model-probe.txt\n" +
"MIME: text/plain\n\n" +
`LIVE_FILE_TOKEN=${LIVE_MODEL_FILE_PROBE_TOKEN}`,
"Read this visible label and reply with only the value after LIVE_LABEL.\n\n" +
`LIVE_LABEL=${LIVE_MODEL_FILE_PROBE_TOKEN}`,
timestamp: Date.now(),
},
],
@@ -95,7 +110,7 @@ export function buildLiveModelFileProbeRetryContext(params: { systemPrompt?: str
{
role: "user",
content:
"The file live-model-probe.txt contains exactly this token:\n\n" +
"The visible label value is:\n\n" +
`${LIVE_MODEL_FILE_PROBE_TOKEN}\n\n` +
`Reply with exactly ${LIVE_MODEL_FILE_PROBE_TOKEN}.`,
timestamp: Date.now(),
@@ -113,7 +128,7 @@ export function buildLiveModelImageProbeContext(params: { systemPrompt?: string
content: [
{
type: "text",
text: "Reply with exactly the word OK if you received this image.",
text: "Reply with exactly OK.",
},
{
type: "image",

View File

@@ -503,6 +503,22 @@ describe("isHighSignalLiveModelRef", () => {
true,
);
});
it("drops old MiniMax 2.1 models from the default live matrix", () => {
providerRuntimeMocks.resolveProviderModernModelRef.mockReturnValue(true);
expect(isHighSignalLiveModelRef({ provider: "minimax", id: "MiniMax-M2.1" })).toBe(false);
expect(isHighSignalLiveModelRef({ provider: "openrouter", id: "minimax/minimax-m2.1" })).toBe(
false,
);
expect(
isHighSignalLiveModelRef({ provider: "openrouter", id: "minimax/minimax-m2.1:free" }),
).toBe(false);
expect(isHighSignalLiveModelRef({ provider: "minimax", id: "MiniMax-M2.7" })).toBe(true);
expect(isHighSignalLiveModelRef({ provider: "openrouter", id: "minimax/minimax-m2.7" })).toBe(
true,
);
});
});
describe("selectHighSignalLiveItems", () => {

View File

@@ -207,6 +207,13 @@ describe("isProviderUnavailableErrorMessage", () => {
),
).toBe(true);
});
it("matches transient upstream 502 errors", () => {
expect(isProviderUnavailableErrorMessage("502 internal server error")).toBe(true);
expect(
isProviderUnavailableErrorMessage("provider returned error: 502 Internal Server Error"),
).toBe(true);
});
});
function isChatGPTUsageLimitErrorMessage(raw: string): boolean {
@@ -250,7 +257,8 @@ function isProviderUnavailableErrorMessage(raw: string): boolean {
msg.includes("temporarily rate-limited upstream") ||
msg.includes("unable to access non-serverless model") ||
msg.includes("create and start a new dedicated endpoint") ||
msg.includes("no available capacity was found for the model")
msg.includes("no available capacity was found for the model") ||
(msg.includes("502") && msg.includes("internal server error"))
);
}
@@ -286,6 +294,20 @@ function isUnsupportedThinkingToggleErrorMessage(raw: string): boolean {
return /does not support parameter [`"]?enable_thinking[`"]?/i.test(raw);
}
/**
 * Detects provider errors reporting that the account's token plan does not
 * unlock the requested model. Wording varies between providers ("does not
 * support this model", "not support model"), so the optional groups absorb
 * both forms. Case-insensitive, unanchored substring match.
 */
function isUnsupportedPlanErrorMessage(raw: string): boolean {
  const planGatedModel = /current token plan (?:does )?not support (?:this )?model/i;
  return planGatedModel.test(raw);
}
// Unit coverage for the plan-gate matcher: both the full provider wording and
// the truncated "not support model" variant must match; an unrelated error
// string must not.
describe("isUnsupportedPlanErrorMessage", () => {
it("matches provider plan-gated models", () => {
expect(isUnsupportedPlanErrorMessage("current token plan does not support this model")).toBe(
true,
);
expect(isUnsupportedPlanErrorMessage("your current token plan not support model")).toBe(true);
expect(isUnsupportedPlanErrorMessage("model not found")).toBe(false);
});
});
function toInt(value: string | undefined, fallback: number): number {
const trimmed = value?.trim();
if (!trimmed) {
@@ -500,7 +522,13 @@ async function runExtraTurnProbes(params: {
fileText = extractAssistantText(retry);
}
if (!fileProbeTextMatches(fileText)) {
throw new Error(`file-read probe did not return ${LIVE_MODEL_FILE_PROBE_TOKEN}: ${fileText}`);
if (fileText.length === 0) {
logProgress(`${params.progressLabel}: file-read probe skipped (empty response)`);
} else {
throw new Error(
`file-read probe did not return ${LIVE_MODEL_FILE_PROBE_TOKEN}: ${fileText}`,
);
}
}
} else if (LIVE_FILE_PROBE_ENABLED) {
logProgress(`${params.progressLabel}: file-read probe skipped (known empty route)`);
@@ -531,6 +559,10 @@ async function runExtraTurnProbes(params: {
}
const imageText = extractAssistantText(image);
if (!imageProbeTextMatches(imageText)) {
if (imageText.length === 0) {
logProgress(`${params.progressLabel}: image probe skipped (empty response)`);
return;
}
throw new Error(`image probe did not return ok: ${imageText}`);
}
}
@@ -847,7 +879,10 @@ describeLive("live models (profile keys)", () => {
ok.text.length === 0 &&
allowNotFoundSkip &&
(model.provider === "fireworks" ||
model.provider === "google-antigravity" ||
model.provider === "minimax" ||
model.provider === "openai-codex" ||
model.provider === "xai" ||
model.provider === "zai")
) {
skipped.push({
@@ -857,18 +892,6 @@ describeLive("live models (profile keys)", () => {
logProgress(`${progressLabel}: skip (empty response)`);
break;
}
if (
ok.text.length === 0 &&
allowNotFoundSkip &&
(model.provider === "google-antigravity" || model.provider === "openai-codex")
) {
skipped.push({
model: id,
reason: "no text returned (provider returned empty content)",
});
logProgress(`${progressLabel}: skip (empty response)`);
break;
}
expect(ok.text.length).toBeGreaterThan(0);
await runExtraTurnProbes({
model,
@@ -921,7 +944,9 @@ describeLive("live models (profile keys)", () => {
}
if (
allowNotFoundSkip &&
(model.provider === "minimax" || model.provider === "zai") &&
(model.provider === "minimax" ||
model.provider === "zai" ||
model.provider === "openrouter") &&
isRateLimitErrorMessage(message)
) {
skipped.push({ model: id, reason: message });
@@ -1012,6 +1037,11 @@ describeLive("live models (profile keys)", () => {
logProgress(`${progressLabel}: skip (thinking toggle unsupported)`);
break;
}
if (allowNotFoundSkip && isUnsupportedPlanErrorMessage(message)) {
skipped.push({ model: id, reason: message });
logProgress(`${progressLabel}: skip (plan unsupported)`);
break;
}
if (
allowNotFoundSkip &&
model.provider === "ollama" &&