From a6894a5238fec5ee8469f5e06d97ea41918e6236 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 5 Apr 2026 08:03:47 +0100 Subject: [PATCH] test: harden live model skip handling --- src/agents/live-model-errors.ts | 12 ++++ src/agents/models.profiles.live.test.ts | 60 ++++++++++++++++++- .../gateway-models.profiles.live.test.ts | 36 ++++++++++- 3 files changed, 106 insertions(+), 2 deletions(-) diff --git a/src/agents/live-model-errors.ts b/src/agents/live-model-errors.ts index 56ba30a826b..a553a75cf03 100644 --- a/src/agents/live-model-errors.ts +++ b/src/agents/live-model-errors.ts @@ -12,6 +12,18 @@ export function isModelNotFoundErrorMessage(raw: string): boolean { if (/model:\s*[a-z0-9._-]+/i.test(msg) && /not(?:[_\-\s])?found/i.test(msg)) { return true; } + if (/does not exist or you do not have access/i.test(msg)) { + return true; + } + if (/deprecated/i.test(msg) && /upgrade to/i.test(msg)) { + return true; + } + if (/stealth model/i.test(msg) && /find it here/i.test(msg)) { + return true; + } + if (/is not a valid model id/i.test(msg)) { + return true; + } return false; } diff --git a/src/agents/models.profiles.live.test.ts b/src/agents/models.profiles.live.test.ts index 4aea0ea4fc2..e0b13de083a 100644 --- a/src/agents/models.profiles.live.test.ts +++ b/src/agents/models.profiles.live.test.ts @@ -150,6 +150,18 @@ function isModelNotFoundErrorMessage(raw: string): boolean { if (/model:\s*[a-z0-9._-]+/i.test(msg) && /not(?:[\s_-]+)?found/i.test(msg)) { return true; } + if (/does not exist or you do not have access/i.test(msg)) { + return true; + } + if (/deprecated/i.test(msg) && /upgrade to/i.test(msg)) { + return true; + } + if (/stealth model/i.test(msg) && /find it here/i.test(msg)) { + return true; + } + if (/is not a valid model id/i.test(msg)) { + return true; + } return false; } @@ -188,7 +200,11 @@ function isProviderUnavailableErrorMessage(raw: string): boolean { msg.includes("no allowed providers are available") || msg.includes("provider unavailable") || msg.includes("upstream provider unavailable") || - msg.includes("upstream error from google") + msg.includes("upstream error from google") || + msg.includes("temporarily rate-limited upstream") || + msg.includes("unable to access non-serverless model") || + msg.includes("create and start a new dedicated endpoint") || + msg.includes("no available capacity was found for the model") ); } @@ -201,6 +217,21 @@ function isOllamaUnavailableErrorMessage(raw: string): boolean { ); } +function isAudioOnlyModelErrorMessage(raw: string): boolean { + return /requires that either input content or output modality contain audio/i.test(raw); +} + +function isUnsupportedReasoningEffortErrorMessage(raw: string): boolean { + return ( + /does not support parameter reasoningeffort/i.test(raw) || + /unsupported value:\s*'low'.*reasoning\.effort.*supported values are:\s*'medium'/i.test(raw) + ); +} + +function isUnsupportedThinkingToggleErrorMessage(raw: string): boolean { + return /does not support parameter [`"]?enable_thinking[`"]?/i.test(raw); +} + function toInt(value: string | undefined, fallback: number): number { const trimmed = value?.trim(); if (!trimmed) { @@ -260,6 +291,18 @@ function resolveTestReasoning( return undefined; } const id = model.id.toLowerCase(); + if (id.includes("deep-research")) { + return "medium"; + } + if (id === "gpt-5.4-pro") { + return "medium"; + } + if (model.provider === "openrouter" && id.startsWith("qwq")) { + return undefined; + } + if (model.provider === "xai" && id.startsWith("grok-4")) { + return undefined; + } if (model.provider === "openai" || model.provider === "openai-codex") { if (id.includes("pro")) { return "high"; @@ -775,6 +818,21 @@ describeLive("live models (profile keys)", () => { logProgress(`${progressLabel}: skip (provider unavailable)`); break; } + if (allowNotFoundSkip && isAudioOnlyModelErrorMessage(message)) { + skipped.push({ model: id, reason: message }); + logProgress(`${progressLabel}: skip (audio-only model)`); + break; + } + if (allowNotFoundSkip && isUnsupportedReasoningEffortErrorMessage(message)) { + skipped.push({ model: id, reason: message }); + logProgress(`${progressLabel}: skip (reasoning unsupported)`); + break; + } + if (allowNotFoundSkip && isUnsupportedThinkingToggleErrorMessage(message)) { + skipped.push({ model: id, reason: message }); + logProgress(`${progressLabel}: skip (thinking toggle unsupported)`); + break; + } if ( allowNotFoundSkip && model.provider === "ollama" && diff --git a/src/gateway/gateway-models.profiles.live.test.ts b/src/gateway/gateway-models.profiles.live.test.ts index 1fef122e779..378d45d1e98 100644 --- a/src/gateway/gateway-models.profiles.live.test.ts +++ b/src/gateway/gateway-models.profiles.live.test.ts @@ -542,7 +542,11 @@ function isProviderUnavailableErrorMessage(raw: string): boolean { msg.includes("no allowed providers are available") || msg.includes("provider unavailable") || msg.includes("upstream provider unavailable") || - msg.includes("upstream error from google") + msg.includes("upstream error from google") || + msg.includes("temporarily rate-limited upstream") || + msg.includes("unable to access non-serverless model") || + msg.includes("create and start a new dedicated endpoint") || + msg.includes("no available capacity was found for the model") ); } @@ -555,6 +559,21 @@ function isOllamaUnavailableErrorMessage(raw: string): boolean { ); } +function isAudioOnlyModelErrorMessage(raw: string): boolean { + return /requires that either input content or output modality contain audio/i.test(raw); +} + +function isUnsupportedReasoningEffortErrorMessage(raw: string): boolean { + return ( + /does not support parameter reasoningeffort/i.test(raw) || + /unsupported value:\s*'low'.*reasoning\.effort.*supported values are:\s*'medium'/i.test(raw) + ); +} + +function isUnsupportedThinkingToggleErrorMessage(raw: string): boolean { + return /does not support parameter [`"]?enable_thinking[`"]?/i.test(raw); +} + function isInstructionsRequiredError(error: string): boolean { return /instructions are required/i.test(error); } @@ -1672,6 +1691,21 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) { logProgress(`${progressLabel}: skip (provider unavailable)`); break; } + if (isAudioOnlyModelErrorMessage(message)) { + skippedCount += 1; + logProgress(`${progressLabel}: skip (audio-only model)`); + break; + } + if (isUnsupportedReasoningEffortErrorMessage(message)) { + skippedCount += 1; + logProgress(`${progressLabel}: skip (reasoning unsupported)`); + break; + } + if (isUnsupportedThinkingToggleErrorMessage(message)) { + skippedCount += 1; + logProgress(`${progressLabel}: skip (thinking toggle unsupported)`); + break; + } if (model.provider === "openrouter" && isPromptProbeMiss(message)) { skippedCount += 1; logProgress(`${progressLabel}: skip (openrouter prompt probe miss)`);