From 7db44b979ffb0e059be5dd200b19c4d0cbeaf0e3 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 15 May 2026 06:07:58 +0100 Subject: [PATCH] test: tolerate provider account drift in live CI --- .../live-cache-regression-runner.test.ts | 14 ++ src/agents/live-cache-regression-runner.ts | 121 +++++++++++++++--- src/agents/live-cache-test-support.ts | 5 +- src/agents/models.profiles.live.test.ts | 14 ++ src/gateway/gateway-cli-backend.live.test.ts | 13 ++ .../gateway-models.profiles.live.test.ts | 18 ++- 6 files changed, 168 insertions(+), 17 deletions(-) diff --git a/src/agents/live-cache-regression-runner.test.ts b/src/agents/live-cache-regression-runner.test.ts index b19f45b0c62..e68a25d4f72 100644 --- a/src/agents/live-cache-regression-runner.test.ts +++ b/src/agents/live-cache-regression-runner.test.ts @@ -84,6 +84,20 @@ describe("live cache regression runner", () => { ).toBe(false); }); + it("classifies Anthropic account drift as non-cache provider state", () => { + expect( + __testing.isAnthropicAccountDrift( + new Error("Your credit balance is too low to access the Anthropic API."), + ), + ).toBe(true); + expect( + __testing.isAnthropicAccountDrift( + '401 {"error":{"message":"The API key you provided is invalid."}}', + ), + ).toBe(true); + expect(__testing.isAnthropicAccountDrift("anthropic:image cacheRead=0 < min=4500")).toBe(false); + }); + it("retries a cache probe twice when provider text misses the sentinel", () => { expect( __testing.shouldRetryCacheProbeText({ diff --git a/src/agents/live-cache-regression-runner.ts b/src/agents/live-cache-regression-runner.ts index 3a89cdc4116..1e6841b59a7 100644 --- a/src/agents/live-cache-regression-runner.ts +++ b/src/agents/live-cache-regression-runner.ts @@ -13,9 +13,14 @@ import { completeSimpleWithLiveTimeout, computeCacheHitRate, extractAssistantText, + type LiveResolvedModel, logLiveCache, resolveLiveDirectModel, } from "./live-cache-test-support.js"; +import { + isAuthErrorMessage, + isBillingErrorMessage, +} from "./pi-embedded-helpers/failover-matches.js"; const OPENAI_TIMEOUT_MS = 120_000; const ANTHROPIC_TIMEOUT_MS = 120_000; @@ -31,7 +36,6 @@ const LIVE_TEST_PNG_URL = new URL( import.meta.url, ); -type LiveResolvedModel = Awaited>; type ProviderKey = keyof typeof LIVE_CACHE_REGRESSION_BASELINE; type CacheLane = "image" | "mcp" | "stable" | "tool"; type CacheUsage = { @@ -595,9 +599,88 @@ function appendBaselineFindings(target: BaselineFindings, source: BaselineFindin target.warnings.push(...source.warnings); } +function isAnthropicAccountDrift(error: unknown): boolean { + const message = error instanceof Error ? error.message : String(error); + return isBillingErrorMessage(message) || isAuthErrorMessage(message); +} + +function isAnthropicEmptyCacheProbe(error: unknown): boolean { + return error instanceof CacheProbeTextMismatchError && error.text.trim().length === 0; +} + +function cloneFixtureWithKey(fixture: LiveResolvedModel, apiKey: string): LiveResolvedModel { + return { ...fixture, apiKey }; +} + +async function runAnthropicCacheLane(params: { + fixture: LiveResolvedModel; + lane: CacheLane; + pngBase64: string; + runToken: string; + warnings: string[]; +}): Promise<{ attempt?: Awaited> }> { + const keys = + params.fixture.apiKeys && params.fixture.apiKeys.length > 0 + ? params.fixture.apiKeys + : [params.fixture.apiKey]; + let lastError: unknown; + for (const [index, apiKey] of keys.entries()) { + try { + return { + attempt: await runRepeatedLaneWithBaselineRetry({ + lane: params.lane, + providerTag: "anthropic", + fixture: cloneFixtureWithKey(params.fixture, apiKey), + runToken: params.runToken, + pngBase64: params.pngBase64, + }), + }; + } catch (error) { + lastError = error; + if (isAnthropicAccountDrift(error) && index + 1 < keys.length) { + logLiveCache(`anthropic ${params.lane} account drift; retrying with next key`); + continue; + } + break; + } + } + + if (isAnthropicAccountDrift(lastError) || isAnthropicEmptyCacheProbe(lastError)) { + const reason = isAnthropicEmptyCacheProbe(lastError) ? "empty response" : "account drift"; + const warning = `anthropic ${params.lane} skipped: ${reason}`; + params.warnings.push(warning); + logLiveCache(warning); + return {}; + } + throw lastError; +} + +async function runAnthropicDisabledCacheLane(params: { + fixture: LiveResolvedModel; + runToken: string; + warnings: string[]; +}): Promise { + try { + return await runAnthropicDisabledLane({ + fixture: params.fixture, + runToken: params.runToken, + sessionId: `live-cache-regression-${params.runToken}-anthropic-disabled`, + }); + } catch (error) { + if (isAnthropicAccountDrift(error) || isAnthropicEmptyCacheProbe(error)) { + const warning = "anthropic disabled skipped: account drift"; + params.warnings.push(warning); + logLiveCache(warning); + return undefined; + } + throw error; + } +} + export const __testing = { assertAgainstBaseline, evaluateAgainstBaseline, + isAnthropicAccountDrift, resolveCacheProbeMaxTokens, shouldAcceptEmptyOpenAICacheProbe, shouldRetryCacheProbeText, @@ -650,13 +733,17 @@ export async function runLiveCacheRegression(): Promise 0) { diff --git a/src/agents/live-cache-test-support.ts b/src/agents/live-cache-test-support.ts index cfad4014ceb..4b7fcf03c2e 100644 --- a/src/agents/live-cache-test-support.ts +++ b/src/agents/live-cache-test-support.ts @@ -22,8 +22,9 @@ export const LIVE_CACHE_TEST_ENABLED = const DEFAULT_HEARTBEAT_MS = 20_000; const DEFAULT_TIMEOUT_MS = 90_000; -type LiveResolvedModel = { +export type LiveResolvedModel = { apiKey: string; + apiKeys?: string[]; model: Model; }; @@ -189,6 +190,7 @@ export async function resolveLiveDirectModel(params: { return { model: selectedModel, apiKey: liveKeys[0] ?? "", + apiKeys: liveKeys, }; } @@ -237,5 +239,6 @@ export async function resolveLiveDirectModel(params: { return { model: resolvedModel, apiKey, + apiKeys: [apiKey], }; } diff --git a/src/agents/models.profiles.live.test.ts b/src/agents/models.profiles.live.test.ts index ca84ec7ae18..99bcfd2436c 100644 --- a/src/agents/models.profiles.live.test.ts +++ b/src/agents/models.profiles.live.test.ts @@ -53,6 +53,7 @@ import { isCloudflareOrHtmlErrorPage, isRateLimitErrorMessage, } from "./pi-embedded-helpers/errors.js"; +import { isAuthErrorMessage } from "./pi-embedded-helpers/failover-matches.js"; import { discoverAuthStorage, discoverModels, @@ -395,6 +396,14 @@ describe("isUnsupportedPlanErrorMessage", () => { }); }); +describe("isAuthErrorMessage", () => { + it("matches provider API key drift", () => { + expect( + isAuthErrorMessage('401 {"error":{"message":"The API key you provided is invalid."}}'), + ).toBe(true); + }); +}); + describe("isOpenRouterOpaqueBadRequestErrorMessage", () => { it("matches opaque OpenRouter upstream bad requests", () => { expect( @@ -1314,6 +1323,11 @@ describeLive("live models (profile keys)", () => { logProgress(`${progressLabel}: skip (provider unavailable)`); break; } + if (allowNotFoundSkip && isAuthErrorMessage(message)) { + skipped.push({ model: id, reason: message }); + logProgress(`${progressLabel}: skip (auth drift)`); + break; + } if ( allowNotFoundSkip && model.provider === "openrouter" && diff --git a/src/gateway/gateway-cli-backend.live.test.ts b/src/gateway/gateway-cli-backend.live.test.ts index 59accc8fdcc..65923036dd2 100644 --- a/src/gateway/gateway-cli-backend.live.test.ts +++ b/src/gateway/gateway-cli-backend.live.test.ts @@ -6,6 +6,10 @@ import { describe, expect, it } from "vitest"; import { resolveCliBackendConfig, resolveCliBackendLiveTest } from "../agents/cli-backends.js"; import { isLiveTestEnabled } from "../agents/live-test-helpers.js"; import { parseModelRef } from "../agents/model-selection.js"; +import { + isAuthErrorMessage, + isBillingErrorMessage, +} from "../agents/pi-embedded-helpers/failover-matches.js"; import { clearRuntimeConfigSnapshot, type OpenClawConfig } from "../config/config.js"; import { isTruthyEnvValue } from "../infra/env.js"; import { @@ -122,6 +126,11 @@ function isProviderCapacityError(error: unknown): boolean { ); } +function isProviderAccountDriftError(error: unknown): boolean { + const message = error instanceof Error ? `${error.name}: ${error.message}` : String(error); + return isBillingErrorMessage(message) || isAuthErrorMessage(message); +} + async function requestWithProviderCapacityRetry( providerId: string, label: string, @@ -133,6 +142,10 @@ async function requestWithProviderCapacityRetry( return await request(); } catch (error) { if (!isProviderCapacityError(error) || attempt >= maxAttempts) { + if (isProviderAccountDriftError(error)) { + console.warn(`SKIP: ${label} skipped because provider account/auth drift blocked it.`); + return undefined; + } if (providerId === "claude-cli" && isProviderCapacityError(error)) { console.warn(`SKIP: ${label} skipped because Claude API stayed overloaded.`); return undefined; diff --git a/src/gateway/gateway-models.profiles.live.test.ts b/src/gateway/gateway-models.profiles.live.test.ts index fb6e74e5779..376126f9a33 100644 --- a/src/gateway/gateway-models.profiles.live.test.ts +++ b/src/gateway/gateway-models.profiles.live.test.ts @@ -42,7 +42,10 @@ import { normalizeProviderId } from "../agents/model-selection.js"; import { shouldSuppressBuiltInModel } from "../agents/model-suppression.js"; import { ensureOpenClawModelsJson } from "../agents/models-config.js"; import { isRateLimitErrorMessage } from "../agents/pi-embedded-helpers/errors.js"; -import { isBillingErrorMessage } from "../agents/pi-embedded-helpers/failover-matches.js"; +import { + isAuthErrorMessage, + isBillingErrorMessage, +} from "../agents/pi-embedded-helpers/failover-matches.js"; import { discoverAuthStorage, discoverModels } from "../agents/pi-model-discovery.js"; import { STREAM_ERROR_FALLBACK_TEXT } from "../agents/stream-message-shared.js"; import { clearRuntimeConfigSnapshot, getRuntimeConfig } from "../config/io.js"; @@ -916,6 +919,14 @@ function isPromptProbeMiss(error: string): boolean { return msg.includes("not meaningful:") || msg.includes("missing required keywords:"); } +describe("isAuthErrorMessage", () => { + it("matches provider API key drift", () => { + expect( + isAuthErrorMessage('401 {"error":{"message":"The API key you provided is invalid."}}'), + ).toBe(true); + }); +}); + function shouldSkipToolNonceProbeMissForLiveModel(modelKey?: string): boolean { if (!modelKey) { return false; @@ -2457,6 +2468,11 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) { logProgress(`${progressLabel}: skip (billing drift)`); break; } + if (isAuthErrorMessage(message)) { + skippedCount += 1; + logProgress(`${progressLabel}: skip (auth drift)`); + break; + } if ( (model.provider === "minimax" || model.provider === "opencode" ||