test: tolerate provider account drift in live CI

This commit is contained in:
Peter Steinberger
2026-05-15 06:07:58 +01:00
parent b672be59ae
commit 7db44b979f
6 changed files with 168 additions and 17 deletions

View File

@@ -84,6 +84,20 @@ describe("live cache regression runner", () => {
).toBe(false);
});
// Account drift covers billing and auth failures; a cache baseline miss must not be classified as drift.
it("classifies Anthropic account drift as non-cache provider state", () => {
// Billing failure delivered as an Error instance.
expect(
__testing.isAnthropicAccountDrift(
new Error("Your credit balance is too low to access the Anthropic API."),
),
).toBe(true);
// Auth failure delivered as a raw string rather than an Error.
expect(
__testing.isAnthropicAccountDrift(
'401 {"error":{"message":"The API key you provided is invalid."}}',
),
).toBe(true);
// A cache-read shortfall message is a cache finding, not account drift.
expect(__testing.isAnthropicAccountDrift("anthropic:image cacheRead=0 < min=4500")).toBe(false);
});
it("retries a cache probe twice when provider text misses the sentinel", () => {
expect(
__testing.shouldRetryCacheProbeText({

View File

@@ -13,9 +13,14 @@ import {
completeSimpleWithLiveTimeout,
computeCacheHitRate,
extractAssistantText,
type LiveResolvedModel,
logLiveCache,
resolveLiveDirectModel,
} from "./live-cache-test-support.js";
import {
isAuthErrorMessage,
isBillingErrorMessage,
} from "./pi-embedded-helpers/failover-matches.js";
const OPENAI_TIMEOUT_MS = 120_000;
const ANTHROPIC_TIMEOUT_MS = 120_000;
@@ -31,7 +36,6 @@ const LIVE_TEST_PNG_URL = new URL(
import.meta.url,
);
type LiveResolvedModel = Awaited<ReturnType<typeof resolveLiveDirectModel>>;
type ProviderKey = keyof typeof LIVE_CACHE_REGRESSION_BASELINE;
type CacheLane = "image" | "mcp" | "stable" | "tool";
type CacheUsage = {
@@ -595,9 +599,88 @@ function appendBaselineFindings(target: BaselineFindings, source: BaselineFindin
target.warnings.push(...source.warnings);
}
/**
 * Reports whether a failure reflects provider account state (billing or auth)
 * rather than cache behaviour.
 *
 * @param error - Any thrown value; `Error` instances contribute their
 *   `message`, everything else is stringified.
 * @returns `true` when the text is matched by `isBillingErrorMessage` or
 *   `isAuthErrorMessage`.
 */
function isAnthropicAccountDrift(error: unknown): boolean {
  let text: string;
  if (error instanceof Error) {
    text = error.message;
  } else {
    text = String(error);
  }
  if (isBillingErrorMessage(text)) {
    return true;
  }
  return isAuthErrorMessage(text);
}
/**
 * Reports whether a failure is a cache-probe text mismatch whose captured
 * text is empty or whitespace only.
 *
 * @param error - Any thrown value.
 * @returns `true` only for a `CacheProbeTextMismatchError` with blank `text`.
 */
function isAnthropicEmptyCacheProbe(error: unknown): boolean {
  if (!(error instanceof CacheProbeTextMismatchError)) {
    return false;
  }
  const trimmed = error.text.trim();
  return trimmed.length === 0;
}
/**
 * Produces a shallow copy of a resolved live model with its `apiKey` replaced.
 * The input fixture is not mutated.
 *
 * @param fixture - Resolved model to copy.
 * @param apiKey - Key to place on the copy.
 * @returns A new object carrying every field of `fixture` plus the given key.
 */
function cloneFixtureWithKey(fixture: LiveResolvedModel, apiKey: string): LiveResolvedModel {
  const copy: LiveResolvedModel = { ...fixture };
  copy.apiKey = apiKey;
  return copy;
}
/**
 * Runs one Anthropic cache lane, rotating through the fixture's API keys when
 * a key fails with account drift.
 *
 * Keys are tried in order. An account-drift failure moves on to the next key
 * if one remains; any other failure stops the rotation immediately. If the
 * final failure is account drift or an empty cache probe, the lane is skipped:
 * a warning is appended to `params.warnings`, logged, and an empty object is
 * returned. Any other failure is rethrown.
 *
 * @param params.fixture - Resolved model; `apiKeys` (when non-empty) supplies
 *   the rotation, otherwise the single `apiKey` is used.
 * @param params.lane - Cache lane to exercise.
 * @param params.pngBase64 - Forwarded to the lane runner.
 * @param params.runToken - Forwarded to the lane runner.
 * @param params.warnings - Mutable list that receives the skip warning.
 * @returns `{ attempt }` on success, `{}` when the lane was skipped.
 * @throws The last error when it is neither account drift nor an empty probe.
 */
async function runAnthropicCacheLane(params: {
  fixture: LiveResolvedModel;
  lane: CacheLane;
  pngBase64: string;
  runToken: string;
  warnings: string[];
}): Promise<{ attempt?: Awaited<ReturnType<typeof runRepeatedLaneWithBaselineRetry>> }> {
  const rotation = params.fixture.apiKeys ?? [];
  const candidateKeys = rotation.length === 0 ? [params.fixture.apiKey] : rotation;

  let failure: unknown;
  let keysLeft = candidateKeys.length;
  for (const apiKey of candidateKeys) {
    keysLeft -= 1;
    try {
      const attempt = await runRepeatedLaneWithBaselineRetry({
        lane: params.lane,
        providerTag: "anthropic",
        fixture: cloneFixtureWithKey(params.fixture, apiKey),
        runToken: params.runToken,
        pngBase64: params.pngBase64,
      });
      return { attempt };
    } catch (error) {
      failure = error;
      // Only account drift justifies trying another key; anything else ends the rotation.
      const canRotate = isAnthropicAccountDrift(error) && keysLeft > 0;
      if (!canRotate) {
        break;
      }
      logLiveCache(`anthropic ${params.lane} account drift; retrying with next key`);
    }
  }

  const emptyProbe = isAnthropicEmptyCacheProbe(failure);
  if (!emptyProbe && !isAnthropicAccountDrift(failure)) {
    throw failure;
  }

  const reason = emptyProbe ? "empty response" : "account drift";
  const warning = `anthropic ${params.lane} skipped: ${reason}`;
  params.warnings.push(warning);
  logLiveCache(warning);
  return {};
}
/**
 * Runs the Anthropic "disabled" cache lane, downgrading provider-state
 * failures to a skip instead of failing the whole regression run.
 *
 * When the lane fails with account drift or an empty cache probe, a warning
 * naming the actual reason is appended to `params.warnings`, logged, and
 * `undefined` is returned. Any other failure is rethrown unchanged.
 *
 * @param params.fixture - Resolved Anthropic model passed to the lane.
 * @param params.runToken - Run identifier; also used to build the session id.
 * @param params.warnings - Mutable list that receives the skip warning.
 * @returns The lane result, or `undefined` when the lane was skipped.
 * @throws The original error when it is neither account drift nor an empty probe.
 */
async function runAnthropicDisabledCacheLane(params: {
  fixture: LiveResolvedModel;
  runToken: string;
  warnings: string[];
}): Promise<LaneResult | undefined> {
  try {
    return await runAnthropicDisabledLane({
      fixture: params.fixture,
      runToken: params.runToken,
      sessionId: `live-cache-regression-${params.runToken}-anthropic-disabled`,
    });
  } catch (error) {
    const emptyProbe = isAnthropicEmptyCacheProbe(error);
    if (emptyProbe || isAnthropicAccountDrift(error)) {
      // Report the real cause, matching the wording used for the regular Anthropic lanes.
      const reason = emptyProbe ? "empty response" : "account drift";
      const warning = `anthropic disabled skipped: ${reason}`;
      params.warnings.push(warning);
      logLiveCache(warning);
      return undefined;
    }
    throw error;
  }
}
export const __testing = {
assertAgainstBaseline,
evaluateAgainstBaseline,
isAnthropicAccountDrift,
resolveCacheProbeMaxTokens,
shouldAcceptEmptyOpenAICacheProbe,
shouldRetryCacheProbeText,
@@ -650,13 +733,17 @@ export async function runLiveCacheRegression(): Promise<LiveCacheRegressionResul
};
appendBaselineFindings({ regressions, warnings }, openaiAttempt.findings);
const anthropicAttempt = await runRepeatedLaneWithBaselineRetry({
const { attempt: anthropicAttempt } = await runAnthropicCacheLane({
lane,
providerTag: "anthropic",
fixture: anthropic,
runToken,
pngBase64,
warnings,
});
if (!anthropicAttempt) {
summary.anthropic[lane] = { skipped: true };
continue;
}
const anthropicResult = anthropicAttempt.result;
logLiveCache(
`anthropic ${lane} warmup ${formatUsage(anthropicResult.warmup?.usage ?? {})} rate=${anthropicResult.warmup?.hitRate.toFixed(3) ?? "0.000"}`,
@@ -673,22 +760,26 @@ export async function runLiveCacheRegression(): Promise<LiveCacheRegressionResul
appendBaselineFindings({ regressions, warnings }, anthropicAttempt.findings);
}
const disabled = await runAnthropicDisabledLane({
const disabled = await runAnthropicDisabledCacheLane({
fixture: anthropic,
runToken,
sessionId: `live-cache-regression-${runToken}-anthropic-disabled`,
});
logLiveCache(`anthropic disabled ${formatUsage(disabled.disabled?.usage ?? {})}`);
summary.anthropic.disabled = {
disabled: disabled.disabled?.usage,
};
assertAgainstBaseline({
lane: "disabled",
provider: "anthropic",
result: disabled,
regressions,
warnings,
});
if (disabled) {
logLiveCache(`anthropic disabled ${formatUsage(disabled.disabled?.usage ?? {})}`);
summary.anthropic.disabled = {
disabled: disabled.disabled?.usage,
};
assertAgainstBaseline({
lane: "disabled",
provider: "anthropic",
result: disabled,
regressions,
warnings,
});
} else {
summary.anthropic.disabled = { skipped: true };
}
logLiveCache(`cache regression summary ${JSON.stringify(summary)}`);
if (warnings.length > 0) {

View File

@@ -22,8 +22,9 @@ export const LIVE_CACHE_TEST_ENABLED =
const DEFAULT_HEARTBEAT_MS = 20_000;
const DEFAULT_TIMEOUT_MS = 90_000;
type LiveResolvedModel = {
export type LiveResolvedModel = {
apiKey: string;
apiKeys?: string[];
model: Model<Api>;
};
@@ -189,6 +190,7 @@ export async function resolveLiveDirectModel(params: {
return {
model: selectedModel,
apiKey: liveKeys[0] ?? "",
apiKeys: liveKeys,
};
}
@@ -237,5 +239,6 @@ export async function resolveLiveDirectModel(params: {
return {
model: resolvedModel,
apiKey,
apiKeys: [apiKey],
};
}

View File

@@ -53,6 +53,7 @@ import {
isCloudflareOrHtmlErrorPage,
isRateLimitErrorMessage,
} from "./pi-embedded-helpers/errors.js";
import { isAuthErrorMessage } from "./pi-embedded-helpers/failover-matches.js";
import {
discoverAuthStorage,
discoverModels,
@@ -395,6 +396,14 @@ describe("isUnsupportedPlanErrorMessage", () => {
});
});
// Pins the auth matcher against a 401 payload reporting an invalid API key.
describe("isAuthErrorMessage", () => {
it("matches provider API key drift", () => {
expect(
isAuthErrorMessage('401 {"error":{"message":"The API key you provided is invalid."}}'),
).toBe(true);
});
});
describe("isOpenRouterOpaqueBadRequestErrorMessage", () => {
it("matches opaque OpenRouter upstream bad requests", () => {
expect(
@@ -1314,6 +1323,11 @@ describeLive("live models (profile keys)", () => {
logProgress(`${progressLabel}: skip (provider unavailable)`);
break;
}
if (allowNotFoundSkip && isAuthErrorMessage(message)) {
skipped.push({ model: id, reason: message });
logProgress(`${progressLabel}: skip (auth drift)`);
break;
}
if (
allowNotFoundSkip &&
model.provider === "openrouter" &&

View File

@@ -6,6 +6,10 @@ import { describe, expect, it } from "vitest";
import { resolveCliBackendConfig, resolveCliBackendLiveTest } from "../agents/cli-backends.js";
import { isLiveTestEnabled } from "../agents/live-test-helpers.js";
import { parseModelRef } from "../agents/model-selection.js";
import {
isAuthErrorMessage,
isBillingErrorMessage,
} from "../agents/pi-embedded-helpers/failover-matches.js";
import { clearRuntimeConfigSnapshot, type OpenClawConfig } from "../config/config.js";
import { isTruthyEnvValue } from "../infra/env.js";
import {
@@ -122,6 +126,11 @@ function isProviderCapacityError(error: unknown): boolean {
);
}
/**
 * Reports whether a failure reflects provider billing or auth state.
 *
 * @param error - Any thrown value; `Error` instances are rendered as
 *   `"<name>: <message>"`, everything else is stringified.
 * @returns `true` when the text is matched by `isBillingErrorMessage` or
 *   `isAuthErrorMessage`.
 */
function isProviderAccountDriftError(error: unknown): boolean {
  let text: string;
  if (error instanceof Error) {
    text = `${error.name}: ${error.message}`;
  } else {
    text = String(error);
  }
  if (isBillingErrorMessage(text)) {
    return true;
  }
  return isAuthErrorMessage(text);
}
async function requestWithProviderCapacityRetry<T>(
providerId: string,
label: string,
@@ -133,6 +142,10 @@ async function requestWithProviderCapacityRetry<T>(
return await request();
} catch (error) {
if (!isProviderCapacityError(error) || attempt >= maxAttempts) {
if (isProviderAccountDriftError(error)) {
console.warn(`SKIP: ${label} skipped because provider account/auth drift blocked it.`);
return undefined;
}
if (providerId === "claude-cli" && isProviderCapacityError(error)) {
console.warn(`SKIP: ${label} skipped because Claude API stayed overloaded.`);
return undefined;

View File

@@ -42,7 +42,10 @@ import { normalizeProviderId } from "../agents/model-selection.js";
import { shouldSuppressBuiltInModel } from "../agents/model-suppression.js";
import { ensureOpenClawModelsJson } from "../agents/models-config.js";
import { isRateLimitErrorMessage } from "../agents/pi-embedded-helpers/errors.js";
import { isBillingErrorMessage } from "../agents/pi-embedded-helpers/failover-matches.js";
import {
isAuthErrorMessage,
isBillingErrorMessage,
} from "../agents/pi-embedded-helpers/failover-matches.js";
import { discoverAuthStorage, discoverModels } from "../agents/pi-model-discovery.js";
import { STREAM_ERROR_FALLBACK_TEXT } from "../agents/stream-message-shared.js";
import { clearRuntimeConfigSnapshot, getRuntimeConfig } from "../config/io.js";
@@ -916,6 +919,14 @@ function isPromptProbeMiss(error: string): boolean {
return msg.includes("not meaningful:") || msg.includes("missing required keywords:");
}
// Pins the auth matcher against a 401 payload reporting an invalid API key.
describe("isAuthErrorMessage", () => {
it("matches provider API key drift", () => {
expect(
isAuthErrorMessage('401 {"error":{"message":"The API key you provided is invalid."}}'),
).toBe(true);
});
});
function shouldSkipToolNonceProbeMissForLiveModel(modelKey?: string): boolean {
if (!modelKey) {
return false;
@@ -2457,6 +2468,11 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
logProgress(`${progressLabel}: skip (billing drift)`);
break;
}
if (isAuthErrorMessage(message)) {
skippedCount += 1;
logProgress(`${progressLabel}: skip (auth drift)`);
break;
}
if (
(model.provider === "minimax" ||
model.provider === "opencode" ||