fix: classify HTML provider error pages correctly (#67642) (thanks @stainlu)

* fix(agents): classify Cloudflare/CDN HTML error pages as transport failures

Fixes #67517

When a provider endpoint returns an HTML error page (e.g. Cloudflare
502/503/520-524), the pattern-based message classifiers would scan
the HTML body and misinterpret embedded text like "Rate limit
exceeded" as a structured rate_limit API error. This caused
incorrect failover behavior (profile rotation instead of clean
retry/fallback) and left the TUI stuck.

Two fixes:
1. classifyFailoverSignal now short-circuits on HTML responses
   before running pattern matchers, returning "timeout" (transport
   failure) so retry/fallback handles them correctly.
2. classifyProviderRuntimeFailureKind now detects HTML errors at
   any status (not just 403), returning "upstream_html" for
   non-403 statuses with a clear user-facing message about
   CDN/gateway errors.

Adds regression tests covering Cloudflare 502/503 HTML with
embedded rate-limit text, 403 HTML (still classified as auth),
and JSON rate-limit responses (still classified correctly).

* fix: preserve auth and proxy HTML classification

* fix: classify HTML provider error pages correctly (#67642) (thanks @stainlu)

---------

Co-authored-by: Ayaan Zaidi <hi@obviy.us>
This commit is contained in:
stain lu
2026-04-16 20:49:53 +08:00
committed by GitHub
parent 55f05df77e
commit e588e904a7
4 changed files with 110 additions and 12 deletions

View File

@@ -35,6 +35,7 @@ Docs: https://docs.openclaw.ai
- Agents/tools: resolve non-workspace host tilde paths against the OS home directory and keep edit recovery aligned with that same path target, so `~/...` host edit/write operations stop failing or reading back the wrong file when `OPENCLAW_HOME` differs. (#62804) Thanks @stainlu.
- Speech/TTS: auto-enable the bundled Microsoft and ElevenLabs speech providers, and route generic TTS directive tokens through the explicit or active provider first so overrides like `[[tts:speed=1.2]]` stop silently landing on the wrong provider. (#62846) Thanks @stainlu.
- OpenAI Codex/models: normalize stale native transport metadata in both runtime resolution and discovery/listing so legacy `openai-codex` rows with missing `api` or `https://chatgpt.com/backend-api/v1` self-heal to the canonical Codex transport instead of routing requests through broken HTML/Cloudflare paths, combining the original fixes proposed in #66969 (saamuelng601-pixel) and #67159 (hclsys). (#67635)
- Agents/failover: treat HTML provider error pages as upstream transport failures for CDN-style 5xx responses without misclassifying embedded body text as API rate limits, while still preserving auth remediation for HTML 401/403 pages and proxy remediation for HTML 407 pages. (#67642) Thanks @stainlu.
## 2026.4.15-beta.1

View File

@@ -167,21 +167,21 @@ describe("formatAssistantErrorText", () => {
expect(result).toBe("⚠️ Your quota has been exhausted, try again in 24 hours");
});
it("falls back to generic copy for HTML quota pages", () => {
it("returns upstream HTML copy for HTML quota pages", () => {
const msg = makeAssistantError(
"429 <!DOCTYPE html><html><body>Your quota is exhausted</body></html>",
);
expect(formatAssistantErrorText(msg)).toBe(
"⚠️ API rate limit reached. Please try again later.",
"The provider returned an HTML error page instead of an API response. This usually means a CDN or gateway (e.g. Cloudflare) blocked the request. Retry in a moment or check provider status.",
);
});
it("falls back to generic copy for prefixed HTML rate-limit pages", () => {
it("returns upstream HTML copy for prefixed HTML rate-limit pages", () => {
const msg = makeAssistantError(
"Error: 521 <!DOCTYPE html><html><body>rate limit</body></html>",
);
expect(formatAssistantErrorText(msg)).toBe(
"⚠️ API rate limit reached. Please try again later.",
"The provider returned an HTML error page instead of an API response. This usually means a CDN or gateway (e.g. Cloudflare) blocked the request. Retry in a moment or check provider status.",
);
});

View File

@@ -257,6 +257,7 @@ export type ProviderRuntimeFailureKind =
| "auth_scope"
| "auth_refresh"
| "auth_html_403"
| "upstream_html"
| "proxy"
| "rate_limit"
| "dns"
@@ -324,11 +325,15 @@ const REPLAY_INVALID_RE =
// Case-insensitive matcher for sandbox/approval failure text. It hits on the phrases
// "approval is required", "approval timed out", "approval was denied" and
// "blocked by sandbox", or on the word "sandbox" followed later on the same line by
// one of: blocked, denied, forbidden, disabled, not allowed.
const SANDBOX_BLOCKED_RE =
/\bapproval is required\b|\bapproval timed out\b|\bapproval was denied\b|\bblocked by sandbox\b|\bsandbox\b.*\b(?:blocked|denied|forbidden|disabled|not allowed)\b/i;
/**
 * Removes one leading "Error:" label (any letter case) from an error message.
 *
 * Surrounding whitespace is dropped as well, and so is any whitespace that
 * separates the label from the rest of the message. Messages without the label
 * are returned trimmed but otherwise untouched.
 *
 * @param raw - The raw error message text.
 * @returns The message without the leading label and without outer whitespace.
 */
function stripErrorPrefix(raw: string): string {
  // Trim first: the pattern is anchored at the start of the string, so leading
  // whitespace would otherwise hide the label and leave it in place.
  return raw.trim().replace(/^error:\s*/i, "");
}
/**
 * Resolves the HTTP status to use for a failover signal.
 *
 * A finite numeric `signal.status` always wins. Otherwise the status is taken from
 * the start of `signal.message`, after trimming it and dropping a leading "Error:"
 * label, so that messages such as "Error: 401 ..." still yield a status.
 *
 * @param signal - The failover signal to inspect.
 * @returns The explicit status, the leading status found in the message, or
 *   `undefined` when neither is available.
 */
function inferSignalStatus(signal: FailoverSignal): number | undefined {
  if (typeof signal.status === "number" && Number.isFinite(signal.status)) {
    return signal.status;
  }
  // Single exit for the message path: a second, prefix-unaware return placed ahead
  // of this one would make the prefix handling unreachable.
  const message = stripErrorPrefix(signal.message?.trim() ?? "");
  return extractLeadingHttpStatus(message)?.code;
}
function isHtmlErrorResponse(raw: string, status?: number): boolean {
@@ -336,17 +341,22 @@ function isHtmlErrorResponse(raw: string, status?: number): boolean {
if (!trimmed) {
return false;
}
const candidate = extractLeadingHttpStatus(trimmed) ? trimmed : stripErrorPrefix(trimmed);
const inferred =
typeof status === "number" && Number.isFinite(status)
? status
: extractLeadingHttpStatus(trimmed)?.code;
: extractLeadingHttpStatus(candidate)?.code;
if (typeof inferred !== "number" || inferred < 400) {
return false;
}
const rest = extractLeadingHttpStatus(trimmed)?.rest ?? trimmed;
const rest = extractLeadingHttpStatus(candidate)?.rest ?? candidate;
return HTML_BODY_RE.test(rest) && HTML_CLOSE_RE.test(rest);
}
/**
 * Tells whether a status falls outside the auth/proxy statuses 401, 403 and 407.
 *
 * @param status - The HTTP status, if one is known.
 * @returns `false` for 401, 403 and 407; `true` for every other value, including
 *   `undefined`.
 */
function isTransportHtmlErrorStatus(status: number | undefined): boolean {
  switch (status) {
    case 401:
    case 403:
    case 407:
      return false;
    default:
      return true;
  }
}
function isOpenAICodexScopeContext(raw: string, provider?: string): boolean {
const normalizedProvider = normalizeLowercaseStringOrEmpty(provider);
return (
@@ -669,7 +679,9 @@ function isOpenRouterKeyLimitExceededError(raw: string, provider?: string): bool
}
/**
 * Checks whether a message is exactly the generic
 * "unknown error (no error details in response)" text.
 *
 * The comparison runs on the value produced by `normalizeOptionalLowercaseString`
 * followed by a trim, so only the whole message can match, never a substring.
 *
 * @param raw - The raw error message text.
 * @returns `true` only when the normalized message equals the generic text.
 */
function isExactUnknownNoDetailsError(raw: string): boolean {
  const normalized = normalizeOptionalLowercaseString(raw)?.trim();
  return normalized === "unknown error (no error details in response)";
}
function classifyFailoverClassificationFromMessage(
@@ -757,6 +769,13 @@ function classifyFailoverClassificationFromMessage(
export function classifyFailoverSignal(signal: FailoverSignal): FailoverClassification | null {
const inferredStatus = inferSignalStatus(signal);
if (
signal.message &&
isTransportHtmlErrorStatus(inferredStatus) &&
isHtmlErrorResponse(signal.message, inferredStatus)
) {
return toReasonClassification("timeout");
}
const messageClassification = signal.message
? classifyFailoverClassificationFromMessage(signal.message, signal.provider)
: null;
@@ -791,12 +810,12 @@ export function classifyProviderRuntimeFailureKind(
if (message && isAuthScopeErrorMessage(message, status, normalizedSignal.provider)) {
return "auth_scope";
}
if (message && status === 403 && isHtmlErrorResponse(message, status)) {
return "auth_html_403";
}
if (message && isProxyErrorMessage(message, status)) {
return "proxy";
}
if (message && isHtmlErrorResponse(message, status)) {
return status === 403 ? "auth_html_403" : "upstream_html";
}
const failoverClassification = classifyFailoverSignal({
...normalizedSignal,
status,
@@ -885,6 +904,14 @@ export function formatAssistantErrorText(
);
}
if (providerRuntimeFailureKind === "upstream_html") {
return (
"The provider returned an HTML error page instead of an API response. " +
"This usually means a CDN or gateway (e.g. Cloudflare) blocked the request. " +
"Retry in a moment or check provider status."
);
}
if (providerRuntimeFailureKind === "proxy") {
return "LLM request failed: proxy or tunnel configuration blocked the provider request.";
}

View File

@@ -16,7 +16,11 @@ vi.mock("../../plugins/provider-runtime.js", async () => {
};
});
import { classifyFailoverReason, isContextOverflowError } from "./errors.js";
import {
classifyFailoverReason,
classifyProviderRuntimeFailureKind,
isContextOverflowError,
} from "./errors.js";
import {
classifyProviderSpecificError,
matchesProviderContextOverflow,
@@ -146,3 +150,69 @@ describe("classifyFailoverReason with provider patterns", () => {
);
});
});
// Regression suite for #67517: HTML error pages returned in place of an API response.
describe("Cloudflare / CDN HTML error page classification (#67517)", () => {
  // Every fixture shares one page skeleton; only the title and body markup differ.
  const htmlPage = (title: string, body: string): string =>
    `<!doctype html><html><head><title>${title}</title></head><body>${body}</body></html>`;

  const cloudflareHtml502 = htmlPage(
    "502 Bad Gateway",
    "<h1>502 Bad Gateway</h1><p>cloudflare-nginx</p>",
  );
  // The body deliberately carries rate-limit wording to bait the text matchers.
  const cloudflareHtml503 = htmlPage(
    "503",
    "<h1>Service Unavailable</h1><p>Please try again. Rate limit exceeded.</p>",
  );
  const html401 = htmlPage("401 Unauthorized", "<h1>Unauthorized</h1>");
  const html403 = htmlPage("403 Forbidden", "<h1>Forbidden</h1>");
  const html407 = htmlPage(
    "407 Proxy Authentication Required",
    "<h1>Proxy Authentication Required</h1>",
  );
  const prefixedHtml401 = "Error: 401 " + html401;
  const prefixedHtml407 = "Error: 407 " + html407;

  it("classifies Cloudflare HTML 502 as timeout", () => {
    const reason = classifyFailoverReason("502 " + cloudflareHtml502);
    expect(reason).toBe("timeout");
  });

  it("classifies Cloudflare HTML 503 with rate-limit text as timeout", () => {
    const reason = classifyFailoverReason("503 " + cloudflareHtml503);
    expect(reason).toBe("timeout");
  });

  it("preserves auth classification for 401 HTML", () => {
    const reason = classifyFailoverReason("401 " + html401);
    expect(reason).toBe("auth");
  });

  it("preserves auth classification for 403 HTML", () => {
    const reason = classifyFailoverReason("403 " + html403);
    expect(reason).toBe("auth");
  });

  it("preserves auth classification for Error-prefixed 401 HTML", () => {
    const reason = classifyFailoverReason(prefixedHtml401);
    expect(reason).toBe("auth");
  });

  it("classifies runtime failure kind as upstream_html for non-auth HTML", () => {
    const kind = classifyProviderRuntimeFailureKind({ status: 502, message: cloudflareHtml502 });
    expect(kind).toBe("upstream_html");
  });

  it("classifies 403 HTML runtime failures as auth_html_403", () => {
    const kind = classifyProviderRuntimeFailureKind({ status: 403, message: html403 });
    expect(kind).toBe("auth_html_403");
  });

  it("classifies 407 HTML runtime failures as proxy", () => {
    const kind = classifyProviderRuntimeFailureKind({ status: 407, message: html407 });
    expect(kind).toBe("proxy");
  });

  it("classifies Error-prefixed 407 HTML runtime failures as proxy", () => {
    const kind = classifyProviderRuntimeFailureKind(prefixedHtml407);
    expect(kind).toBe("proxy");
  });

  it("does not misclassify JSON API rate-limit responses as HTML", () => {
    // A structured JSON body must still reach the normal rate-limit matcher.
    const jsonRateLimit =
      '429 {"error":{"type":"rate_limit_error","message":"Rate limit exceeded"}}';
    const reason = classifyFailoverReason(jsonRateLimit);
    expect(reason).toBe("rate_limit");
  });
});