Prevent Codex HTML challenge pages from looking like DNS failures

Cloudflare challenge pages from chatgpt.com/backend-api can arrive as raw HTML without an HTTP status prefix. The transport sanitizer scanned for generic "dns" substrings before HTML detection, so these pages could surface as DNS lookup failures instead of the existing HTML/CDN block message.

Constraint: Must preserve DNS transport classification for real ENOTFOUND/getaddrinfo failures
Rejected: Treat every bare HTML document as an upstream HTML error | too broad for arbitrary model text/errors
Confidence: high
Scope-risk: narrow
Directive: Keep standalone HTML challenge detection ahead of generic transport keyword matching so CDN block pages do not regress into DNS copy
Tested: oxfmt --check on changed files; targeted node --import tsx verification for standalone Cloudflare HTML classification and DNS control case
Not-tested: Full Vitest shard run in this environment
This commit is contained in:
Chris Yau
2026-04-16 22:21:47 +08:00
committed by Peter Steinberger
parent 51606e9889
commit 36dd58ac2a
4 changed files with 69 additions and 1 deletions

View File

@@ -185,6 +185,23 @@ describe("formatAssistantErrorText", () => {
);
});
it("does not misdiagnose standalone Cloudflare challenge HTML as DNS", () => {
  // The page has no HTTP status prefix and its dns-prefetch link contains the
  // substring "dns"; the formatter must still report the HTML/CDN block copy.
  const challengeHtml = `<!DOCTYPE html>
<html>
<head>
<title>Just a moment...</title>
<link rel="dns-prefetch" href="//chatgpt.com">
</head>
<body>
<span id="challenge-error-text">Enable JavaScript and cookies to continue</span>
<script src="/cdn-cgi/challenge-platform/h/g/orchestrate/chl_page/v1"></script>
</body>
</html>`;
  const expectedCopy =
    "The provider returned an HTML error page instead of an API response. This usually means a CDN or gateway (e.g. Cloudflare) blocked the request. Retry in a moment or check provider status.";

  const formatted = formatAssistantErrorText(makeAssistantError(challengeHtml));

  expect(formatted).toBe(expectedCopy);
});
it("returns a friendly message for empty stream chunk errors", () => {
const msg = makeAssistantError("request ended without sending any chunks");
expect(formatAssistantErrorText(msg)).toBe("LLM request timed out.");
@@ -339,6 +356,21 @@ describe("formatRawAssistantErrorForUi", () => {
"The AI service is temporarily unavailable (HTTP 521). Please try again in a moment.",
);
});
it("formats standalone Cloudflare challenge HTML into a clean provider error", () => {
  // A bare challenge document (no leading HTTP status) passed straight to the raw formatter.
  const challengePage = `<!DOCTYPE html>
<html lang="en-US">
<head><title>Just a moment...</title></head>
<body>
<span id="challenge-error-text">Enable JavaScript and cookies to continue</span>
<script src="/cdn-cgi/challenge-platform/h/g/orchestrate/chl_page/v1"></script>
</body>
</html>`;
  const expectedCopy =
    "The provider returned an HTML error page instead of an API response. This usually means a CDN or gateway (e.g. Cloudflare) blocked the request. Retry in a moment or check provider status.";

  const uiText = formatRawAssistantErrorForUi(challengePage);

  expect(uiText).toBe(expectedCopy);
});
});
describe("raw API error payload helpers", () => {

View File

@@ -266,6 +266,18 @@ describe("isCloudflareOrHtmlErrorPage", () => {
expect(isCloudflareOrHtmlErrorPage(htmlError)).toBe(true);
});
it("detects standalone Cloudflare challenge HTML pages", () => {
  // Full HTML document with challenge markers and no HTTP status line in front of it.
  const challengePage = `<!DOCTYPE html>
<html lang="en-US">
<head><title>Just a moment...</title></head>
<body>
<span id="challenge-error-text">Enable JavaScript and cookies to continue</span>
<script src="/cdn-cgi/challenge-platform/h/g/orchestrate/chl_page/v1"></script>
</body>
</html>`;

  const detected = isCloudflareOrHtmlErrorPage(challengePage);

  expect(detected).toBe(true);
});
it("does not flag non-HTML status lines", () => {
expect(isCloudflareOrHtmlErrorPage("500 Internal Server Error")).toBe(false);
expect(isCloudflareOrHtmlErrorPage("429 Too Many Requests")).toBe(false);

View File

@@ -102,6 +102,11 @@ export function formatTransportErrorCopy(raw: string): string | undefined {
if (!raw) {
return undefined;
}
if (isCloudflareOrHtmlErrorPage(raw)) {
return undefined;
}
const lower = normalizeLowercaseStringOrEmpty(raw);
if (

View File

@@ -10,7 +10,10 @@ const HTTP_STATUS_CODE_PREFIX_RE = new RegExp(
"i",
);
// Matches text that begins (after optional whitespace) with `<!doctype html` or `<html`,
// case-insensitively.
const HTML_ERROR_PREFIX_RE = /^\s*(?:<!doctype\s+html\b|<html\b)/i;
// Matches a closing `</html>` tag anywhere in the text, case-insensitively.
const HTML_CLOSE_RE = /<\/html>/i;
// HTTP status codes 521-526 and 530. The name indicates these are treated as
// Cloudflare-specific; NOTE(review): the code that consults this set is not visible here.
const CLOUDFLARE_HTML_ERROR_CODES = new Set([521, 522, 523, 524, 525, 526, 530]);
// Case-insensitive markers (challenge-page identifiers and common block/outage phrases).
// isCloudflareOrHtmlErrorPage requires one of these, together with the HTML prefix and
// closing tag, before treating an HTML document with no HTTP status as an error page.
const STANDALONE_HTML_ERROR_HINT_RE =
/\bcloudflare\b|cdn-cgi\/challenge-platform|challenge-error-text|enable javascript and cookies to continue|access denied|forbidden|service unavailable|bad gateway|web server is down|captcha|attention required/i;
// Object with string keys whose values must be narrowed before use.
type ErrorPayload = Record<string, unknown>;
@@ -94,6 +97,14 @@ export function isCloudflareOrHtmlErrorPage(raw: string): boolean {
return false;
}
if (
HTML_ERROR_PREFIX_RE.test(trimmed) &&
HTML_CLOSE_RE.test(trimmed) &&
STANDALONE_HTML_ERROR_HINT_RE.test(trimmed)
) {
return true;
}
const status = extractLeadingHttpStatus(trimmed);
if (!status || status.code < 500) {
return false;
@@ -104,7 +115,7 @@ export function isCloudflareOrHtmlErrorPage(raw: string): boolean {
}
return (
status.code < 600 && HTML_ERROR_PREFIX_RE.test(status.rest) && /<\/html>/i.test(status.rest)
status.code < 600 && HTML_ERROR_PREFIX_RE.test(status.rest) && HTML_CLOSE_RE.test(status.rest)
);
}
@@ -175,6 +186,14 @@ export function formatRawAssistantErrorForUi(raw?: string): string {
return `The AI service is temporarily unavailable (HTTP ${leadingStatus.code}). Please try again in a moment.`;
}
if (isCloudflareOrHtmlErrorPage(trimmed)) {
return (
"The provider returned an HTML error page instead of an API response. " +
"This usually means a CDN or gateway (e.g. Cloudflare) blocked the request. " +
"Retry in a moment or check provider status."
);
}
const httpMatch = trimmed.match(HTTP_STATUS_PREFIX_RE);
if (httpMatch) {
const rest = httpMatch[2].trim();