From 36dd58ac2a5dc5ae15706b4c8dae7d28d033bc89 Mon Sep 17 00:00:00 2001 From: Chris Yau Date: Thu, 16 Apr 2026 22:21:47 +0800 Subject: [PATCH] Prevent Codex HTML challenge pages from looking like DNS failures Cloudflare challenge pages from chatgpt.com/backend-api can arrive as raw HTML without an HTTP status prefix. The transport sanitizer scanned for generic "dns" substrings before HTML detection, so these pages could surface as DNS lookup failures instead of the existing HTML/CDN block message. Constraint: Must preserve DNS transport classification for real ENOTFOUND/getaddrinfo failures Rejected: Treat every bare HTML document as an upstream HTML error | too broad for arbitrary model text/errors Confidence: high Scope-risk: narrow Directive: Keep standalone HTML challenge detection ahead of generic transport keyword matching so CDN block pages do not regress into DNS copy Tested: oxfmt --check on changed files; targeted node --import tsx verification for standalone Cloudflare HTML classification and DNS control case Not-tested: Full Vitest shard run in this environment --- ...d-helpers.formatassistanterrortext.test.ts | 32 +++++++++++++++++++ ...dded-helpers.isbillingerrormessage.test.ts | 12 +++++++ .../sanitize-user-facing-text.ts | 5 +++ src/shared/assistant-error-format.ts | 21 +++++++++++- 4 files changed, 69 insertions(+), 1 deletion(-) diff --git a/src/agents/pi-embedded-helpers.formatassistanterrortext.test.ts b/src/agents/pi-embedded-helpers.formatassistanterrortext.test.ts index 5180fafa8d0..9c69a2bc3cd 100644 --- a/src/agents/pi-embedded-helpers.formatassistanterrortext.test.ts +++ b/src/agents/pi-embedded-helpers.formatassistanterrortext.test.ts @@ -185,6 +185,23 @@ describe("formatAssistantErrorText", () => { ); }); + it("does not misdiagnose standalone Cloudflare challenge HTML as DNS", () => { + const msg = makeAssistantError(` + + + Just a moment... + + + + Enable JavaScript and cookies to continue + + +`); + expect(formatAssistantErrorText(msg)).toBe( + "The provider returned an HTML error page instead of an API response. This usually means a CDN or gateway (e.g. Cloudflare) blocked the request. Retry in a moment or check provider status.", + ); + }); + it("returns a friendly message for empty stream chunk errors", () => { const msg = makeAssistantError("request ended without sending any chunks"); expect(formatAssistantErrorText(msg)).toBe("LLM request timed out."); @@ -339,6 +356,21 @@ describe("formatRawAssistantErrorForUi", () => { "The AI service is temporarily unavailable (HTTP 521). Please try again in a moment.", ); }); + + it("formats standalone Cloudflare challenge HTML into a clean provider error", () => { + const htmlError = ` + + Just a moment... + + Enable JavaScript and cookies to continue + + +`; + + expect(formatRawAssistantErrorForUi(htmlError)).toBe( + "The provider returned an HTML error page instead of an API response. This usually means a CDN or gateway (e.g. Cloudflare) blocked the request. Retry in a moment or check provider status.", + ); + }); }); describe("raw API error payload helpers", () => { diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts index 1ac3fd0a26a..e0999a1d9bb 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts @@ -266,6 +266,18 @@ describe("isCloudflareOrHtmlErrorPage", () => { expect(isCloudflareOrHtmlErrorPage(htmlError)).toBe(true); }); + it("detects standalone Cloudflare challenge HTML pages", () => { + const htmlError = ` + + Just a moment... + + Enable JavaScript and cookies to continue + + +`; + expect(isCloudflareOrHtmlErrorPage(htmlError)).toBe(true); + }); + it("does not flag non-HTML status lines", () => { expect(isCloudflareOrHtmlErrorPage("500 Internal Server Error")).toBe(false); expect(isCloudflareOrHtmlErrorPage("429 Too Many Requests")).toBe(false); diff --git a/src/agents/pi-embedded-helpers/sanitize-user-facing-text.ts b/src/agents/pi-embedded-helpers/sanitize-user-facing-text.ts index 70ef2eb57f3..f0e3ace5332 100644 --- a/src/agents/pi-embedded-helpers/sanitize-user-facing-text.ts +++ b/src/agents/pi-embedded-helpers/sanitize-user-facing-text.ts @@ -102,6 +102,11 @@ export function formatTransportErrorCopy(raw: string): string | undefined { if (!raw) { return undefined; } + + if (isCloudflareOrHtmlErrorPage(raw)) { + return undefined; + } + const lower = normalizeLowercaseStringOrEmpty(raw); if ( diff --git a/src/shared/assistant-error-format.ts b/src/shared/assistant-error-format.ts index 80598de0244..7f00a05a905 100644 --- a/src/shared/assistant-error-format.ts +++ b/src/shared/assistant-error-format.ts @@ -10,7 +10,10 @@ const HTTP_STATUS_CODE_PREFIX_RE = new RegExp( "i", ); const HTML_ERROR_PREFIX_RE = /^\s*(?:/i; const CLOUDFLARE_HTML_ERROR_CODES = new Set([521, 522, 523, 524, 525, 526, 530]); +const STANDALONE_HTML_ERROR_HINT_RE = + /\bcloudflare\b|cdn-cgi\/challenge-platform|challenge-error-text|enable javascript and cookies to continue|access denied|forbidden|service unavailable|bad gateway|web server is down|captcha|attention required/i; type ErrorPayload = Record; @@ -94,6 +97,14 @@ export function isCloudflareOrHtmlErrorPage(raw: string): boolean { return false; } + if ( + HTML_ERROR_PREFIX_RE.test(trimmed) && + HTML_CLOSE_RE.test(trimmed) && + STANDALONE_HTML_ERROR_HINT_RE.test(trimmed) + ) { + return true; + } + const status = extractLeadingHttpStatus(trimmed); if (!status || status.code < 500) { return false; @@ -104,7 +115,7 @@ export function isCloudflareOrHtmlErrorPage(raw: string): boolean { } return ( - status.code < 600 && HTML_ERROR_PREFIX_RE.test(status.rest) && /<\/html>/i.test(status.rest) + status.code < 600 && HTML_ERROR_PREFIX_RE.test(status.rest) && HTML_CLOSE_RE.test(status.rest) ); } @@ -175,6 +186,14 @@ export function formatRawAssistantErrorForUi(raw?: string): string { return `The AI service is temporarily unavailable (HTTP ${leadingStatus.code}). Please try again in a moment.`; } + if (isCloudflareOrHtmlErrorPage(trimmed)) { + return ( + "The provider returned an HTML error page instead of an API response. " + + "This usually means a CDN or gateway (e.g. Cloudflare) blocked the request. " + + "Retry in a moment or check provider status." + ); + } + const httpMatch = trimmed.match(HTTP_STATUS_PREFIX_RE); if (httpMatch) { const rest = httpMatch[2].trim();