fix(agents/failover): classify undici terminated and codex Request failed as timeout (#69368)

This commit is contained in:
Sk7n4k3d
2026-04-20 22:01:06 +02:00
committed by Peter Steinberger
parent 79d8ce730a
commit f4891b083d
3 changed files with 42 additions and 0 deletions

View File

@@ -58,6 +58,8 @@ Docs: https://docs.openclaw.ai
- Thinking defaults/status: raise the implicit default thinking level for reasoning-capable models from legacy `off`/`low` fallback behavior to a safe provider-supported `medium` equivalent when no explicit config default is set, preserve configured-model reasoning metadata when runtime catalog loading is empty, and make `/status` report the same resolved default as runtime.
- Gateway/model pricing: fetch OpenRouter and LiteLLM pricing asynchronously at startup and extend catalog fetch timeouts to 30 seconds, reducing noisy timeout warnings during slow upstream responses.
- Agents/failover: classify bare undici transport failures (`terminated`, `UND_ERR_SOCKET`, `UND_ERR_CONNECT_TIMEOUT`, body/header timeouts, aborted streams) and pi-ai's openai-codex `Request failed` sentinel as `timeout`, so Cloudflare 502s with empty bodies and mid-response socket resets actually enter the configured fallback chain instead of surfacing as unclassified errors. Fixes #69368. (#69677) Thanks @sk7n4k3d.
- Providers/Anthropic Vertex: restore ADC-backed model discovery after the lightweight provider-discovery path by resolving emitted discovery entries, exposing synthetic auth on bootstrap discovery, and honoring copied env snapshots when probing the default GCP ADC path. Fixes #65715. (#65716) Thanks @feiskyer.
- Plugins/install: add newly installed plugin ids to an existing `plugins.allow` list before enabling them, so allowlisted configs load installed plugins after restart.
- Status: show `Fast` in `/status` when fast mode is enabled, including config/default-derived fast mode, and omit it when disabled.
- OpenAI/image generation: detect Azure OpenAI-style image endpoints, use Azure `api-key` auth plus deployment-scoped image URLs, and honor `AZURE_OPENAI_API_VERSION` so image generation and edits work against Azure-hosted OpenAI resources. (#70570) Thanks @zhanggpcsu.

View File

@@ -844,6 +844,34 @@ describe("isFailoverErrorMessage", () => {
expect(classifyFailoverReason(INTERNAL_SERVER_ERROR_STATUS_WITH_500_SAMPLE)).toBe("timeout");
expect(isFailoverErrorMessage(INTERNAL_SERVER_ERROR_STATUS_WITH_500_SAMPLE)).toBe(true);
});
it("matches bare undici transport failures as timeout (#69368)", () => {
  // Bare `terminated` in different casings and with surrounding whitespace,
  // followed by undici `UND_ERR_*` codes, both standalone and embedded in a
  // longer error message.
  // NOTE(review): in the `*_TIMEOUT` codes an underscore directly follows
  // CONNECT/HEADERS/BODY, so a pattern that ends those alternatives with `\b`
  // cannot match them; presumably they are classified by a more generic
  // timeout pattern — confirm.
  const undiciFailureSamples = [
    "terminated",
    "Terminated",
    " terminated ",
    "UND_ERR_SOCKET",
    "Error: UND_ERR_SOCKET other side closed",
    "UND_ERR_CONNECT_TIMEOUT",
    "UND_ERR_HEADERS_TIMEOUT",
    "UND_ERR_BODY_TIMEOUT",
    "UND_ERR_ABORTED",
    "UND_ERR_REQ_CONTENT_LENGTH_MISMATCH",
  ];

  expectTimeoutFailoverSamples(undiciFailureSamples);
});
it("matches pi-ai openai-codex bare transport failures as timeout (#69368)", () => {
  // The bare `Request failed` sentinel in several casings/paddings, plus the
  // longer "after repeated internal retries" variant.
  const codexFailureSamples = [
    "Request failed",
    "request failed",
    " Request failed ",
    "Request failed after repeated internal retries.",
  ];

  expectTimeoutFailoverSamples(codexFailureSamples);
});
it("does not classify unrelated 'terminated' prose as timeout", () => {
  // A sentence that merely contains the word "terminated" must not be
  // treated as a failover-worthy error.
  const unrelatedProse = "The user terminated the session manually.";

  expectNotFailoverSample(unrelatedProse);
});
});
describe("parseImageSizeError", () => {

View File

@@ -126,6 +126,18 @@ const ERROR_PATTERNS = {
// falls through to reason=unknown (#58315).
/\boperation was aborted\b/i,
/\bstream (?:was )?(?:closed|aborted)\b/i,
// Undici transport-level failures during CDN/provider outages (Cloudflare
// 502 served with an empty body, socket reset mid-response, body-stream
// aborted). These arrive as bare strings on the outer error and, without
// an explicit match, the fallback chain is never attempted (#69368).
/^terminated$/i,
/\bund_err_(?:socket|connect|headers?|body|req_content_length_mismatch|aborted|closed)\b/i,
// pi-ai's openai-codex provider surfaces `Request failed` when the HTTP
// response has no body and no status text (typical of Cloudflare 502s
// from the upstream Codex service). Treat it as a transport failure so
// the configured fallback chain runs instead of surfacing the error.
/^request failed$/i,
/\brequest failed after repeated internal retries\b/i,
],
billing: [
/["']?(?:status|code)["']?\s*[:=]\s*402\b|\bhttp\s*402\b|\berror(?:\s+code)?\s*[:=]?\s*402\b|\b(?:got|returned|received)\s+(?:a\s+)?402\b|^\s*402\s+payment/i,