mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 08:40:44 +00:00
fix(agents/failover): classify undici terminated and codex Request failed as timeout (#69368)
This commit is contained in:
committed by
Peter Steinberger
parent
79d8ce730a
commit
f4891b083d
@@ -58,6 +58,8 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- Thinking defaults/status: raise the implicit default thinking level for reasoning-capable models from legacy `off`/`low` fallback behavior to a safe provider-supported `medium` equivalent when no explicit config default is set, preserve configured-model reasoning metadata when runtime catalog loading is empty, and make `/status` report the same resolved default as runtime.
|
||||
- Gateway/model pricing: fetch OpenRouter and LiteLLM pricing asynchronously at startup and extend catalog fetch timeouts to 30 seconds, reducing noisy timeout warnings during slow upstream responses.
|
||||
- Agents/failover: classify bare undici transport failures (`terminated`, `UND_ERR_SOCKET`, `UND_ERR_CONNECT_TIMEOUT`, body/header timeouts, aborted streams) and pi-ai's openai-codex `Request failed` sentinel as `timeout`, so Cloudflare 502s with empty bodies and mid-response socket resets actually enter the configured fallback chain instead of surfacing as unclassified errors. Fixes #69368. (#69677) Thanks @sk7n4k3d.
|
||||
- Providers/Anthropic Vertex: restore ADC-backed model discovery after the lightweight provider-discovery path by resolving emitted discovery entries, exposing synthetic auth on bootstrap discovery, and honoring copied env snapshots when probing the default GCP ADC path. Fixes #65715. (#65716) Thanks @feiskyer.
|
||||
- Plugins/install: add newly installed plugin ids to an existing `plugins.allow` list before enabling them, so allowlisted configs load installed plugins after restart.
|
||||
- Status: show `Fast` in `/status` when fast mode is enabled, including config/default-derived fast mode, and omit it when disabled.
|
||||
- OpenAI/image generation: detect Azure OpenAI-style image endpoints, use Azure `api-key` auth plus deployment-scoped image URLs, and honor `AZURE_OPENAI_API_VERSION` so image generation and edits work against Azure-hosted OpenAI resources. (#70570) Thanks @zhanggpcsu.
|
||||
|
||||
@@ -844,6 +844,34 @@ describe("isFailoverErrorMessage", () => {
|
||||
expect(classifyFailoverReason(INTERNAL_SERVER_ERROR_STATUS_WITH_500_SAMPLE)).toBe("timeout");
|
||||
expect(isFailoverErrorMessage(INTERNAL_SERVER_ERROR_STATUS_WITH_500_SAMPLE)).toBe(true);
|
||||
});
|
||||
|
||||
// Regression test for #69368: bare undici transport-failure strings must be
// classified as `timeout` so the fallback chain is attempted.
it("matches bare undici transport failures as timeout (#69368)", () => {
  // The bare "terminated" message in mixed case and with surrounding spaces.
  // NOTE(review): the pattern in this diff is anchored (`/^terminated$/i`), so
  // the padded variant presumably relies on the classifier trimming its input
  // before matching — confirm against the classifier.
  const terminatedVariants = ["terminated", "Terminated", " terminated "];

  // Undici error codes, bare and embedded in a longer message.
  // NOTE(review): `_` is a word character, so the trailing `\b` of the
  // `und_err_(...)` pattern cannot match right after `CONNECT`, `HEADERS` or
  // `BODY` in the `*_TIMEOUT` codes; those samples presumably pass through a
  // different timeout pattern — confirm.
  const undiciErrorCodes = [
    "UND_ERR_SOCKET",
    "Error: UND_ERR_SOCKET other side closed",
    "UND_ERR_CONNECT_TIMEOUT",
    "UND_ERR_HEADERS_TIMEOUT",
    "UND_ERR_BODY_TIMEOUT",
    "UND_ERR_ABORTED",
    "UND_ERR_REQ_CONTENT_LENGTH_MISMATCH",
  ];

  expectTimeoutFailoverSamples([...terminatedVariants, ...undiciErrorCodes]);
});
|
||||
|
||||
// Regression test for #69368: the `Request failed` sentinel (exact casing,
// lower-cased, and space-padded) and the "repeated internal retries" message
// must be classified as `timeout`.
it("matches pi-ai openai-codex bare transport failures as timeout (#69368)", () => {
  const requestFailedSamples = [
    "Request failed",
    "request failed",
    " Request failed ",
    "Request failed after repeated internal retries.",
  ];

  expectTimeoutFailoverSamples(requestFailedSamples);
});
|
||||
|
||||
// Negative case: a sentence that merely contains the word "terminated" must
// not be treated as a failover sample.
it("does not classify unrelated 'terminated' prose as timeout", () => {
  const unrelatedProse = "The user terminated the session manually.";

  expectNotFailoverSample(unrelatedProse);
});
|
||||
});
|
||||
|
||||
describe("parseImageSizeError", () => {
|
||||
|
||||
@@ -126,6 +126,18 @@ const ERROR_PATTERNS = {
|
||||
// falls through to reason=unknown (#58315).
|
||||
/\boperation was aborted\b/i,
|
||||
/\bstream (?:was )?(?:closed|aborted)\b/i,
|
||||
// Undici transport-level failures during CDN/provider outages (Cloudflare
|
||||
// 502 served with an empty body, socket reset mid-response, body-stream
|
||||
// aborted). These arrive as bare strings on the outer error and, without
|
||||
// an explicit match, the fallback chain is never attempted (#69368).
|
||||
/^terminated$/i,
|
||||
/\bund_err_(?:socket|connect|headers?|body|req_content_length_mismatch|aborted|closed)\b/i,
|
||||
// pi-ai's openai-codex provider surfaces `Request failed` when the HTTP
|
||||
// response has no body and no status text (typical of Cloudflare 502s
|
||||
// from the upstream Codex service). Treat it as a transport failure so
|
||||
// the configured fallback chain runs instead of surfacing the error.
|
||||
/^request failed$/i,
|
||||
/\brequest failed after repeated internal retries\b/i,
|
||||
],
|
||||
billing: [
|
||||
/["']?(?:status|code)["']?\s*[:=]\s*402\b|\bhttp\s*402\b|\berror(?:\s+code)?\s*[:=]?\s*402\b|\b(?:got|returned|received)\s+(?:a\s+)?402\b|^\s*402\s+payment/i,
|
||||
|
||||
Reference in New Issue
Block a user