From a0900926c3883923b0d35d487c6b4adeadab402b Mon Sep 17 00:00:00 2001
From: Zhang Xiaofeng
Date: Tue, 28 Apr 2026 15:44:17 +0800
Subject: [PATCH] fix: add CJK error patterns to failover classification (#56242)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: add CJK error patterns to failover classification

Chinese LLM providers (ZhipuAI/GLM, Bailian, Kimi/Moonshot, DeepSeek, etc.) return error messages in Chinese. The existing failover classification only matches English patterns, causing these errors to fall through as unclassified — surfacing raw provider errors to users instead of triggering model fallback.

Real production example: ZhipuAI error code 1234 returns '网络错误,错误id:xxx,请联系客服。' (network error). This was not matched by the existing 'network error' English pattern, so no failover was triggered despite having a configured fallback model.

Changes:
- Add Chinese patterns to all error categories in failover-matches.ts: timeout, serverError, rateLimit, billing, auth, overloaded
- Add Chinese network error detection in formatTransportErrorCopy() for user-friendly error messages
- Add comprehensive test coverage for all CJK error categories

Follows the existing precedent set by Chinese context overflow patterns in isContextOverflowError().

* fix: narrow billing pattern and fix placeholder issue URL - Change '账户余额' to '账户余额不足' to avoid false positives on messages that merely mention account balance (per greptile review) - Replace XXXXX placeholder with actual issue #56242 * fix: wire CJK auth failover patterns * fix: classify CJK provider failover errors * fix: place failover changelog entry in unreleased --------- Co-authored-by: Altay --- CHANGELOG.md | 1 + src/agents/failover-error.test.ts | 27 +++++++++ ...dded-helpers.isbillingerrormessage.test.ts | 56 +++++++++++++++++++ .../pi-embedded-helpers/failover-matches.ts | 41 ++++++++++++++ .../sanitize-user-facing-text.ts | 4 ++ 5 files changed, 129 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6de0f1221e9..99842926ff4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ Docs: https://docs.openclaw.ai - Zalo Personal: persist refreshed `zca-js` session cookies after QR login, session restore, and successful API calls so gateway restarts restore the freshest local session. (#73277) Thanks @darkamenosa. - Logging/security: redact sensitive tokens (sk-\* keys, Bearer/Authorization values, etc.) at the subsystem console sink so `createSubsystemLogger().info/warn/error` output that bypasses the patched console-capture handler still applies the same redaction the file transport already does. Fixes #73284; refs #67953 and #64046. Thanks @edwin-rivera-dev. - Plugins/runtime deps: reuse enclosing versioned cache roots when bundled plugins resolve from nested staged paths, so plugin-runtime-deps no longer mints `openclaw-unknown-*` directories or loops on `ENOTEMPTY`. Fixes #72956. (#73205) Thanks @SymbolStar. +- Agents/failover: classify CJK provider transport, quota, billing, auth, and overload error text so Chinese-language provider failures trigger fallback and user-facing transport copy instead of surfacing as unclassified raw errors. (#56242) Thanks @tomcatzh. 
## 2026.4.27 diff --git a/src/agents/failover-error.test.ts b/src/agents/failover-error.test.ts index 8b3b57a875f..b4320693afc 100644 --- a/src/agents/failover-error.test.ts +++ b/src/agents/failover-error.test.ts @@ -602,6 +602,33 @@ describe("failover-error", () => { ).toBe("rate_limit"); }); + it("treats Chinese provider network/server errors as timeout for failover", () => { + // ZhipuAI/GLM error code 1234: "网络错误" — real production error + expect( + resolveFailoverReasonFromError({ + message: + "LLM error 1234: 网络错误,错误id:202603281427587491f4467f1c4712,请联系客服。 (request_id: 202603281427587491f4467f1c4712)", + }), + ).toBe("timeout"); + // JSON payload variant + expect( + resolveFailoverReasonFromError({ + message: + '{"error":{"code":"1234","message":"网络错误,错误id:abc123,请联系客服。"},"request_id":"abc123"}', + }), + ).toBe("timeout"); + // Generic Chinese server errors + expect(resolveFailoverReasonFromError({ message: "系统错误,请稍后重试" })).toBe("timeout"); + expect(resolveFailoverReasonFromError({ message: "服务器内部错误" })).toBe("timeout"); + }); + + it("treats Chinese provider auth errors as auth for failover", () => { + // ZhipuAI/GLM 403: "您无权访问glm-5.1" — real production error + expect(resolveFailoverReasonFromError({ message: "403 您无权访问glm-5.1。" })).toBe("auth"); + expect(resolveFailoverReasonFromError({ message: "认证失败" })).toBe("auth"); + expect(resolveFailoverReasonFromError({ message: "鉴权失败,请检查API Key" })).toBe("auth"); + }); + it("treats overloaded provider payloads as overloaded", () => { expect( resolveFailoverReasonFromError({ diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts index f8d9171ccc2..a6352504f11 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts @@ -1266,6 +1266,62 @@ describe("classifyFailoverReason", () => { ), ).toBe("auth_permanent"); }); + + it("classifies Chinese provider 
error messages correctly", () => { + // ZhipuAI/GLM error code 1234: "网络错误" (network error) — real production error + // from https://github.com/openclaw/openclaw/issues/56242 + expect( + classifyFailoverReason( + "LLM error 1234: 网络错误,错误id:202603281427587491f4467f1c4712,请联系客服。 (request_id: 202603281427587491f4467f1c4712)", + ), + ).toBe("timeout"); + expect( + classifyFailoverReason( + '{"error":{"code":"1234","message":"网络错误,错误id:abc123,请联系客服。"},"request_id":"abc123"}', + ), + ).toBe("timeout"); + + // Network/connection errors + expect(classifyFailoverReason("网络异常,请稍后重试")).toBe("timeout"); + expect(classifyFailoverReason("连接超时")).toBe("timeout"); + expect(classifyFailoverReason("请求超时,请重试")).toBe("timeout"); + expect(classifyFailoverReason("服务暂时不可用")).toBe("timeout"); + expect(classifyFailoverReason("连接错误")).toBe("timeout"); + expect(classifyFailoverReason("服务繁忙,请稍后再试")).toBe("timeout"); + + // Server errors + expect(classifyFailoverReason("内部错误")).toBe("timeout"); + expect(classifyFailoverReason("服务器错误")).toBe("timeout"); + expect(classifyFailoverReason("服务器内部错误")).toBe("timeout"); + expect(classifyFailoverReason("系统错误,请稍后重试")).toBe("timeout"); + expect(classifyFailoverReason("系统繁忙")).toBe("timeout"); + expect(classifyFailoverReason("系统异常")).toBe("timeout"); + + // Rate limit errors + expect(classifyFailoverReason("请求过于频繁,请稍后重试")).toBe("rate_limit"); + expect(classifyFailoverReason("调用频率超限")).toBe("rate_limit"); + expect(classifyFailoverReason("频率限制")).toBe("rate_limit"); + expect(classifyFailoverReason("配额不足")).toBe("rate_limit"); + expect(classifyFailoverReason("配额已用尽")).toBe("rate_limit"); + expect(classifyFailoverReason("额度不足,请充值")).toBe("rate_limit"); + expect(classifyFailoverReason("额度已用尽")).toBe("rate_limit"); + + // Billing errors + expect(classifyFailoverReason("余额不足,请充值")).toBe("billing"); + expect(classifyFailoverReason("账户余额不足")).toBe("billing"); + expect(classifyFailoverReason("账户已欠费")).toBe("billing"); + + // Auth errors + 
expect(classifyFailoverReason("无权访问该模型")).toBe("auth"); + expect(classifyFailoverReason("403 您无权访问glm-5.1。")).toBe("auth"); + expect(classifyFailoverReason("认证失败")).toBe("auth"); + expect(classifyFailoverReason("鉴权失败,请检查API Key")).toBe("auth"); + expect(classifyFailoverReason("密钥无效")).toBe("auth"); + + // Overloaded errors + expect(classifyFailoverReason("服务过载,请稍后重试")).toBe("overloaded"); + expect(classifyFailoverReason("当前负载过高")).toBe("overloaded"); + }); }); describe("classifyProviderRuntimeFailureKind", () => { diff --git a/src/agents/pi-embedded-helpers/failover-matches.ts b/src/agents/pi-embedded-helpers/failover-matches.ts index de871aa6809..0b0773e3f26 100644 --- a/src/agents/pi-embedded-helpers/failover-matches.ts +++ b/src/agents/pi-embedded-helpers/failover-matches.ts @@ -40,6 +40,14 @@ const COMMON_AUTH_ERROR_PATTERNS = [ /\bfailed to (?:extract|parse|validate|decode)\b.*\btoken\b/, ] as const satisfies readonly ErrorPattern[]; +const CJK_AUTH_ERROR_PATTERNS = [ + "无权访问", + "认证失败", + "鉴权失败", + "密钥无效", + "apikey 无效", +] as const satisfies readonly ErrorPattern[]; + const ZAI_BILLING_CODE_1311_RE = /"code"\s*:\s*1311\b/; const ZAI_AUTH_CODE_1113_RE = /"code"\s*:\s*1113\b/; const STATUS_INTERNAL_SERVER_ERROR_RE = /\bstatus:\s*internal server error\b/i; @@ -69,6 +77,14 @@ const ERROR_PATTERNS = { /\btpm\b/i, "tokens per minute", "tokens per day", + // Chinese provider rate-limit messages + "请求过于频繁", + "调用频率", + "频率限制", + "配额不足", + "配额已用尽", + "额度不足", + "额度已用尽", ], overloaded: [ /overloaded_error|"type"\s*:\s*"overloaded_error"/i, @@ -79,6 +95,9 @@ const ERROR_PATTERNS = { // provider-overload (#32828). 
/service[_ ]unavailable.*(?:overload|capacity|high[_ ]demand)|(?:overload|capacity|high[_ ]demand).*service[_ ]unavailable/i, "high demand", + // Chinese provider overloaded messages + "服务过载", + "当前负载过高", ], serverError: [ "an error occurred while processing", @@ -92,6 +111,13 @@ const ERROR_PATTERNS = { "upstream error", "upstream connect error", "connection reset", + // Chinese provider server error messages + "内部错误", + "服务器错误", + "服务器内部错误", + "系统错误", + "系统繁忙", + "系统异常", ], timeout: [ "timeout", @@ -106,6 +132,14 @@ const ERROR_PATTERNS = { "network request failed", "fetch failed", "socket hang up", + // Chinese provider error messages (ZhipuAI/GLM, Bailian, Kimi/Moonshot, DeepSeek, etc.) + "网络错误", + "网络异常", + "服务暂时不可用", + "服务繁忙", + "请求超时", + "连接超时", + "连接错误", /\beconn(?:refused|reset|aborted)\b/i, /\benetunreach\b/i, /\behostunreach\b/i, @@ -153,6 +187,11 @@ const ERROR_PATTERNS = { /out of extra usage/i, /draw from your extra usage/i, /extra usage is required(?: for long context requests)?/i, + // Chinese provider billing messages + "余额不足", + "账户余额不足", + "欠费", + "账户已欠费", // Z.ai: error 1311 = model not included in current subscription plan (#48988) ZAI_BILLING_CODE_1311_RE, ], @@ -161,6 +200,7 @@ const ERROR_PATTERNS = { ...AMBIGUOUS_AUTH_ERROR_PATTERNS, ...COMMON_AUTH_ERROR_PATTERNS, ...ZAI_AUTH_ERROR_PATTERNS, + ...CJK_AUTH_ERROR_PATTERNS, ], format: [ "string should match pattern", @@ -245,6 +285,7 @@ export function isAuthErrorMessage(raw: string): boolean { AMBIGUOUS_AUTH_ERROR_PATTERNS, COMMON_AUTH_ERROR_PATTERNS, ZAI_AUTH_ERROR_PATTERNS, + CJK_AUTH_ERROR_PATTERNS, ]); } diff --git a/src/agents/pi-embedded-helpers/sanitize-user-facing-text.ts b/src/agents/pi-embedded-helpers/sanitize-user-facing-text.ts index c2bb9d2dfe7..7708e09a952 100644 --- a/src/agents/pi-embedded-helpers/sanitize-user-facing-text.ts +++ b/src/agents/pi-embedded-helpers/sanitize-user-facing-text.ts @@ -159,6 +159,10 @@ export function formatTransportErrorCopy(raw: string): string | 
undefined { return "LLM request failed: network connection error."; } + if (raw.includes("网络错误") || raw.includes("网络异常") || raw.includes("连接错误")) { + return "LLM request failed: provider reported a network error."; + } + return undefined; }