fix: don't classify 400/422 with no body as format error (#67024)

* fix: keep no-body 400/422 failover errors out of format

* fix: keep failover changelog entry in unreleased fixes
This commit is contained in:
Altay
2026-04-25 01:37:28 +03:00
committed by GitHub
parent 9613a0759c
commit 9d3c56d236
5 changed files with 280 additions and 17 deletions

View File

@@ -8,6 +8,10 @@ Docs: https://docs.openclaw.ai
- Matrix: require full cross-signing identity trust for self-device verification and add `openclaw matrix verify self` so operators can establish that trust from the CLI. (#70401) Thanks @gumadeiras.
### Fixes
- Agents/failover: stop body-less HTTP 400/422 proxy failures from defaulting to `"format"` classification, so embedded retries surface the opaque provider failure instead of falling into a compaction loop. Fixes #66462. (#67024) Thanks @altaywtf and @HongzhuLiu.
## 2026.4.24
### Breaking

View File

@@ -73,8 +73,105 @@ describe("failover-error", () => {
expect(resolveFailoverReasonFromError({ status: 408 })).toBe("timeout");
expect(resolveFailoverReasonFromError({ status: 410 })).toBe("timeout");
expect(resolveFailoverReasonFromError({ status: 499 })).toBe("timeout");
expect(resolveFailoverReasonFromError({ status: 400 })).toBe("format");
expect(resolveFailoverReasonFromError({ status: 422 })).toBe("format");
// 400/422 with no body returns null — avoids triggering a compaction loop
// when the provider returns an empty or wrapper-only 400/422 (e.g.
// transient proxy issue).
expect(resolveFailoverReasonFromError({ status: 400 })).toBeNull();
expect(resolveFailoverReasonFromError({ status: 422 })).toBeNull();
expect(
resolveFailoverReasonFromError({
status: 400,
message: "400 status code (no body)",
}),
).toBeNull();
expect(
resolveFailoverReasonFromError({
status: 422,
message: "HTTP 422: No body",
}),
).toBeNull();
expect(
resolveFailoverReasonFromError({
status: 422,
message: "HTTP 422: No response body",
}),
).toBeNull();
expect(
resolveFailoverReasonFromError({
status: 422,
message: "Error: HTTP 422: No response body",
}),
).toBeNull();
expect(resolveFailoverReasonFromError({ message: "400 status code (no body)" })).toBeNull();
expect(resolveFailoverReasonFromError({ message: "HTTP 422: No body" })).toBeNull();
expect(resolveFailoverReasonFromError({ message: "HTTP 422: No response body" })).toBeNull();
expect(
resolveFailoverReasonFromError({
message: "outer wrapper",
cause: {
status: 422,
message: "HTTP 422: No response body",
},
}),
).toBeNull();
expect(
resolveFailoverReasonFromError({
status: 422,
message: "check open ai req parameter error",
cause: {
status: 422,
message: "HTTP 422: No response body",
},
}),
).toBeNull();
expect(
resolveFailoverReasonFromError({
status: 422,
message: "check open ai req parameter error",
cause: new Error("No response body"),
}),
).toBeNull();
expect(
resolveFailoverReasonFromError({
status: 422,
message: "Unprocessable Entity",
error: {
message: "HTTP 422: No response body",
},
}),
).toBeNull();
expect(
resolveFailoverReasonFromError({
status: 422,
message: "Unprocessable Entity",
cause: {
message: "Unprocessable Entity",
error: {
message: "HTTP 422: No response body",
},
},
}),
).toBeNull();
expect(
resolveFailoverReasonFromError({
status: 422,
error: {
message: "missing required property",
},
cause: {},
}),
).toBe("format");
expect(
resolveFailoverReasonFromError({
status: 422,
error: {
message: "missing required property",
},
cause: {
message: "HTTP 422: No response body",
},
}),
).toBe("format");
// Transient server errors (500/502/503/504) should trigger failover as timeout.
expect(resolveFailoverReasonFromError({ status: 500 })).toBe("timeout");
expect(resolveFailoverReasonFromError({ status: 502 })).toBe("timeout");
@@ -87,6 +184,14 @@ describe("failover-error", () => {
expect(resolveFailoverReasonFromError({ status: 529 })).toBe("overloaded");
});
it("stops on cyclic cause chains", () => {
const first: { cause?: unknown } = {};
const second: { cause?: unknown } = { cause: first };
first.cause = second;
expect(resolveFailoverReasonFromError(first)).toBeNull();
});
it("treats session-specific HTTP 410s differently from generic 410s", () => {
expect(
resolveFailoverReasonFromError({

View File

@@ -1,6 +1,7 @@
import { readErrorName } from "../infra/errors.js";
import {
classifyFailoverSignal,
isUnclassifiedNoBodyHttpSignal,
type FailoverClassification,
type FailoverSignal,
} from "./pi-embedded-helpers/errors.js";
@@ -8,6 +9,7 @@ import { isTimeoutErrorMessage } from "./pi-embedded-helpers/errors.js";
import type { FailoverReason } from "./pi-embedded-helpers/types.js";
const ABORT_TIMEOUT_RE = /request was aborted|request aborted/i;
const MAX_FAILOVER_CAUSE_DEPTH = 25;
export class FailoverError extends Error {
readonly reason: FailoverReason;
@@ -186,11 +188,14 @@ function getErrorMessage(err: unknown): string {
return findErrorProperty(err, readDirectErrorMessage) ?? "";
}
function getErrorCause(err: unknown): unknown {
if (!err || typeof err !== "object" || !("cause" in err)) {
return undefined;
}
return (err as { cause?: unknown }).cause;
function normalizeDirectErrorSignal(err: unknown): FailoverSignal {
const message = readDirectErrorMessage(err);
return {
status: readDirectStatusCode(err),
code: readDirectErrorCode(err),
message: message || undefined,
provider: readDirectProvider(err),
};
}
function hasTimeoutHint(err: unknown): boolean {
@@ -239,7 +244,72 @@ function normalizeErrorSignal(err: unknown): FailoverSignal {
};
}
function resolveFailoverClassificationFromError(err: unknown): FailoverClassification | null {
function getNestedErrorCandidates(err: unknown): unknown[] {
if (!err || typeof err !== "object") {
return [];
}
const candidate = err as { error?: unknown; cause?: unknown };
return [candidate.error, candidate.cause].filter(
(value): value is unknown => value !== undefined && value !== err,
);
}
function isFormatClassification(classification: FailoverClassification | null): boolean {
return classification?.kind === "reason" && classification.reason === "format";
}
function decideNestedFormatOverride(
candidate: unknown,
inheritedStatus: number | undefined,
seen: Set<object>,
depth: number,
): boolean | null {
if (depth > MAX_FAILOVER_CAUSE_DEPTH) {
return null;
}
if (candidate && typeof candidate === "object") {
if (seen.has(candidate)) {
return null;
}
seen.add(candidate);
}
const directSignal = normalizeDirectErrorSignal(candidate);
const nestedCandidates = getNestedErrorCandidates(candidate);
const nestedStatus = directSignal.status ?? inheritedStatus;
const hasDirectMessage = Boolean(directSignal.message?.trim());
if (
hasDirectMessage &&
isUnclassifiedNoBodyHttpSignal({ ...directSignal, status: nestedStatus })
) {
return true;
}
if (hasDirectMessage && (nestedCandidates.length === 0 || classifyFailoverSignal(directSignal))) {
return false;
}
for (const nestedCandidate of nestedCandidates) {
const decision = decideNestedFormatOverride(nestedCandidate, nestedStatus, seen, depth + 1);
if (decision !== null) {
return decision;
}
}
return null;
}
function resolveFailoverClassificationFromErrorInternal(
err: unknown,
seen: Set<object>,
depth: number,
): FailoverClassification | null {
if (depth > MAX_FAILOVER_CAUSE_DEPTH) {
return null;
}
if (err && typeof err === "object") {
if (seen.has(err)) {
return null;
}
seen.add(err);
}
if (isFailoverError(err)) {
return {
kind: "reason",
@@ -247,14 +317,36 @@ function resolveFailoverClassificationFromError(err: unknown): FailoverClassific
};
}
const classification = classifyFailoverSignal(normalizeErrorSignal(err));
const signal = normalizeErrorSignal(err);
const classification = classifyFailoverSignal(signal);
const nestedCandidates = getNestedErrorCandidates(err);
if (!classification || classification.kind === "context_overflow") {
// Let wrapped causes override parent timeout/overflow guesses.
const cause = getErrorCause(err);
if (cause && cause !== err) {
const causeClassification = resolveFailoverClassificationFromError(cause);
if (causeClassification) {
return causeClassification;
for (const candidate of nestedCandidates) {
const nestedClassification = resolveFailoverClassificationFromErrorInternal(
candidate,
seen,
depth + 1,
);
if (nestedClassification) {
return nestedClassification;
}
}
}
if (isFormatClassification(classification)) {
for (const candidate of nestedCandidates) {
const shouldClearFormat = decideNestedFormatOverride(
candidate,
signal.status,
seen,
depth + 1,
);
if (shouldClearFormat === true) {
return null;
}
if (shouldClearFormat === false) {
break;
}
}
}
@@ -272,6 +364,10 @@ function resolveFailoverClassificationFromError(err: unknown): FailoverClassific
return null;
}
function resolveFailoverClassificationFromError(err: unknown): FailoverClassification | null {
return resolveFailoverClassificationFromErrorInternal(err, new Set<object>(), 0);
}
export function resolveFailoverReasonFromError(err: unknown): FailoverReason | null {
return failoverReasonFromClassification(resolveFailoverClassificationFromError(err));
}

View File

@@ -575,8 +575,21 @@ describe("classifyFailoverReasonFromHttpStatus", () => {
expect(classifyFailoverReasonFromHttpStatus(401, "invalid_api_key")).toBe("auth");
});
it("treats HTTP 422 as format error", () => {
expect(classifyFailoverReasonFromHttpStatus(422)).toBe("format");
it("treats body-less HTTP 422 as unknown instead of format", () => {
expect(classifyFailoverReasonFromHttpStatus(422)).toBeNull();
});
it("treats no-body HTTP 400/422 wrappers as unknown instead of format", () => {
expect(classifyFailoverReasonFromHttpStatus(400, "No body response")).toBeNull();
expect(classifyFailoverReasonFromHttpStatus(400, "400 status code (no body)")).toBeNull();
expect(classifyFailoverReasonFromHttpStatus(422, "HTTP 422: No body")).toBeNull();
expect(classifyFailoverReasonFromHttpStatus(422, "HTTP 422: No response body")).toBeNull();
expect(
classifyFailoverReasonFromHttpStatus(422, "Error: HTTP 422: No response body"),
).toBeNull();
});
it("treats HTTP 422 with an unclassifiable body as format error", () => {
expect(classifyFailoverReasonFromHttpStatus(422, "check open ai req parameter error")).toBe(
"format",
);
@@ -686,6 +699,15 @@ describe("classifyFailoverReason", () => {
expect(classifyFailoverReason("HTTP 404: No body")).toBe("model_not_found");
});
it("keeps HTTP 400/422 no-body wrappers out of the format bucket", () => {
expect(classifyFailoverReason("400 status code (no body)")).toBeNull();
expect(classifyFailoverReason("HTTP 400: No body")).toBeNull();
expect(classifyFailoverReason("422 status code (no body)")).toBeNull();
expect(classifyFailoverReason("HTTP 422: No body")).toBeNull();
expect(classifyFailoverReason("HTTP 422: No response body")).toBeNull();
expect(classifyFailoverReason("Error: HTTP 422: No response body")).toBeNull();
});
it("preserves session and auth billing signals on HTTP 404 text", () => {
expect(classifyFailoverReason("HTTP 404: session not found")).toBe("session_expired");
expect(classifyFailoverReason("HTTP 404: invalid_api_key")).toBe("auth");

View File

@@ -330,6 +330,8 @@ const REPLAY_INVALID_RE =
/\bprevious_response_id\b.*\b(?:invalid|unknown|not found|does not exist|expired|mismatch)\b|\btool_(?:use|call)\.(?:input|arguments)\b.*\b(?:missing|required)\b|\bincorrect role information\b|\broles must alternate\b|\binput item id does not belong to this connection\b/i;
const SANDBOX_BLOCKED_RE =
/\bapproval is required\b|\bapproval timed out\b|\bapproval was denied\b|\bblocked by sandbox\b|\bsandbox\b.*\b(?:blocked|denied|forbidden|disabled|not allowed)\b/i;
const NO_BODY_HTTP_WRAPPER_RE =
/^(?:no body(?: response)?|no response body|status code \(no body\))$/i;
function stripErrorPrefix(raw: string): string {
return raw.replace(/^error:\s*/i, "").trim();
@@ -342,6 +344,31 @@ function inferSignalStatus(signal: FailoverSignal): number | undefined {
return extractLeadingHttpStatus(stripErrorPrefix(signal.message?.trim() ?? ""))?.code;
}
function isExplicitNoBodyHttpMessage(raw: string | undefined, status?: number): boolean {
const trimmed = raw?.trim();
if (!trimmed) {
return false;
}
const candidate = extractLeadingHttpStatus(trimmed) ? trimmed : stripErrorPrefix(trimmed);
const leadingStatus = extractLeadingHttpStatus(candidate);
if (leadingStatus) {
if (typeof status === "number" && leadingStatus.code !== status) {
return false;
}
return NO_BODY_HTTP_WRAPPER_RE.test(leadingStatus.rest);
}
return NO_BODY_HTTP_WRAPPER_RE.test(candidate);
}
export function isUnclassifiedNoBodyHttpSignal(signal: FailoverSignal): boolean {
const status = inferSignalStatus(signal);
if (status !== 400 && status !== 422) {
return false;
}
const message = signal.message?.trim();
return !message || isExplicitNoBodyHttpMessage(message, status);
}
function isHtmlErrorResponse(raw: string, status?: number): boolean {
const trimmed = raw.trim();
if (!trimmed) {
@@ -683,6 +710,15 @@ function classifyFailoverClassificationFromHttpStatus(
if (messageClassification) {
return messageClassification;
}
// When the response has no body at all, or only surfaces as an HTTP wrapper
// like "400 status code (no body)", return null instead of defaulting to
// "format". These shapes are likely transient proxy issues — classifying
// them as "format" triggers a compaction loop that cannot recover.
if (isUnclassifiedNoBodyHttpSignal({ status, message })) {
return null;
}
// Body exists but couldn't be classified — still treat as format error
// since the provider rejected the request schema.
return toReasonClassification("format");
}
return null;