mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 09:30:43 +00:00
fix: don't classify 400/422 with no body as format error (#67024)
* fix: keep no-body 400/422 failover errors out of format * fix: keep failover changelog entry in unreleased fixes
This commit is contained in:
@@ -8,6 +8,10 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- Matrix: require full cross-signing identity trust for self-device verification and add `openclaw matrix verify self` so operators can establish that trust from the CLI. (#70401) Thanks @gumadeiras.
|
||||
|
||||
### Fixes
|
||||
|
||||
- Agents/failover: stop body-less HTTP 400/422 proxy failures from defaulting to `"format"` classification, so embedded retries surface the opaque provider failure instead of falling into a compaction loop. Fixes #66462. (#67024) Thanks @altaywtf and @HongzhuLiu.
|
||||
|
||||
## 2026.4.24
|
||||
|
||||
### Breaking
|
||||
|
||||
@@ -73,8 +73,105 @@ describe("failover-error", () => {
|
||||
expect(resolveFailoverReasonFromError({ status: 408 })).toBe("timeout");
|
||||
expect(resolveFailoverReasonFromError({ status: 410 })).toBe("timeout");
|
||||
expect(resolveFailoverReasonFromError({ status: 499 })).toBe("timeout");
|
||||
expect(resolveFailoverReasonFromError({ status: 400 })).toBe("format");
|
||||
expect(resolveFailoverReasonFromError({ status: 422 })).toBe("format");
|
||||
// 400/422 with no body returns null — avoids triggering a compaction loop
|
||||
// when the provider returns an empty or wrapper-only 400/422 (e.g.
|
||||
// transient proxy issue).
|
||||
expect(resolveFailoverReasonFromError({ status: 400 })).toBeNull();
|
||||
expect(resolveFailoverReasonFromError({ status: 422 })).toBeNull();
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
status: 400,
|
||||
message: "400 status code (no body)",
|
||||
}),
|
||||
).toBeNull();
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
status: 422,
|
||||
message: "HTTP 422: No body",
|
||||
}),
|
||||
).toBeNull();
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
status: 422,
|
||||
message: "HTTP 422: No response body",
|
||||
}),
|
||||
).toBeNull();
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
status: 422,
|
||||
message: "Error: HTTP 422: No response body",
|
||||
}),
|
||||
).toBeNull();
|
||||
expect(resolveFailoverReasonFromError({ message: "400 status code (no body)" })).toBeNull();
|
||||
expect(resolveFailoverReasonFromError({ message: "HTTP 422: No body" })).toBeNull();
|
||||
expect(resolveFailoverReasonFromError({ message: "HTTP 422: No response body" })).toBeNull();
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
message: "outer wrapper",
|
||||
cause: {
|
||||
status: 422,
|
||||
message: "HTTP 422: No response body",
|
||||
},
|
||||
}),
|
||||
).toBeNull();
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
status: 422,
|
||||
message: "check open ai req parameter error",
|
||||
cause: {
|
||||
status: 422,
|
||||
message: "HTTP 422: No response body",
|
||||
},
|
||||
}),
|
||||
).toBeNull();
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
status: 422,
|
||||
message: "check open ai req parameter error",
|
||||
cause: new Error("No response body"),
|
||||
}),
|
||||
).toBeNull();
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
status: 422,
|
||||
message: "Unprocessable Entity",
|
||||
error: {
|
||||
message: "HTTP 422: No response body",
|
||||
},
|
||||
}),
|
||||
).toBeNull();
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
status: 422,
|
||||
message: "Unprocessable Entity",
|
||||
cause: {
|
||||
message: "Unprocessable Entity",
|
||||
error: {
|
||||
message: "HTTP 422: No response body",
|
||||
},
|
||||
},
|
||||
}),
|
||||
).toBeNull();
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
status: 422,
|
||||
error: {
|
||||
message: "missing required property",
|
||||
},
|
||||
cause: {},
|
||||
}),
|
||||
).toBe("format");
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
status: 422,
|
||||
error: {
|
||||
message: "missing required property",
|
||||
},
|
||||
cause: {
|
||||
message: "HTTP 422: No response body",
|
||||
},
|
||||
}),
|
||||
).toBe("format");
|
||||
// Transient server errors (500/502/503/504) should trigger failover as timeout.
|
||||
expect(resolveFailoverReasonFromError({ status: 500 })).toBe("timeout");
|
||||
expect(resolveFailoverReasonFromError({ status: 502 })).toBe("timeout");
|
||||
@@ -87,6 +184,14 @@ describe("failover-error", () => {
|
||||
expect(resolveFailoverReasonFromError({ status: 529 })).toBe("overloaded");
|
||||
});
|
||||
|
||||
it("stops on cyclic cause chains", () => {
|
||||
const first: { cause?: unknown } = {};
|
||||
const second: { cause?: unknown } = { cause: first };
|
||||
first.cause = second;
|
||||
|
||||
expect(resolveFailoverReasonFromError(first)).toBeNull();
|
||||
});
|
||||
|
||||
it("treats session-specific HTTP 410s differently from generic 410s", () => {
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { readErrorName } from "../infra/errors.js";
|
||||
import {
|
||||
classifyFailoverSignal,
|
||||
isUnclassifiedNoBodyHttpSignal,
|
||||
type FailoverClassification,
|
||||
type FailoverSignal,
|
||||
} from "./pi-embedded-helpers/errors.js";
|
||||
@@ -8,6 +9,7 @@ import { isTimeoutErrorMessage } from "./pi-embedded-helpers/errors.js";
|
||||
import type { FailoverReason } from "./pi-embedded-helpers/types.js";
|
||||
|
||||
const ABORT_TIMEOUT_RE = /request was aborted|request aborted/i;
|
||||
const MAX_FAILOVER_CAUSE_DEPTH = 25;
|
||||
|
||||
export class FailoverError extends Error {
|
||||
readonly reason: FailoverReason;
|
||||
@@ -186,11 +188,14 @@ function getErrorMessage(err: unknown): string {
|
||||
return findErrorProperty(err, readDirectErrorMessage) ?? "";
|
||||
}
|
||||
|
||||
function getErrorCause(err: unknown): unknown {
|
||||
if (!err || typeof err !== "object" || !("cause" in err)) {
|
||||
return undefined;
|
||||
}
|
||||
return (err as { cause?: unknown }).cause;
|
||||
function normalizeDirectErrorSignal(err: unknown): FailoverSignal {
|
||||
const message = readDirectErrorMessage(err);
|
||||
return {
|
||||
status: readDirectStatusCode(err),
|
||||
code: readDirectErrorCode(err),
|
||||
message: message || undefined,
|
||||
provider: readDirectProvider(err),
|
||||
};
|
||||
}
|
||||
|
||||
function hasTimeoutHint(err: unknown): boolean {
|
||||
@@ -239,7 +244,72 @@ function normalizeErrorSignal(err: unknown): FailoverSignal {
|
||||
};
|
||||
}
|
||||
|
||||
function resolveFailoverClassificationFromError(err: unknown): FailoverClassification | null {
|
||||
function getNestedErrorCandidates(err: unknown): unknown[] {
|
||||
if (!err || typeof err !== "object") {
|
||||
return [];
|
||||
}
|
||||
const candidate = err as { error?: unknown; cause?: unknown };
|
||||
return [candidate.error, candidate.cause].filter(
|
||||
(value): value is unknown => value !== undefined && value !== err,
|
||||
);
|
||||
}
|
||||
|
||||
function isFormatClassification(classification: FailoverClassification | null): boolean {
|
||||
return classification?.kind === "reason" && classification.reason === "format";
|
||||
}
|
||||
|
||||
function decideNestedFormatOverride(
|
||||
candidate: unknown,
|
||||
inheritedStatus: number | undefined,
|
||||
seen: Set<object>,
|
||||
depth: number,
|
||||
): boolean | null {
|
||||
if (depth > MAX_FAILOVER_CAUSE_DEPTH) {
|
||||
return null;
|
||||
}
|
||||
if (candidate && typeof candidate === "object") {
|
||||
if (seen.has(candidate)) {
|
||||
return null;
|
||||
}
|
||||
seen.add(candidate);
|
||||
}
|
||||
|
||||
const directSignal = normalizeDirectErrorSignal(candidate);
|
||||
const nestedCandidates = getNestedErrorCandidates(candidate);
|
||||
const nestedStatus = directSignal.status ?? inheritedStatus;
|
||||
const hasDirectMessage = Boolean(directSignal.message?.trim());
|
||||
if (
|
||||
hasDirectMessage &&
|
||||
isUnclassifiedNoBodyHttpSignal({ ...directSignal, status: nestedStatus })
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
if (hasDirectMessage && (nestedCandidates.length === 0 || classifyFailoverSignal(directSignal))) {
|
||||
return false;
|
||||
}
|
||||
for (const nestedCandidate of nestedCandidates) {
|
||||
const decision = decideNestedFormatOverride(nestedCandidate, nestedStatus, seen, depth + 1);
|
||||
if (decision !== null) {
|
||||
return decision;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
function resolveFailoverClassificationFromErrorInternal(
|
||||
err: unknown,
|
||||
seen: Set<object>,
|
||||
depth: number,
|
||||
): FailoverClassification | null {
|
||||
if (depth > MAX_FAILOVER_CAUSE_DEPTH) {
|
||||
return null;
|
||||
}
|
||||
if (err && typeof err === "object") {
|
||||
if (seen.has(err)) {
|
||||
return null;
|
||||
}
|
||||
seen.add(err);
|
||||
}
|
||||
if (isFailoverError(err)) {
|
||||
return {
|
||||
kind: "reason",
|
||||
@@ -247,14 +317,36 @@ function resolveFailoverClassificationFromError(err: unknown): FailoverClassific
|
||||
};
|
||||
}
|
||||
|
||||
const classification = classifyFailoverSignal(normalizeErrorSignal(err));
|
||||
const signal = normalizeErrorSignal(err);
|
||||
const classification = classifyFailoverSignal(signal);
|
||||
const nestedCandidates = getNestedErrorCandidates(err);
|
||||
|
||||
if (!classification || classification.kind === "context_overflow") {
|
||||
// Let wrapped causes override parent timeout/overflow guesses.
|
||||
const cause = getErrorCause(err);
|
||||
if (cause && cause !== err) {
|
||||
const causeClassification = resolveFailoverClassificationFromError(cause);
|
||||
if (causeClassification) {
|
||||
return causeClassification;
|
||||
for (const candidate of nestedCandidates) {
|
||||
const nestedClassification = resolveFailoverClassificationFromErrorInternal(
|
||||
candidate,
|
||||
seen,
|
||||
depth + 1,
|
||||
);
|
||||
if (nestedClassification) {
|
||||
return nestedClassification;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (isFormatClassification(classification)) {
|
||||
for (const candidate of nestedCandidates) {
|
||||
const shouldClearFormat = decideNestedFormatOverride(
|
||||
candidate,
|
||||
signal.status,
|
||||
seen,
|
||||
depth + 1,
|
||||
);
|
||||
if (shouldClearFormat === true) {
|
||||
return null;
|
||||
}
|
||||
if (shouldClearFormat === false) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -272,6 +364,10 @@ function resolveFailoverClassificationFromError(err: unknown): FailoverClassific
|
||||
return null;
|
||||
}
|
||||
|
||||
function resolveFailoverClassificationFromError(err: unknown): FailoverClassification | null {
|
||||
return resolveFailoverClassificationFromErrorInternal(err, new Set<object>(), 0);
|
||||
}
|
||||
|
||||
export function resolveFailoverReasonFromError(err: unknown): FailoverReason | null {
|
||||
return failoverReasonFromClassification(resolveFailoverClassificationFromError(err));
|
||||
}
|
||||
|
||||
@@ -575,8 +575,21 @@ describe("classifyFailoverReasonFromHttpStatus", () => {
|
||||
expect(classifyFailoverReasonFromHttpStatus(401, "invalid_api_key")).toBe("auth");
|
||||
});
|
||||
|
||||
it("treats HTTP 422 as format error", () => {
|
||||
expect(classifyFailoverReasonFromHttpStatus(422)).toBe("format");
|
||||
it("treats body-less HTTP 422 as unknown instead of format", () => {
|
||||
expect(classifyFailoverReasonFromHttpStatus(422)).toBeNull();
|
||||
});
|
||||
|
||||
it("treats no-body HTTP 400/422 wrappers as unknown instead of format", () => {
|
||||
expect(classifyFailoverReasonFromHttpStatus(400, "No body response")).toBeNull();
|
||||
expect(classifyFailoverReasonFromHttpStatus(400, "400 status code (no body)")).toBeNull();
|
||||
expect(classifyFailoverReasonFromHttpStatus(422, "HTTP 422: No body")).toBeNull();
|
||||
expect(classifyFailoverReasonFromHttpStatus(422, "HTTP 422: No response body")).toBeNull();
|
||||
expect(
|
||||
classifyFailoverReasonFromHttpStatus(422, "Error: HTTP 422: No response body"),
|
||||
).toBeNull();
|
||||
});
|
||||
|
||||
it("treats HTTP 422 with an unclassifiable body as format error", () => {
|
||||
expect(classifyFailoverReasonFromHttpStatus(422, "check open ai req parameter error")).toBe(
|
||||
"format",
|
||||
);
|
||||
@@ -686,6 +699,15 @@ describe("classifyFailoverReason", () => {
|
||||
expect(classifyFailoverReason("HTTP 404: No body")).toBe("model_not_found");
|
||||
});
|
||||
|
||||
it("keeps HTTP 400/422 no-body wrappers out of the format bucket", () => {
|
||||
expect(classifyFailoverReason("400 status code (no body)")).toBeNull();
|
||||
expect(classifyFailoverReason("HTTP 400: No body")).toBeNull();
|
||||
expect(classifyFailoverReason("422 status code (no body)")).toBeNull();
|
||||
expect(classifyFailoverReason("HTTP 422: No body")).toBeNull();
|
||||
expect(classifyFailoverReason("HTTP 422: No response body")).toBeNull();
|
||||
expect(classifyFailoverReason("Error: HTTP 422: No response body")).toBeNull();
|
||||
});
|
||||
|
||||
it("preserves session and auth billing signals on HTTP 404 text", () => {
|
||||
expect(classifyFailoverReason("HTTP 404: session not found")).toBe("session_expired");
|
||||
expect(classifyFailoverReason("HTTP 404: invalid_api_key")).toBe("auth");
|
||||
|
||||
@@ -330,6 +330,8 @@ const REPLAY_INVALID_RE =
|
||||
/\bprevious_response_id\b.*\b(?:invalid|unknown|not found|does not exist|expired|mismatch)\b|\btool_(?:use|call)\.(?:input|arguments)\b.*\b(?:missing|required)\b|\bincorrect role information\b|\broles must alternate\b|\binput item id does not belong to this connection\b/i;
|
||||
const SANDBOX_BLOCKED_RE =
|
||||
/\bapproval is required\b|\bapproval timed out\b|\bapproval was denied\b|\bblocked by sandbox\b|\bsandbox\b.*\b(?:blocked|denied|forbidden|disabled|not allowed)\b/i;
|
||||
const NO_BODY_HTTP_WRAPPER_RE =
|
||||
/^(?:no body(?: response)?|no response body|status code \(no body\))$/i;
|
||||
|
||||
function stripErrorPrefix(raw: string): string {
|
||||
return raw.replace(/^error:\s*/i, "").trim();
|
||||
@@ -342,6 +344,31 @@ function inferSignalStatus(signal: FailoverSignal): number | undefined {
|
||||
return extractLeadingHttpStatus(stripErrorPrefix(signal.message?.trim() ?? ""))?.code;
|
||||
}
|
||||
|
||||
function isExplicitNoBodyHttpMessage(raw: string | undefined, status?: number): boolean {
|
||||
const trimmed = raw?.trim();
|
||||
if (!trimmed) {
|
||||
return false;
|
||||
}
|
||||
const candidate = extractLeadingHttpStatus(trimmed) ? trimmed : stripErrorPrefix(trimmed);
|
||||
const leadingStatus = extractLeadingHttpStatus(candidate);
|
||||
if (leadingStatus) {
|
||||
if (typeof status === "number" && leadingStatus.code !== status) {
|
||||
return false;
|
||||
}
|
||||
return NO_BODY_HTTP_WRAPPER_RE.test(leadingStatus.rest);
|
||||
}
|
||||
return NO_BODY_HTTP_WRAPPER_RE.test(candidate);
|
||||
}
|
||||
|
||||
export function isUnclassifiedNoBodyHttpSignal(signal: FailoverSignal): boolean {
|
||||
const status = inferSignalStatus(signal);
|
||||
if (status !== 400 && status !== 422) {
|
||||
return false;
|
||||
}
|
||||
const message = signal.message?.trim();
|
||||
return !message || isExplicitNoBodyHttpMessage(message, status);
|
||||
}
|
||||
|
||||
function isHtmlErrorResponse(raw: string, status?: number): boolean {
|
||||
const trimmed = raw.trim();
|
||||
if (!trimmed) {
|
||||
@@ -683,6 +710,15 @@ function classifyFailoverClassificationFromHttpStatus(
|
||||
if (messageClassification) {
|
||||
return messageClassification;
|
||||
}
|
||||
// When the response has no body at all, or only surfaces as an HTTP wrapper
|
||||
// like "400 status code (no body)", return null instead of defaulting to
|
||||
// "format". These shapes are likely transient proxy issues — classifying
|
||||
// them as "format" triggers a compaction loop that cannot recover.
|
||||
if (isUnclassifiedNoBodyHttpSignal({ status, message })) {
|
||||
return null;
|
||||
}
|
||||
// Body exists but couldn't be classified — still treat as format error
|
||||
// since the provider rejected the request schema.
|
||||
return toReasonClassification("format");
|
||||
}
|
||||
return null;
|
||||
|
||||
Reference in New Issue
Block a user