mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 10:30:44 +00:00
fix(auth-profiles): exclude format rejections from profile cooldown
A format-classified failure means the provider rejected the shape of the request payload (e.g. an assistant-prefill 400 when a session transcript ends with a stream-error placeholder turn). That is a per-session transcript-shape problem, not a profile-wide reliability signal. Add the `format` reason to the existing transport-timeout exclusion so that a single bad session no longer cascades into a profile cooldown that takes down every other healthy session sharing the same auth profile or, when all profiles share the same fault, the whole provider for the backoff window. Refs #77228 — this addresses the cascading-cooldown amplifier only. The other two items in that issue (the prefill placeholder leaving transcripts ending in an assistant turn, and the auto-repair filling the JSONL with null-role entries) are separate failure modes and remain open.
This commit is contained in:
@@ -39,4 +39,23 @@ describe("resolveAuthProfileFailureReason", () => {
|
||||
}),
|
||||
).toBeNull();
|
||||
});
|
||||
|
||||
it("does not persist request-shape (format) rejections as auth-profile health (#77228)", () => {
|
||||
// A format rejection (e.g. the github-copilot prefill-strict 400
|
||||
// "conversation must end with a user message" reported in #77228) is
|
||||
// a per-session transcript-shape problem; cascading it to a profile
|
||||
// cooldown blocks every other healthy session sharing the same auth
|
||||
// profile and can take down the whole provider for the backoff window.
|
||||
expect(
|
||||
resolveAuthProfileFailureReason({
|
||||
failoverReason: "format",
|
||||
}),
|
||||
).toBeNull();
|
||||
expect(
|
||||
resolveAuthProfileFailureReason({
|
||||
failoverReason: "format",
|
||||
policy: "shared",
|
||||
}),
|
||||
).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -6,8 +6,21 @@ export function resolveAuthProfileFailureReason(params: {
|
||||
failoverReason: FailoverReason | null;
|
||||
policy?: AuthProfileFailurePolicy;
|
||||
}): AuthProfileFailureReason | null {
|
||||
// Helper-local runs and transport timeouts should not poison shared provider auth health.
|
||||
if (params.policy === "local" || !params.failoverReason || params.failoverReason === "timeout") {
|
||||
// Helper-local runs, transport timeouts, and request-shape ("format") rejections
|
||||
// should not poison shared provider auth health. A `format` failure means the
|
||||
// provider rejected the request payload (e.g. an assistant-prefill 400 from a
|
||||
// strict provider when a session transcript ends with a stream-error placeholder
|
||||
// turn) — that is a per-session transcript-shape problem, not a profile-wide
|
||||
// reliability signal. Cascading it to a profile cooldown blocks every other
|
||||
// healthy session sharing the same auth profile and, when all profiles share
|
||||
// the same fault, takes down the entire provider for the configured backoff
|
||||
// window (#77228).
|
||||
if (
|
||||
params.policy === "local" ||
|
||||
!params.failoverReason ||
|
||||
params.failoverReason === "timeout" ||
|
||||
params.failoverReason === "format"
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
return params.failoverReason;
|
||||
|
||||
Reference in New Issue
Block a user