mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-13 09:10:42 +00:00
fix(agents): classify stream_read_error as transient (#79692)
* fix(agents): classify stream_read_error as transient
* fix: classify stream read errors as transient (#79692)

---------

Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
@@ -153,6 +153,8 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- Memory/QMD: warn with a manual stale collection removal hint when QMD reports a path/pattern conflict but `collection list` lacks verifiable metadata, avoiding unsafe stderr-only rebinds. Refs #71783. (#72297) Thanks @MonkeyLeeT.
|
||||
- Models/auth: make `openclaw models status --check` and dashboard auth health honor effective auth profile order while keeping stale profiles visible. (#79685) Thanks @nimbleenigma.
|
||||
- Agents/failover: classify bare `stream_read_error` streaming failures as transient timeouts so configured model fallback runs instead of surfacing the raw transport error. Fixes #79689. (#79692) Thanks @hekunwang.
|
||||
- Agents/failover: persist overloaded auth-profile cooldown marks before exhausted fallback summaries surface, so immediate fallback retries honor the recorded cooldown state.
|
||||
- Docs/Subagents: correct the listed sub-agent bootstrap context files to include `SOUL.md`, `IDENTITY.md`, and `USER.md`. (#79470) Thanks @lastguru-net.
|
||||
- Backup: keep live backup archives from copying current agent session transcripts, cron run logs, and delivery queues while preserving workspace lock/temp files and keeping `--json` output parseable when volatile files are skipped. Fixes #72249. (#72251) Thanks @abnershang.
|
||||
- OpenAI/Codex: install the Codex runtime plugin from npm during OpenAI onboarding and load it automatically for implicit OpenAI model routes, while preserving manual PI runtime overrides. Fixes #79358.
|
||||
|
||||
@@ -483,6 +483,10 @@ describe("runWithModelFallback + runEmbeddedPiAgent failover behavior", () => {
|
||||
name: "undici-terminated",
|
||||
message: "terminated",
|
||||
},
|
||||
{
|
||||
name: "stream-read-error",
|
||||
message: "stream_read_error",
|
||||
},
|
||||
{
|
||||
name: "codex-empty-transport-response",
|
||||
message: "Request failed",
|
||||
|
||||
@@ -1113,6 +1113,14 @@ describe("runWithModelFallback", () => {
|
||||
error: new Error("Model not found: openai/gpt-6"),
|
||||
expectedFallback: ["anthropic", "claude-haiku-3-5"],
|
||||
},
|
||||
{
|
||||
name: "bare stream read transport error",
|
||||
provider: "openai",
|
||||
model: "gpt-4.1-mini",
|
||||
error: new Error("stream_read_error"),
|
||||
expectedFallback: ["anthropic", "claude-haiku-3-5"],
|
||||
expectedReason: "timeout",
|
||||
},
|
||||
];
|
||||
|
||||
for (const testCase of cases) {
|
||||
|
||||
@@ -923,6 +923,8 @@ describe("isFailoverErrorMessage", () => {
|
||||
"terminated",
|
||||
"Terminated",
|
||||
" terminated ",
|
||||
"stream_read_error",
|
||||
" stream_read_error ",
|
||||
"UND_ERR_SOCKET",
|
||||
"Error: UND_ERR_SOCKET other side closed",
|
||||
"UND_ERR_CONNECT_TIMEOUT",
|
||||
|
||||
@@ -168,6 +168,7 @@ const ERROR_PATTERNS = {
|
||||
// aborted). These arrive as bare strings on the outer error and, without
|
||||
// an explicit match, the fallback chain is never attempted (#69368).
|
||||
/^terminated$/i,
|
||||
/^stream_read_error$/i,
|
||||
/\bund_err_(?:socket|connect|headers?|body|req_content_length_mismatch|aborted|closed)\b/i,
|
||||
// pi-ai's openai-codex provider surfaces `Request failed` when the HTTP
|
||||
// response has no body and no status text (typical of Cloudflare 502s
|
||||
|
||||
@@ -99,17 +99,19 @@ export async function handleAssistantFailover(params: {
|
||||
if (decision.action === "rotate_profile") {
|
||||
const failedProfileId = params.lastProfileId;
|
||||
const failureReason = params.timedOut ? "timeout" : params.assistantProfileFailureReason;
|
||||
const markFailedProfile = () => {
|
||||
const markFailedProfile = async () => {
|
||||
if (!failedProfileId || !failureReason || failureReason === "timeout") {
|
||||
return;
|
||||
}
|
||||
params
|
||||
.maybeMarkAuthProfileFailure({
|
||||
try {
|
||||
await params.maybeMarkAuthProfileFailure({
|
||||
profileId: failedProfileId,
|
||||
reason: failureReason,
|
||||
modelId: params.modelId,
|
||||
})
|
||||
.catch((err) => params.warn(`deferred profile failure mark failed: ${String(err)}`));
|
||||
});
|
||||
} catch (err) {
|
||||
params.warn(`profile failure mark failed: ${String(err)}`);
|
||||
}
|
||||
};
|
||||
|
||||
if (params.failoverReason === "overloaded") {
|
||||
@@ -122,7 +124,7 @@ export async function handleAssistantFailover(params: {
|
||||
params.warn(
|
||||
`overload profile rotation cap reached for ${sanitizeForLog(params.provider)}/${sanitizeForLog(params.modelId)} after ${overloadProfileRotations} rotations; escalating to model fallback`,
|
||||
);
|
||||
markFailedProfile();
|
||||
await markFailedProfile();
|
||||
params.logAssistantFailoverDecision("fallback_model", { status });
|
||||
return {
|
||||
action: "throw",
|
||||
@@ -151,7 +153,7 @@ export async function handleAssistantFailover(params: {
|
||||
}
|
||||
|
||||
const rotated = await params.advanceAuthProfile();
|
||||
markFailedProfile();
|
||||
await markFailedProfile();
|
||||
if (params.timedOut && !params.isProbeSession && failedProfileId) {
|
||||
params.warn(`Profile ${failedProfileId} timed out. Trying next account...`);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user