mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-24 03:39:50 +00:00
fix(agents): escalate LLM idle timeout to model fallback after profile rotation
When the LLM idle watchdog fires (the model produced no tokens for N seconds), idleTimedOut is set in handleAssistantFailover, but it was never passed into resolveRunFailoverDecision. As a result, shouldRotateAssistant saw neither failoverReason nor timedOut (the run-budget timeout) set and returned false, so the decision fell through to continue_normal -- the agent silently froze without surfacing an error or advancing the fallback chain.

Fixes #76877 (regression since 2026.4.24).

Changes:
- failover-policy.ts: add idleTimedOut to AssistantDecisionParams; include it in shouldRotateAssistant and in the reason selection in resolveRunFailoverDecision
- assistant-failover.ts: pass idleTimedOut into resolveRunFailoverDecision
- failover-policy.test.ts: 4 new cases for the idle timeout path; update the existing assistant-stage cases with the new required field (idleTimedOut: false)
This commit is contained in:
@@ -190,6 +190,7 @@ export async function handleAssistantFailover(params: {
|
||||
failoverFailure: params.failoverFailure,
|
||||
failoverReason: params.failoverReason,
|
||||
timedOut: params.timedOut,
|
||||
idleTimedOut: params.idleTimedOut,
|
||||
timedOutDuringCompaction: params.timedOutDuringCompaction,
|
||||
timedOutDuringToolExecution: params.timedOutDuringToolExecution,
|
||||
profileRotated: true,
|
||||
|
||||
@@ -106,6 +106,7 @@ describe("resolveRunFailoverDecision", () => {
|
||||
failoverFailure: false,
|
||||
failoverReason: "rate_limit",
|
||||
timedOut: false,
|
||||
idleTimedOut: false,
|
||||
timedOutDuringCompaction: false,
|
||||
timedOutDuringToolExecution: false,
|
||||
profileRotated: false,
|
||||
@@ -167,6 +168,7 @@ describe("resolveRunFailoverDecision", () => {
|
||||
failoverFailure: false,
|
||||
failoverReason: "rate_limit",
|
||||
timedOut: false,
|
||||
idleTimedOut: false,
|
||||
timedOutDuringCompaction: false,
|
||||
timedOutDuringToolExecution: false,
|
||||
profileRotated: true,
|
||||
@@ -187,6 +189,7 @@ describe("resolveRunFailoverDecision", () => {
|
||||
failoverFailure: false,
|
||||
failoverReason: null,
|
||||
timedOut: false,
|
||||
idleTimedOut: false,
|
||||
timedOutDuringCompaction: false,
|
||||
timedOutDuringToolExecution: false,
|
||||
profileRotated: false,
|
||||
@@ -223,6 +226,7 @@ describe("resolveRunFailoverDecision", () => {
|
||||
failoverFailure: false,
|
||||
failoverReason: null,
|
||||
timedOut: true,
|
||||
idleTimedOut: false,
|
||||
timedOutDuringCompaction: false,
|
||||
timedOutDuringToolExecution: true,
|
||||
profileRotated: false,
|
||||
@@ -242,6 +246,7 @@ describe("resolveRunFailoverDecision", () => {
|
||||
failoverFailure: false,
|
||||
failoverReason: null,
|
||||
timedOut: true,
|
||||
idleTimedOut: false,
|
||||
timedOutDuringCompaction: false,
|
||||
timedOutDuringToolExecution: true,
|
||||
profileRotated: true,
|
||||
@@ -261,6 +266,7 @@ describe("resolveRunFailoverDecision", () => {
|
||||
failoverFailure: false,
|
||||
failoverReason: null,
|
||||
timedOut: true,
|
||||
idleTimedOut: false,
|
||||
timedOutDuringCompaction: false,
|
||||
timedOutDuringToolExecution: false,
|
||||
profileRotated: false,
|
||||
@@ -281,6 +287,95 @@ describe("resolveRunFailoverDecision", () => {
|
||||
failoverFailure: false,
|
||||
failoverReason: null,
|
||||
timedOut: true,
|
||||
idleTimedOut: false,
|
||||
timedOutDuringCompaction: false,
|
||||
timedOutDuringToolExecution: false,
|
||||
profileRotated: false,
|
||||
}),
|
||||
).toEqual({
|
||||
action: "surface_error",
|
||||
reason: null,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it("rotates profile on LLM idle timeout before falling back", () => {
|
||||
// idleTimedOut = model produced no tokens; no provider API error was classified.
|
||||
// Before this fix, failoverReason=null + timedOut=false → shouldRotateAssistant=false
|
||||
// → continue_normal, causing a silent agent freeze.
|
||||
expect(
|
||||
resolveRunFailoverDecision({
|
||||
stage: "assistant",
|
||||
aborted: false,
|
||||
externalAbort: false,
|
||||
fallbackConfigured: true,
|
||||
failoverFailure: false,
|
||||
failoverReason: null,
|
||||
timedOut: false,
|
||||
idleTimedOut: true,
|
||||
timedOutDuringCompaction: false,
|
||||
timedOutDuringToolExecution: false,
|
||||
profileRotated: false,
|
||||
}),
|
||||
).toEqual({
|
||||
action: "rotate_profile",
|
||||
reason: null,
|
||||
});
|
||||
});
|
||||
|
||||
it("escalates LLM idle timeout to fallback_model after profile rotation is exhausted", () => {
|
||||
expect(
|
||||
resolveRunFailoverDecision({
|
||||
stage: "assistant",
|
||||
aborted: false,
|
||||
externalAbort: false,
|
||||
fallbackConfigured: true,
|
||||
failoverFailure: false,
|
||||
failoverReason: null,
|
||||
timedOut: false,
|
||||
idleTimedOut: true,
|
||||
timedOutDuringCompaction: false,
|
||||
timedOutDuringToolExecution: false,
|
||||
profileRotated: true,
|
||||
}),
|
||||
).toEqual({
|
||||
action: "fallback_model",
|
||||
reason: "timeout",
|
||||
});
|
||||
});
|
||||
|
||||
it("surfaces error on LLM idle timeout when no fallback is configured and rotation is exhausted", () => {
|
||||
expect(
|
||||
resolveRunFailoverDecision({
|
||||
stage: "assistant",
|
||||
aborted: false,
|
||||
externalAbort: false,
|
||||
fallbackConfigured: false,
|
||||
failoverFailure: false,
|
||||
failoverReason: null,
|
||||
timedOut: false,
|
||||
idleTimedOut: true,
|
||||
timedOutDuringCompaction: false,
|
||||
timedOutDuringToolExecution: false,
|
||||
profileRotated: true,
|
||||
}),
|
||||
).toEqual({
|
||||
action: "surface_error",
|
||||
reason: null,
|
||||
});
|
||||
});
|
||||
|
||||
it("does not escalate LLM idle timeout after an external abort", () => {
|
||||
expect(
|
||||
resolveRunFailoverDecision({
|
||||
stage: "assistant",
|
||||
aborted: false,
|
||||
externalAbort: true,
|
||||
fallbackConfigured: true,
|
||||
failoverFailure: false,
|
||||
failoverReason: null,
|
||||
timedOut: false,
|
||||
idleTimedOut: true,
|
||||
timedOutDuringCompaction: false,
|
||||
timedOutDuringToolExecution: false,
|
||||
profileRotated: false,
|
||||
|
||||
@@ -57,6 +57,7 @@ type AssistantDecisionParams = {
|
||||
failoverFailure: boolean;
|
||||
failoverReason: FailoverReason | null;
|
||||
timedOut: boolean;
|
||||
idleTimedOut: boolean;
|
||||
timedOutDuringCompaction: boolean;
|
||||
timedOutDuringToolExecution: boolean;
|
||||
profileRotated: boolean;
|
||||
@@ -98,7 +99,8 @@ function shouldRotateAssistant(params: AssistantDecisionParams): boolean {
|
||||
}
|
||||
return (
|
||||
(!params.aborted && (params.failoverFailure || params.failoverReason !== null)) ||
|
||||
(params.timedOut && !params.timedOutDuringCompaction && !params.timedOutDuringToolExecution)
|
||||
(params.timedOut && !params.timedOutDuringCompaction && !params.timedOutDuringToolExecution) ||
|
||||
params.idleTimedOut
|
||||
);
|
||||
}
|
||||
|
||||
@@ -178,7 +180,8 @@ export function resolveRunFailoverDecision(params: RunFailoverDecisionParams): R
|
||||
if (assistantShouldRotate && params.fallbackConfigured) {
|
||||
return {
|
||||
action: "fallback_model",
|
||||
reason: params.timedOut ? "timeout" : (params.failoverReason ?? "unknown"),
|
||||
reason:
|
||||
params.timedOut || params.idleTimedOut ? "timeout" : (params.failoverReason ?? "unknown"),
|
||||
};
|
||||
}
|
||||
if (!assistantShouldRotate) {
|
||||
|
||||
Reference in New Issue
Block a user