Mirror of https://github.com/openclaw/openclaw.git — synced 2026-03-12 07:20:45 +00:00
fix(heartbeat): keep requests-in-flight retries from drifting schedule (#39182, thanks @MumuTW)
Co-authored-by: MumuTW <clothl47364@gmail.com>
This commit is contained in:
@@ -286,6 +286,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Discord/plugin native command aliases: let plugins declare provider-specific slash names so native Discord registration can avoid built-in command collisions; the bundled Talk voice plugin now uses `/talkvoice` natively on Discord while keeping text `/voice`.
|
||||
- Daemon/Windows schtasks status normalization: derive runtime state only from the locale-neutral numeric `Last Run Result` codes (without language string matching) and report the state as unknown when numeric result data is unavailable, preventing locale-specific misclassification. (#39153) Thanks @scoootscooob.
|
||||
- Telegram/polling conflict recovery: reset the polling `webhookCleared` latch on `getUpdates` 409 conflicts so webhook cleanup re-runs on restart cycles and polling avoids infinite conflict loops. (#39205) Thanks @amittell.
|
||||
- Heartbeat/requests-in-flight scheduling: stop advancing `nextDueMs` and stop calling `scheduleNext()` immediately when a heartbeat run is skipped as requests-in-flight, so the wake layer's retry cooldown is honored instead of being overridden by an immediate timer, and heartbeat cadence no longer drifts under sustained contention. (#39182) Thanks @MumuTW.
|
||||
|
||||
## 2026.3.2
|
||||
|
||||
|
||||
@@ -158,13 +158,55 @@ describe("startHeartbeatRunner", () => {
|
||||
await vi.advanceTimersByTimeAsync(30 * 60_000 + 1_000);
|
||||
expect(runSpy).toHaveBeenCalledTimes(1);
|
||||
|
||||
// Timer should be rescheduled; next heartbeat should still fire
|
||||
await vi.advanceTimersByTimeAsync(30 * 60_000 + 1_000);
|
||||
// The wake layer retries after DEFAULT_RETRY_MS (1 s). No scheduleNext()
|
||||
// is called inside runOnce, so we must wait for the full cooldown.
|
||||
await vi.advanceTimersByTimeAsync(1_000);
|
||||
expect(runSpy).toHaveBeenCalledTimes(2);
|
||||
|
||||
runner.stop();
|
||||
});
|
||||
|
||||
it("does not push nextDueMs forward on repeated requests-in-flight skips", async () => {
|
||||
vi.useFakeTimers();
|
||||
vi.setSystemTime(new Date(0));
|
||||
|
||||
// Simulate a long-running heartbeat: the first 5 calls return
|
||||
// requests-in-flight (retries from the wake layer), then the 6th succeeds.
|
||||
let callCount = 0;
|
||||
const runSpy = vi.fn().mockImplementation(async () => {
|
||||
callCount++;
|
||||
if (callCount <= 5) {
|
||||
return { status: "skipped", reason: "requests-in-flight" };
|
||||
}
|
||||
return { status: "ran", durationMs: 1 };
|
||||
});
|
||||
|
||||
const runner = startHeartbeatRunner({
|
||||
cfg: {
|
||||
agents: { defaults: { heartbeat: { every: "30m" } } },
|
||||
} as OpenClawConfig,
|
||||
runOnce: runSpy,
|
||||
});
|
||||
|
||||
// Trigger the first heartbeat at t=30m — returns requests-in-flight.
|
||||
await vi.advanceTimersByTimeAsync(30 * 60_000 + 1_000);
|
||||
expect(runSpy).toHaveBeenCalledTimes(1);
|
||||
|
||||
// Simulate 4 more retries at short intervals (wake layer retries).
|
||||
for (let i = 0; i < 4; i++) {
|
||||
requestHeartbeatNow({ reason: "retry", coalesceMs: 0 });
|
||||
await vi.advanceTimersByTimeAsync(1_000);
|
||||
}
|
||||
expect(runSpy).toHaveBeenCalledTimes(5);
|
||||
|
||||
// The next interval tick at ~t=60m should still fire — the schedule
|
||||
// must not have been pushed to t=30m * 6 = 180m by the 5 retries.
|
||||
await vi.advanceTimersByTimeAsync(30 * 60_000);
|
||||
expect(runSpy).toHaveBeenCalledTimes(6);
|
||||
|
||||
runner.stop();
|
||||
});
|
||||
|
||||
it("routes targeted wake requests to the requested agent/session", async () => {
|
||||
vi.useFakeTimers();
|
||||
vi.setSystemTime(new Date(0));
|
||||
|
||||
@@ -1190,8 +1190,10 @@ export function startHeartbeatRunner(opts: {
|
||||
continue;
|
||||
}
|
||||
if (res.status === "skipped" && res.reason === "requests-in-flight") {
|
||||
advanceAgentSchedule(agent, now);
|
||||
scheduleNext();
|
||||
// Do not advance the schedule — the main lane is busy and the wake
|
||||
// layer will retry shortly (DEFAULT_RETRY_MS = 1 s). Calling
|
||||
// scheduleNext() here would register a 0 ms timer that races with
|
||||
// the wake layer's 1 s retry and wins, bypassing the cooldown.
|
||||
return res;
|
||||
}
|
||||
if (res.status !== "skipped" || res.reason !== "disabled") {
|
||||
|
||||
Reference in New Issue
Block a user