fix(heartbeat): keep requests-in-flight retries from drifting schedule (#39182, thanks @MumuTW)

Co-authored-by: MumuTW <clothl47364@gmail.com>
This commit is contained in:
Peter Steinberger
2026-03-07 22:10:51 +00:00
parent 42bf4998d3
commit 733f7af92b
3 changed files with 49 additions and 4 deletions

View File

@@ -286,6 +286,7 @@ Docs: https://docs.openclaw.ai
- Discord/plugin native command aliases: let plugins declare provider-specific slash names so native Discord registration can avoid built-in command collisions; the bundled Talk voice plugin now uses `/talkvoice` natively on Discord while keeping text `/voice`.
- Daemon/Windows schtasks status normalization: derive runtime state from locale-neutral numeric `Last Run Result` codes only (without language string matching) and surface unknown when numeric result data is unavailable, preventing locale-specific misclassification drift. (#39153) Thanks @scoootscooob.
- Telegram/polling conflict recovery: reset the polling `webhookCleared` latch on `getUpdates` 409 conflicts so webhook cleanup re-runs on restart cycles and polling avoids infinite conflict loops. (#39205) Thanks @amittell.
- Heartbeat/requests-in-flight scheduling: stop advancing `nextDueMs` and avoid immediate `scheduleNext()` timer overrides on requests-in-flight skips, so wake-layer retry cooldowns are honored and heartbeat cadence no longer drifts under sustained contention. (#39182) Thanks @MumuTW.
## 2026.3.2

View File

@@ -158,13 +158,55 @@ describe("startHeartbeatRunner", () => {
await vi.advanceTimersByTimeAsync(30 * 60_000 + 1_000);
expect(runSpy).toHaveBeenCalledTimes(1);
// Timer should be rescheduled; next heartbeat should still fire
await vi.advanceTimersByTimeAsync(30 * 60_000 + 1_000);
// The wake layer retries after DEFAULT_RETRY_MS (1 s). No scheduleNext()
// is called inside runOnce, so we must wait for the full cooldown.
await vi.advanceTimersByTimeAsync(1_000);
expect(runSpy).toHaveBeenCalledTimes(2);
runner.stop();
});
it("does not push nextDueMs forward on repeated requests-in-flight skips", async () => {
  // Timing knobs used throughout this scenario.
  const intervalMs = 30 * 60_000;
  const retryStepMs = 1_000;
  const busyAttempts = 5;
  const manualRetries = 4;

  vi.useFakeTimers();
  vi.setSystemTime(new Date(0));

  // Stand-in for a busy main lane: the first `busyAttempts` invocations are
  // skipped with "requests-in-flight"; every later invocation reports a
  // completed run.
  let attempts = 0;
  const runSpy = vi.fn().mockImplementation(async () => {
    attempts += 1;
    return attempts <= busyAttempts
      ? { status: "skipped", reason: "requests-in-flight" }
      : { status: "ran", durationMs: 1 };
  });

  const cfg = {
    agents: { defaults: { heartbeat: { every: "30m" } } },
  } as OpenClawConfig;
  const runner = startHeartbeatRunner({ cfg, runOnce: runSpy });

  // First scheduled tick (just past the 30 minute mark) hits the busy lane.
  await vi.advanceTimersByTimeAsync(intervalMs + retryStepMs);
  expect(runSpy).toHaveBeenCalledTimes(1);

  // Drive four more attempts, one second apart, via explicit wake requests.
  for (let retry = 1; retry <= manualRetries; retry += 1) {
    requestHeartbeatNow({ reason: "retry", coalesceMs: 0 });
    await vi.advanceTimersByTimeAsync(retryStepMs);
  }
  expect(runSpy).toHaveBeenCalledTimes(5);

  // One further interval later the regular tick must still arrive: the
  // skipped attempts above must not have moved the due time further out.
  await vi.advanceTimersByTimeAsync(intervalMs);
  expect(runSpy).toHaveBeenCalledTimes(6);

  runner.stop();
});
it("routes targeted wake requests to the requested agent/session", async () => {
vi.useFakeTimers();
vi.setSystemTime(new Date(0));

View File

@@ -1190,8 +1190,10 @@ export function startHeartbeatRunner(opts: {
continue;
}
if (res.status === "skipped" && res.reason === "requests-in-flight") {
advanceAgentSchedule(agent, now);
scheduleNext();
// Do not advance the schedule — the main lane is busy and the wake
// layer will retry shortly (DEFAULT_RETRY_MS = 1 s). Calling
// scheduleNext() here would register a 0 ms timer that races with
// the wake layer's 1 s retry and wins, bypassing the cooldown.
return res;
}
if (res.status !== "skipped" || res.reason !== "disabled") {