fix(cron): retry busy recurring wake-now jobs

This commit is contained in:
Rui Xu
2026-05-02 21:24:58 +08:00
committed by Ayaan Zaidi
parent 91055b9a37
commit 8db83f241e
4 changed files with 22 additions and 23 deletions

View File

@@ -33,6 +33,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Cron: keep retrying recurring wake-now main-session jobs while the heartbeat reports a temporary busy skip, and record success only after a real heartbeat run, so queued cron events are no longer recorded as `ok` ghost runs while the main lane is still busy. Fixes #75964.
- Control UI: contain the standalone iOS PWA viewport with safe-area-aware document locking, so Add-to-Home-Screen launches cannot scroll past the device bounds. Refs #76072. Thanks @kvncrw.
- Agents/restart recovery: match cleaned transcript locks by exact transcript lock paths plus the canonical session fallback, so interrupted main sessions using topic-suffixed transcripts resume after gateway restart. Refs #76052. Thanks @anyech.
- Agents/runtime: cache the stable system-prompt prefix and reuse prompt-report tool schema stats during dispatch prep, reducing repeated CPU work before streaming starts. Fixes #75999; supersedes #76061. Thanks @zackchiutw and @STLI69.

View File

@@ -47,9 +47,10 @@ describe("cron main job passes heartbeat target=last", () => {
}
async function runSingleTick(cron: CronService) {
await cron.start();
const startPromise = cron.start();
await vi.advanceTimersByTimeAsync(2_000);
await vi.advanceTimersByTimeAsync(1_000);
await startPromise;
cron.stop();
}
@@ -102,7 +103,7 @@ describe("cron main job passes heartbeat target=last", () => {
const runHeartbeatOnce = vi.fn<RunHeartbeatOnce>(async () => ({
status: "skipped" as const,
reason: "requests-in-flight",
reason: "cron-in-progress",
}));
const { cron, requestHeartbeatNow } = createCronWithSpies({

View File

@@ -11,7 +11,7 @@ import {
setupCronRegressionFixtures,
writeCronJobs,
} from "../../../test/helpers/cron/service-regression-fixtures.js";
import type { HeartbeatRunResult } from "../../infra/heartbeat-wake.js";
import { HEARTBEAT_SKIP_LANES_BUSY, type HeartbeatRunResult } from "../../infra/heartbeat-wake.js";
import * as schedule from "../schedule.js";
import type { CronAgentExecutionStarted, CronJob } from "../types.js";
import { computeJobNextRunAtMs } from "./jobs.js";
@@ -827,18 +827,16 @@ describe("cron service timer regressions", () => {
expect(requestHeartbeatNow).not.toHaveBeenCalled();
});
it("finishes recurring wake-now main jobs quickly when the main lane is busy (#58833)", async () => {
it("retries recurring wake-now main jobs until temporary lane pressure clears (#75964)", async () => {
let now = 0;
const nowMs = () => {
now += 10;
return now;
};
const runHeartbeatOnce = vi.fn(
async (): Promise<HeartbeatRunResult> => ({
status: "skipped",
reason: "requests-in-flight",
}),
);
const runHeartbeatOnce = vi
.fn<() => Promise<HeartbeatRunResult>>()
.mockResolvedValueOnce({ status: "skipped", reason: HEARTBEAT_SKIP_LANES_BUSY })
.mockResolvedValueOnce({ status: "ran", durationMs: 12 });
const enqueueSystemEvent = vi.fn();
const requestHeartbeatNow = vi.fn();
const job: CronJob = {
@@ -862,20 +860,19 @@ describe("cron service timer regressions", () => {
requestHeartbeatNow,
runHeartbeatOnce,
wakeNowHeartbeatBusyMaxWaitMs: 120_000,
wakeNowHeartbeatBusyRetryDelayMs: 250,
wakeNowHeartbeatBusyRetryDelayMs: 1,
runIsolatedAgentJob: createDefaultIsolatedRunner(),
});
state.store = { version: 1, jobs: [job] };
await executeJob(state, job, nowMs(), { forced: false });
const runPromise = executeJob(state, job, nowMs(), { forced: false });
await vi.advanceTimersByTimeAsync(1);
await runPromise;
expect(enqueueSystemEvent).toHaveBeenCalledTimes(1);
expect(runHeartbeatOnce).toHaveBeenCalledTimes(1);
expect(requestHeartbeatNow).toHaveBeenCalledWith(
expect.objectContaining({ reason: "cron:busy-recurring-main" }),
);
expect(runHeartbeatOnce).toHaveBeenCalledTimes(2);
expect(requestHeartbeatNow).not.toHaveBeenCalled();
expect(job.state.lastStatus).toBe("ok");
expect(job.state.lastDurationMs).toBeLessThan(100);
expect(job.state.runningAtMs).toBeUndefined();
});

View File

@@ -1374,7 +1374,6 @@ async function executeMainSessionCronJob(
});
if (job.wakeMode === "now" && state.deps.runHeartbeatOnce) {
const reason = `cron:${job.id}`;
const isRecurringJob = job.schedule.kind !== "at";
const maxWaitMs = state.deps.wakeNowHeartbeatBusyMaxWaitMs ?? 2 * 60_000;
const retryDelayMs = state.deps.wakeNowHeartbeatBusyRetryDelayMs ?? 250;
const waitStartedAt = state.deps.nowMs();
@@ -1396,11 +1395,12 @@ async function executeMainSessionCronJob(
) {
break;
}
if (isRecurringJob || heartbeatResult.reason === HEARTBEAT_SKIP_CRON_IN_PROGRESS) {
// Recurring main-session cron jobs should not hold the cron lane open
// while runtime lanes are busy. A cron-in-progress skip is caused by
// this job's own active marker, so direct wake-now cannot succeed until
// the cron job returns and clears it (#50773).
if (heartbeatResult.reason === HEARTBEAT_SKIP_CRON_IN_PROGRESS) {
// A cron-in-progress skip is caused by this job's own active marker, so
// direct wake-now cannot succeed until the cron job returns and clears
// it (#50773). Other retryable busy reasons can clear while this job is
// still active, so let the bounded retry loop observe a real heartbeat
// run before recording recurring jobs as successful (#75964).
state.deps.requestHeartbeatNow({
reason,
agentId: job.agentId,