diff --git a/CHANGELOG.md b/CHANGELOG.md index 83c6870d157..8fa41138696 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Cron: retry recurring wake-now main-session jobs through temporary heartbeat busy skips before recording success, so queued cron events no longer appear as ok ghost runs while the main lane is still busy. Fixes #75964. - Control UI: contain the standalone iOS PWA viewport with safe-area-aware document locking, so Add-to-Home-Screen launches cannot scroll past the device bounds. Refs #76072. Thanks @kvncrw. - Agents/restart recovery: match cleaned transcript locks by exact transcript lock paths plus the canonical session fallback, so interrupted main sessions using topic-suffixed transcripts resume after gateway restart. Refs #76052. Thanks @anyech. - Agents/runtime: cache the stable system-prompt prefix and reuse prompt-report tool schema stats during dispatch prep, reducing repeated CPU work before streaming starts. Fixes #75999; supersedes #76061. Thanks @zackchiutw and @STLI69. 
diff --git a/src/cron/service.main-job-passes-heartbeat-target-last.test.ts b/src/cron/service.main-job-passes-heartbeat-target-last.test.ts index f4662883b01..460d44f3787 100644 --- a/src/cron/service.main-job-passes-heartbeat-target-last.test.ts +++ b/src/cron/service.main-job-passes-heartbeat-target-last.test.ts @@ -47,9 +47,10 @@ describe("cron main job passes heartbeat target=last", () => { } async function runSingleTick(cron: CronService) { - await cron.start(); + const startPromise = cron.start(); await vi.advanceTimersByTimeAsync(2_000); await vi.advanceTimersByTimeAsync(1_000); + await startPromise; cron.stop(); } @@ -102,7 +103,7 @@ describe("cron main job passes heartbeat target=last", () => { const runHeartbeatOnce = vi.fn(async () => ({ status: "skipped" as const, - reason: "requests-in-flight", + reason: "cron-in-progress", })); const { cron, requestHeartbeatNow } = createCronWithSpies({ diff --git a/src/cron/service/timer.regression.test.ts b/src/cron/service/timer.regression.test.ts index 4a7f8dfe0e1..9aeb0339f96 100644 --- a/src/cron/service/timer.regression.test.ts +++ b/src/cron/service/timer.regression.test.ts @@ -11,7 +11,7 @@ import { setupCronRegressionFixtures, writeCronJobs, } from "../../../test/helpers/cron/service-regression-fixtures.js"; -import type { HeartbeatRunResult } from "../../infra/heartbeat-wake.js"; +import { HEARTBEAT_SKIP_LANES_BUSY, type HeartbeatRunResult } from "../../infra/heartbeat-wake.js"; import * as schedule from "../schedule.js"; import type { CronAgentExecutionStarted, CronJob } from "../types.js"; import { computeJobNextRunAtMs } from "./jobs.js"; @@ -827,18 +827,16 @@ describe("cron service timer regressions", () => { expect(requestHeartbeatNow).not.toHaveBeenCalled(); }); - it("finishes recurring wake-now main jobs quickly when the main lane is busy (#58833)", async () => { + it("retries recurring wake-now main jobs until temporary lane pressure clears (#75964)", async () => { let now = 0; const nowMs = () => 
{ now += 10; return now; }; - const runHeartbeatOnce = vi.fn( - async (): Promise<HeartbeatRunResult> => ({ - status: "skipped", - reason: "requests-in-flight", - }), - ); + const runHeartbeatOnce = vi + .fn<() => Promise<HeartbeatRunResult>>() + .mockResolvedValueOnce({ status: "skipped", reason: HEARTBEAT_SKIP_LANES_BUSY }) + .mockResolvedValueOnce({ status: "ran", durationMs: 12 }); const enqueueSystemEvent = vi.fn(); const requestHeartbeatNow = vi.fn(); const job: CronJob = { @@ -862,20 +860,19 @@ describe("cron service timer regressions", () => { requestHeartbeatNow, runHeartbeatOnce, wakeNowHeartbeatBusyMaxWaitMs: 120_000, - wakeNowHeartbeatBusyRetryDelayMs: 250, + wakeNowHeartbeatBusyRetryDelayMs: 1, runIsolatedAgentJob: createDefaultIsolatedRunner(), }); state.store = { version: 1, jobs: [job] }; - await executeJob(state, job, nowMs(), { forced: false }); + const runPromise = executeJob(state, job, nowMs(), { forced: false }); + await vi.advanceTimersByTimeAsync(1); + await runPromise; expect(enqueueSystemEvent).toHaveBeenCalledTimes(1); - expect(runHeartbeatOnce).toHaveBeenCalledTimes(1); - expect(requestHeartbeatNow).toHaveBeenCalledWith( - expect.objectContaining({ reason: "cron:busy-recurring-main" }), - ); + expect(runHeartbeatOnce).toHaveBeenCalledTimes(2); + expect(requestHeartbeatNow).not.toHaveBeenCalled(); expect(job.state.lastStatus).toBe("ok"); - expect(job.state.lastDurationMs).toBeLessThan(100); expect(job.state.runningAtMs).toBeUndefined(); }); diff --git a/src/cron/service/timer.ts b/src/cron/service/timer.ts index d0909d4a984..5476f05e41d 100644 --- a/src/cron/service/timer.ts +++ b/src/cron/service/timer.ts @@ -1374,7 +1374,6 @@ async function executeMainSessionCronJob( }); if (job.wakeMode === "now" && state.deps.runHeartbeatOnce) { const reason = `cron:${job.id}`; - const isRecurringJob = job.schedule.kind !== "at"; const maxWaitMs = state.deps.wakeNowHeartbeatBusyMaxWaitMs ?? 2 * 60_000; const retryDelayMs = state.deps.wakeNowHeartbeatBusyRetryDelayMs ?? 
250; const waitStartedAt = state.deps.nowMs(); @@ -1396,11 +1395,12 @@ async function executeMainSessionCronJob( ) { break; } - if (isRecurringJob || heartbeatResult.reason === HEARTBEAT_SKIP_CRON_IN_PROGRESS) { - // Recurring main-session cron jobs should not hold the cron lane open - // while runtime lanes are busy. A cron-in-progress skip is caused by - // this job's own active marker, so direct wake-now cannot succeed until - // the cron job returns and clears it (#50773). + if (heartbeatResult.reason === HEARTBEAT_SKIP_CRON_IN_PROGRESS) { + // A cron-in-progress skip is caused by this job's own active marker, so + // direct wake-now cannot succeed until the cron job returns and clears + // it (#50773). Other retryable busy reasons can clear while this job is + // still active, so let the bounded retry loop observe a real heartbeat + // run before recording recurring jobs as successful (#75964). state.deps.requestHeartbeatNow({ reason, agentId: job.agentId,