mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-25 08:02:04 +00:00
* fix: prune stale session entries, cap entry count, and rotate sessions.json
The sessions.json file grows unbounded over time. Every heartbeat tick (default: 30m)
triggers multiple full rewrites, and session keys from groups, threads, and DMs
accumulate indefinitely with large embedded objects (skillsSnapshot,
systemPromptReport). At >50MB the synchronous JSON parse blocks the event loop,
causing Telegram webhook timeouts and effectively taking the bot down.
Three mitigations, all running inside saveSessionStoreUnlocked() on every write:
1. Prune stale entries: remove entries with updatedAt older than 30 days
(configurable via session.maintenance.pruneDays in openclaw.json)
2. Cap entry count: keep only the 500 most recently updated entries
(configurable via session.maintenance.maxEntries). Entries without updatedAt
are evicted first.
3. File rotation: if the existing sessions.json exceeds 10MB before a write,
rename it to sessions.json.bak.{timestamp} and keep only the 3 most recent
backups (configurable via session.maintenance.rotateBytes).
All three thresholds are configurable under session.maintenance in openclaw.json
with Zod validation. No env vars.
Existing tests updated to use Date.now() instead of epoch-relative timestamps
(1, 2, 3) that would be incorrectly pruned as stale.
27 new tests covering pruning, capping, rotation, and integration scenarios.
* feat: auto-prune expired cron run sessions (#12289)
Add TTL-based reaper for isolated cron run sessions that accumulate
indefinitely in sessions.json.
New config option:
cron.sessionRetention: string | false (default: '24h')
The reaper runs piggy-backed on the cron timer tick, self-throttled
to sweep at most every 5 minutes. It removes session entries matching
the pattern cron:<jobId>:run:<uuid> whose updatedAt + retention < now.
Design follows the Kubernetes ttlSecondsAfterFinished pattern:
- Sessions are persisted normally (observability/debugging)
- A periodic reaper prunes expired entries
- Configurable retention with sensible default
- Set to false to disable pruning entirely
Files changed:
- src/config/types.cron.ts: Add sessionRetention to CronConfig
- src/config/zod-schema.ts: Add Zod validation for sessionRetention
- src/cron/session-reaper.ts: New reaper module (sweepCronRunSessions)
- src/cron/session-reaper.test.ts: 12 tests covering all paths
- src/cron/service/state.ts: Add cronConfig/sessionStorePath to deps
- src/cron/service/timer.ts: Wire reaper into onTimer tick
- src/gateway/server-cron.ts: Pass config and session store path to deps
Closes #12289
* fix: sweep cron session stores per agent
* docs: add changelog for session maintenance (#13083) (thanks @skyfallsin, @Glucksberg)
* fix: add warn-only session maintenance mode
* fix: warn-only maintenance defaults to active session
* fix: deliver maintenance warnings to active session
* docs: add session maintenance examples
* fix: accept duration and size maintenance thresholds
* refactor: share cron run session key check
* fix: format issues and replace defaultRuntime.warn with console.warn
---------
Co-authored-by: Pradeep Elankumaran <pradeepe@gmail.com>
Co-authored-by: Glucksberg <markuscontasul@gmail.com>
Co-authored-by: max <40643627+quotentiroler@users.noreply.github.com>
Co-authored-by: quotentiroler <max.nussbaumer@maxhealth.tech>
116 lines
3.5 KiB
TypeScript
116 lines
3.5 KiB
TypeScript
/**
 * Cron session reaper — prunes completed isolated cron run sessions
 * from the session store after a configurable retention period.
 *
 * Pattern: sessions keyed as `...:cron:<jobId>:run:<uuid>` are ephemeral
 * run records. The base session (`...:cron:<jobId>`) is kept as-is.
 */
|
|
|
|
import type { CronConfig } from "../config/types.cron.js";
|
|
import type { Logger } from "./service/state.js";
|
|
import { parseDurationMs } from "../cli/parse-duration.js";
|
|
import { updateSessionStore } from "../config/sessions.js";
|
|
import { isCronRunSessionKey } from "../sessions/session-key-utils.js";
|
|
|
|
/** Retention used when `sessionRetention` is unset, blank, or unparseable: 24 hours. */
const DEFAULT_RETENTION_MS = 24 * 3_600_000;

/** Minimum interval between reaper sweeps (avoid running every timer tick): 5 minutes. */
const MIN_SWEEP_INTERVAL_MS = 5 * 60_000;

/** Timestamp (ms) recorded per session store path by the reaper, used to throttle sweeps. */
const lastSweepAtMsByStore = new Map<string, number>();
|
|
|
|
/**
 * Resolve the retention period for cron run sessions, in milliseconds.
 *
 * @param cronConfig - Cron configuration; only `sessionRetention` is read.
 * @returns `null` when `sessionRetention` is `false` (pruning disabled); the
 *   parsed duration when it is a non-blank string; otherwise the default
 *   retention (`DEFAULT_RETENTION_MS`).
 */
export function resolveRetentionMs(cronConfig?: CronConfig): number | null {
  const raw = cronConfig?.sessionRetention;
  if (raw === false) {
    return null; // pruning disabled
  }
  if (typeof raw !== "string") {
    return DEFAULT_RETENTION_MS;
  }
  const trimmed = raw.trim();
  if (!trimmed) {
    return DEFAULT_RETENTION_MS;
  }
  try {
    // `defaultUnit: "h"` is passed through; presumably unit-less values are
    // read as hours — confirm against parseDurationMs.
    return parseDurationMs(trimmed, { defaultUnit: "h" });
  } catch {
    // An unparseable value falls back to the default rather than propagating.
    return DEFAULT_RETENTION_MS;
  }
}
|
|
|
|
/** Outcome of a single `sweepCronRunSessions` call. */
export type ReaperResult = {
  /**
   * `true` when the session store was swept; `false` when the call was
   * throttled, pruning is disabled, or the store update failed.
   */
  swept: boolean;
  /** Number of cron run session entries removed by this call. */
  pruned: number;
};
|
|
|
|
/**
 * Sweep the session store and prune expired cron run sessions.
 * Designed to be called from the cron timer tick — self-throttles via
 * MIN_SWEEP_INTERVAL_MS to avoid excessive I/O.
 *
 * Lock ordering: this function acquires the session-store file lock via
 * `updateSessionStore`. It must be called OUTSIDE of the cron service's
 * own `locked()` section to avoid lock-order inversions. The cron timer
 * calls this after all `locked()` sections have been released.
 *
 * @param params.cronConfig - Cron configuration; supplies `sessionRetention`.
 * @param params.sessionStorePath - Resolved path to sessions.json; also the throttle key.
 * @param params.nowMs - Clock override in epoch milliseconds; defaults to `Date.now()`.
 * @param params.log - Logger used for the prune summary and for failure warnings.
 * @param params.force - When true, bypasses the min-interval throttle (for tests).
 * @returns `swept: false` when throttled, disabled, or the store update threw
 *   (the error is logged, not rethrown); otherwise `swept: true` with the
 *   number of entries removed.
 */
export async function sweepCronRunSessions(params: {
  cronConfig?: CronConfig;
  /** Resolved path to sessions.json — required. */
  sessionStorePath: string;
  nowMs?: number;
  log: Logger;
  /** Override for testing — skips the min-interval throttle. */
  force?: boolean;
}): Promise<ReaperResult> {
  const now = params.nowMs ?? Date.now();
  const storePath = params.sessionStorePath;
  const lastSweepAtMs = lastSweepAtMsByStore.get(storePath);

  // Throttle: don't sweep more often than every 5 minutes. A store that has
  // never been swept is never throttled, so an injected `nowMs` smaller than
  // the interval still triggers the first sweep.
  if (
    !params.force &&
    lastSweepAtMs !== undefined &&
    now - lastSweepAtMs < MIN_SWEEP_INTERVAL_MS
  ) {
    return { swept: false, pruned: 0 };
  }

  const retentionMs = resolveRetentionMs(params.cronConfig);
  if (retentionMs === null) {
    // Pruning disabled: still record the timestamp so the throttle applies.
    lastSweepAtMsByStore.set(storePath, now);
    return { swept: false, pruned: 0 };
  }

  let pruned = 0;
  try {
    await updateSessionStore(storePath, (store) => {
      const cutoff = now - retentionMs;
      for (const key of Object.keys(store)) {
        if (!isCronRunSessionKey(key)) {
          continue;
        }
        const entry = store[key];
        if (!entry) {
          continue;
        }
        // Entries without `updatedAt` are treated as timestamp 0.
        const updatedAt = entry.updatedAt ?? 0;
        if (updatedAt < cutoff) {
          delete store[key];
          pruned++;
        }
      }
    });
  } catch (err) {
    // The timestamp is intentionally not recorded here, so the next call
    // is not throttled and will retry.
    params.log.warn({ err: String(err) }, "cron-reaper: failed to sweep session store");
    return { swept: false, pruned: 0 };
  }

  lastSweepAtMsByStore.set(storePath, now);

  if (pruned > 0) {
    params.log.info(
      { pruned, retentionMs },
      `cron-reaper: pruned ${pruned} expired cron run session(s)`,
    );
  }

  return { swept: true, pruned };
}
|
|
|
|
/** Reset the throttle timer (for tests) by clearing every recorded sweep timestamp. */
export function resetReaperThrottle(): void {
  lastSweepAtMsByStore.clear();
}
|