Files
openclaw/src/cron/session-reaper.test.ts
Gustavo Madeira Santana e19a23520c fix: unify session maintenance and cron run pruning (#13083)
* fix: prune stale session entries, cap entry count, and rotate sessions.json

The sessions.json file grows unbounded over time. Every heartbeat tick (default: 30m)
triggers multiple full rewrites, and session keys from groups, threads, and DMs
accumulate indefinitely with large embedded objects (skillsSnapshot,
systemPromptReport). At >50MB the synchronous JSON parse blocks the event loop,
causing Telegram webhook timeouts and effectively taking the bot down.

Three mitigations, all running inside saveSessionStoreUnlocked() on every write:

1. Prune stale entries: remove entries with updatedAt older than 30 days
   (configurable via session.maintenance.pruneDays in openclaw.json)

2. Cap entry count: keep only the 500 most recently updated entries
   (configurable via session.maintenance.maxEntries). Entries without updatedAt
   are evicted first.

3. File rotation: if the existing sessions.json exceeds 10MB before a write,
   rename it to sessions.json.bak.{timestamp} and keep only the 3 most recent
   backups (configurable via session.maintenance.rotateBytes).

All three thresholds are configurable under session.maintenance in openclaw.json
with Zod validation. No env vars.

Existing tests updated to use Date.now() instead of epoch-relative timestamps
(1, 2, 3) that would be incorrectly pruned as stale.

27 new tests covering pruning, capping, rotation, and integration scenarios.

* feat: auto-prune expired cron run sessions (#12289)

Add TTL-based reaper for isolated cron run sessions that accumulate
indefinitely in sessions.json.

New config option:
  cron.sessionRetention: string | false  (default: '24h')

The reaper piggybacks on the cron timer tick and throttles itself to
sweep at most once every 5 minutes. It removes session entries whose key
matches agent:<agentId>:cron:<jobId>:run:<uuid> and whose
updatedAt + retention < now.

Design follows the Kubernetes ttlSecondsAfterFinished pattern:
- Sessions are persisted normally (observability/debugging)
- A periodic reaper prunes expired entries
- Configurable retention with sensible default
- Set to false to disable pruning entirely

Files changed:
- src/config/types.cron.ts: Add sessionRetention to CronConfig
- src/config/zod-schema.ts: Add Zod validation for sessionRetention
- src/cron/session-reaper.ts: New reaper module (sweepCronRunSessions)
- src/cron/session-reaper.test.ts: 12 tests covering all paths
- src/cron/service/state.ts: Add cronConfig/sessionStorePath to deps
- src/cron/service/timer.ts: Wire reaper into onTimer tick
- src/gateway/server-cron.ts: Pass config and session store path to deps

Closes #12289

* fix: sweep cron session stores per agent

* docs: add changelog for session maintenance (#13083) (thanks @skyfallsin, @Glucksberg)

* fix: add warn-only session maintenance mode

* fix: warn-only maintenance defaults to active session

* fix: deliver maintenance warnings to active session

* docs: add session maintenance examples

* fix: accept duration and size maintenance thresholds

* refactor: share cron run session key check

* fix: format issues and replace defaultRuntime.warn with console.warn

---------

Co-authored-by: Pradeep Elankumaran <pradeepe@gmail.com>
Co-authored-by: Glucksberg <markuscontasul@gmail.com>
Co-authored-by: max <40643627+quotentiroler@users.noreply.github.com>
Co-authored-by: quotentiroler <max.nussbaumer@maxhealth.tech>
2026-02-09 20:42:35 -08:00

204 lines
6.0 KiB
TypeScript

import fs from "node:fs";
import os from "node:os";
import path from "node:path";
import { describe, it, expect, beforeEach } from "vitest";
import type { Logger } from "./service/state.js";
import { isCronRunSessionKey } from "../sessions/session-key-utils.js";
import { sweepCronRunSessions, resolveRetentionMs, resetReaperThrottle } from "./session-reaper.js";
/**
 * Builds a `Logger` stub for tests: every level is an empty function, so
 * nothing logged by the code under test is printed or recorded.
 */
function createTestLogger(): Logger {
  const noop = (): void => {};
  return { debug: noop, info: noop, warn: noop, error: noop };
}
// Checks how resolveRetentionMs turns the cron `sessionRetention` setting into
// a millisecond value (or null when pruning is switched off).
describe("resolveRetentionMs", () => {
  const MINUTE_MS = 60_000;
  const HOUR_MS = 60 * MINUTE_MS;
  const DAY_MS = 24 * HOUR_MS;

  it("returns 24h default when no config", () => {
    const retention = resolveRetentionMs();
    expect(retention).toBe(DAY_MS);
  });

  it("returns 24h default when config is empty", () => {
    const retention = resolveRetentionMs({});
    expect(retention).toBe(DAY_MS);
  });

  it("parses duration string", () => {
    // One row per supported unit suffix exercised here: hours, days, minutes.
    const cases: Array<[string, number]> = [
      ["1h", HOUR_MS],
      ["7d", 7 * DAY_MS],
      ["30m", 30 * MINUTE_MS],
    ];
    for (const [sessionRetention, expectedMs] of cases) {
      expect(resolveRetentionMs({ sessionRetention })).toBe(expectedMs);
    }
  });

  it("returns null when disabled", () => {
    const retention = resolveRetentionMs({ sessionRetention: false });
    expect(retention).toBeNull();
  });

  it("falls back to default on invalid string", () => {
    const retention = resolveRetentionMs({ sessionRetention: "abc" });
    expect(retention).toBe(DAY_MS);
  });
});
// Checks which session keys isCronRunSessionKey classifies as per-run cron
// sessions (agent:<agent>:cron:<job>:run:<id>) and which it rejects.
describe("isCronRunSessionKey", () => {
  it("matches cron run session keys", () => {
    const runKeys = [
      "agent:main:cron:abc-123:run:def-456",
      "agent:debugger:cron:249ecf82:run:1102aabb",
    ];
    for (const key of runKeys) {
      expect(isCronRunSessionKey(key)).toBe(true);
    }
  });

  it("does not match base cron session keys", () => {
    const baseKey = "agent:main:cron:abc-123";
    expect(isCronRunSessionKey(baseKey)).toBe(false);
  });

  it("does not match regular session keys", () => {
    const regularKey = "agent:main:telegram:dm:123";
    expect(isCronRunSessionKey(regularKey)).toBe(false);
  });

  it("does not match non-canonical cron-like keys", () => {
    // Contains the cron/run segments, but not in the canonical position.
    const lookalikes = ["agent:main:slack:cron:job:run:uuid", "cron:job:run:uuid"];
    for (const key of lookalikes) {
      expect(isCronRunSessionKey(key)).toBe(false);
    }
  });
});
// Exercises sweepCronRunSessions against a real sessions.json written into a
// per-test temporary directory. The directory is deleted after each test so
// repeated runs do not leave "cron-reaper-*" folders behind in os.tmpdir().
describe("sweepCronRunSessions", () => {
  let tmpDir: string;
  let storePath: string;
  const log = createTestLogger();

  beforeEach(() => {
    // Reset the reaper throttle between tests; the throttle tests below expect
    // their first un-forced sweep to run.
    resetReaperThrottle();
    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "cron-reaper-"));
    storePath = path.join(tmpDir, "sessions.json");

    // Vitest runs a function returned from beforeEach after the test finishes
    // (equivalent to afterEach). Capture the path so the cleanup removes this
    // test's directory even though `tmpDir` is reassigned for the next one.
    const dirToRemove = tmpDir;
    return () => {
      fs.rmSync(dirToRemove, { recursive: true, force: true });
    };
  });

  it("prunes expired cron run sessions", async () => {
    const now = Date.now();
    const store: Record<string, { sessionId: string; updatedAt: number }> = {
      "agent:main:cron:job1": {
        sessionId: "base-session",
        updatedAt: now,
      },
      "agent:main:cron:job1:run:old-run": {
        sessionId: "old-run",
        updatedAt: now - 25 * 3_600_000, // 25h ago — expired
      },
      "agent:main:cron:job1:run:recent-run": {
        sessionId: "recent-run",
        updatedAt: now - 1 * 3_600_000, // 1h ago — not expired
      },
      "agent:main:telegram:dm:123": {
        sessionId: "regular-session",
        updatedAt: now - 100 * 3_600_000, // old but not a cron run
      },
    };
    fs.writeFileSync(storePath, JSON.stringify(store));

    const result = await sweepCronRunSessions({
      sessionStorePath: storePath,
      nowMs: now,
      log,
      force: true,
    });

    expect(result.swept).toBe(true);
    expect(result.pruned).toBe(1);

    // Only the expired run entry is removed; everything else is left in place.
    const updated = JSON.parse(fs.readFileSync(storePath, "utf-8"));
    expect(updated["agent:main:cron:job1"]).toBeDefined();
    expect(updated["agent:main:cron:job1:run:old-run"]).toBeUndefined();
    expect(updated["agent:main:cron:job1:run:recent-run"]).toBeDefined();
    expect(updated["agent:main:telegram:dm:123"]).toBeDefined();
  });

  it("respects custom retention", async () => {
    const now = Date.now();
    const store: Record<string, { sessionId: string; updatedAt: number }> = {
      "agent:main:cron:job1:run:run1": {
        sessionId: "run1",
        updatedAt: now - 2 * 3_600_000, // 2h ago
      },
    };
    fs.writeFileSync(storePath, JSON.stringify(store));

    // 2h-old entry is inside the 24h default but outside a 1h retention.
    const result = await sweepCronRunSessions({
      cronConfig: { sessionRetention: "1h" },
      sessionStorePath: storePath,
      nowMs: now,
      log,
      force: true,
    });

    expect(result.pruned).toBe(1);
  });

  it("does nothing when pruning is disabled", async () => {
    const now = Date.now();
    const store: Record<string, { sessionId: string; updatedAt: number }> = {
      "agent:main:cron:job1:run:run1": {
        sessionId: "run1",
        updatedAt: now - 100 * 3_600_000,
      },
    };
    fs.writeFileSync(storePath, JSON.stringify(store));

    const result = await sweepCronRunSessions({
      cronConfig: { sessionRetention: false },
      sessionStorePath: storePath,
      nowMs: now,
      log,
      force: true,
    });

    expect(result.swept).toBe(false);
    expect(result.pruned).toBe(0);
  });

  it("throttles sweeps without force", async () => {
    const now = Date.now();
    fs.writeFileSync(storePath, JSON.stringify({}));

    // First sweep runs
    const r1 = await sweepCronRunSessions({
      sessionStorePath: storePath,
      nowMs: now,
      log,
    });
    expect(r1.swept).toBe(true);

    // Second sweep (1 second later) is throttled
    const r2 = await sweepCronRunSessions({
      sessionStorePath: storePath,
      nowMs: now + 1000,
      log,
    });
    expect(r2.swept).toBe(false);
  });

  it("throttles per store path", async () => {
    const now = Date.now();
    const otherPath = path.join(tmpDir, "sessions-other.json");
    fs.writeFileSync(storePath, JSON.stringify({}));
    fs.writeFileSync(otherPath, JSON.stringify({}));

    const r1 = await sweepCronRunSessions({
      sessionStorePath: storePath,
      nowMs: now,
      log,
    });
    expect(r1.swept).toBe(true);

    // A different store path is not held back by the first path's sweep.
    const r2 = await sweepCronRunSessions({
      sessionStorePath: otherPath,
      nowMs: now + 1000,
      log,
    });
    expect(r2.swept).toBe(true);

    // The original path is still throttled one second after its own sweep.
    const r3 = await sweepCronRunSessions({
      sessionStorePath: storePath,
      nowMs: now + 1000,
      log,
    });
    expect(r3.swept).toBe(false);
  });
});