From 2fe50f69db7c0df41d2182b8071fd8f84a403b16 Mon Sep 17 00:00:00 2001 From: wanglu241 Date: Wed, 24 Jun 2026 13:00:11 +0800 Subject: [PATCH] fix(sessions): align forced model-run prune with cap eviction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Forced maintenance (sessions cleanup / maintenanceOverride) caps immediately to maxEntries, but the unset model-run default was high-water gated. In the (maxEntries, high-water) window stale model-run probes survived while the forced cap evicted real sessions — the inverse of #88632. shouldRunModelRunPrune now takes a force flag: when the caller caps immediately, the unset default prunes once entryCount > maxEntries. Wire force at the two forced call sites (applyEnforcedMaintenance, previewStoreCleanup). Make the SDK runtime config field modelRunPruneAfterConfigured optional (additive). Add force-gate unit test + forced-apply regression test. --- src/config/sessions/cleanup-service.ts | 4 + src/config/sessions/runtime-types.ts | 2 +- .../sessions/store-maintenance-operations.ts | 1 + src/config/sessions/store-maintenance.ts | 12 ++ src/config/sessions/store.pruning.test.ts | 105 ++++++++++++++++++ 5 files changed, 123 insertions(+), 1 deletion(-) diff --git a/src/config/sessions/cleanup-service.ts b/src/config/sessions/cleanup-service.ts index 8821ea41a01..9bd5141a8d7 100644 --- a/src/config/sessions/cleanup-service.ts +++ b/src/config/sessions/cleanup-service.ts @@ -370,6 +370,10 @@ async function previewStoreCleanup(params: { const modelRunPruned = shouldRunModelRunPrune({ maintenance: params.maintenance, entryCount: Object.keys(previewStore).length, + // `sessions cleanup` applies the cap immediately (apply path forces maintenance and the + // preview caps unconditionally below), so mirror that here: prune stale probes before the + // forced cap can evict real sessions in their place. + force: true, }) ? pruneStaleModelRunEntries(previewStore, params.maintenance.modelRunPruneAfterMs, { log: false, diff --git a/src/config/sessions/runtime-types.ts b/src/config/sessions/runtime-types.ts index dbd543cf547..9248b5791bb 100644 --- a/src/config/sessions/runtime-types.ts +++ b/src/config/sessions/runtime-types.ts @@ -26,7 +26,7 @@ export type ResolvedSessionMaintenanceConfigRuntime = { pruneAfterMs: number; maxEntries: number; modelRunPruneAfterMs: number | null; - modelRunPruneAfterConfigured: boolean; + modelRunPruneAfterConfigured?: boolean; resetArchiveRetentionMs: number | null; maxDiskBytes: number | null; highWaterBytes: number | null; diff --git a/src/config/sessions/store-maintenance-operations.ts b/src/config/sessions/store-maintenance-operations.ts index 51c23ecd503..76eeed8f632 100644 --- a/src/config/sessions/store-maintenance-operations.ts +++ b/src/config/sessions/store-maintenance-operations.ts @@ -202,6 +202,7 @@ async function applyEnforcedMaintenance(params: { const modelRunPruned = shouldRunModelRunPrune({ maintenance: params.maintenance, entryCount: params.beforeCount, + force: params.forceMaintenance, }) ? pruneStaleModelRunEntries(params.operation.store, params.maintenance.modelRunPruneAfterMs, { onPruned: ({ entry }) => { diff --git a/src/config/sessions/store-maintenance.ts b/src/config/sessions/store-maintenance.ts index 683907cbfeb..18c6584c749 100644 --- a/src/config/sessions/store-maintenance.ts +++ b/src/config/sessions/store-maintenance.ts @@ -200,6 +200,11 @@ export function shouldRunModelRunPrune(params: { "maxEntries" | "modelRunPruneAfterConfigured" | "modelRunPruneAfterMs" >; entryCount: number; + /** + * True when the caller caps immediately to `maxEntries` in the same pass (forced + * maintenance / `sessions cleanup`) rather than using the batched high-water trigger. + */ + force?: boolean; }): boolean { if (params.maintenance.modelRunPruneAfterMs == null) { return false; @@ -207,6 +212,13 @@ export function shouldRunModelRunPrune(params: { if (params.maintenance.modelRunPruneAfterConfigured) { return true; } + // Unset default is pressure-gated, and must align with whichever cap step runs alongside it. + // Forced maintenance caps immediately down to `maxEntries`, so prune stale probes first whenever + // that cap would actually evict; otherwise stale probes would survive while real sessions get + // capped (the inverse of #88632). Batched runtime writes instead use the high-water trigger. + if (params.force) { + return params.entryCount > params.maintenance.maxEntries; + } return shouldRunSessionEntryMaintenance({ entryCount: params.entryCount, maxEntries: params.maintenance.maxEntries, diff --git a/src/config/sessions/store.pruning.test.ts b/src/config/sessions/store.pruning.test.ts index e2c246f0327..4d135f28cfe 100644 --- a/src/config/sessions/store.pruning.test.ts +++ b/src/config/sessions/store.pruning.test.ts @@ -222,6 +222,56 @@ describe("applyFileBackedSessionStoreMaintenance", () => { ]); expect(trajectoryCleanupReferencedIds).toEqual(new Set(["shared-session", "active-session"])); }); + + it("forced cleanup prunes stale model-run probes before the cap evicts real sessions", async () => { + const now = Date.now(); + const staleProbe = "agent:main:explicit:model-run-123e4567-e89b-12d3-a456-426614174099"; + const store: Record = { + [staleProbe]: makeEntry(now - 2 * DAY_MS), + }; + for (let i = 0; i < 50; i++) { + store[`agent:main:explicit:real-${i}`] = makeEntry(now - 3 * DAY_MS); + } + let report: { modelRunPruned: number; pruned: number; capped: number } | undefined; + + const result = await applyFileBackedSessionStoreMaintenance({ + storePath: "/tmp/openclaw-sessions/sessions.json", + store, + maintenanceConfig: { + mode: "enforce", + pruneAfterMs: 7 * DAY_MS, + maxEntries: 50, + modelRunPruneAfterMs: DAY_MS, + modelRunPruneAfterConfigured: false, + resetArchiveRetentionMs: null, + maxDiskBytes: null, + highWaterBytes: null, + }, + maintenanceOverride: { mode: "enforce" }, + onMaintenanceApplied: (applied) => { + report = { + modelRunPruned: applied.modelRunPruned, + pruned: applied.pruned, + capped: applied.capped, + }; + }, + log: { warn: () => {}, info: () => {} }, + artifacts: { + archiveRemovedSessionTranscripts: async () => new Set(), + removeRemovedSessionTrajectoryArtifacts: async () => {}, + cleanupArchivedSessionTranscripts: async () => {}, + }, + }); + + expect(result.changedStore).toBe(true); + expect(report?.modelRunPruned).toBe(1); + expect(report?.capped).toBe(0); + expect(store[staleProbe]).toBeUndefined(); + expect(Object.keys(store)).toHaveLength(50); + for (let i = 0; i < 50; i++) { + expect(store).toHaveProperty(`agent:main:explicit:real-${i}`); + } + }); }); describe("pruneStaleModelRunEntries", () => { @@ -520,6 +570,61 @@ describe("resolveMaintenanceConfigFromInput", () => { expect(maintenance.mode).toBe("enforce"); }); + it("defaults gateway model-run probes to 24h retention with override and disable support", () => { + expect(resolveMaintenanceConfigFromInput().modelRunPruneAfterMs).toBe(DAY_MS); + expect( + resolveMaintenanceConfigFromInput({ modelRunPruneAfter: "48h" }).modelRunPruneAfterMs, + ).toBe(2 * DAY_MS); + expect( + resolveMaintenanceConfigFromInput({ modelRunPruneAfter: false }).modelRunPruneAfterMs, + ).toBe(null); + expect(resolveMaintenanceConfigFromInput().modelRunPruneAfterConfigured).toBe(false); + expect( + resolveMaintenanceConfigFromInput({ modelRunPruneAfter: "48h" }).modelRunPruneAfterConfigured, + ).toBe(true); + expect( + resolveMaintenanceConfigFromInput({ modelRunPruneAfter: false }).modelRunPruneAfterConfigured, + ).toBe(true); + expect( + resolveMaintenanceConfigFromInput({ modelRunPruneAfter: "bad" }).modelRunPruneAfterMs, + ).toBe(null); + expect( + resolveMaintenanceConfigFromInput({ modelRunPruneAfter: "bad" }).modelRunPruneAfterConfigured, + ).toBe(true); + }); + + it("force-gates the unset model-run prune default to the cap-eviction threshold", () => { + const defaultMaintenance = resolveMaintenanceConfigFromInput({ maxEntries: 50 }); + expect(resolveSessionEntryMaintenanceHighWater(50)).toBe(75); + expect(shouldRunModelRunPrune({ maintenance: defaultMaintenance, entryCount: 60 })).toBe(false); + expect( + shouldRunModelRunPrune({ maintenance: defaultMaintenance, entryCount: 60, force: true }), + ).toBe(true); + expect( + shouldRunModelRunPrune({ maintenance: defaultMaintenance, entryCount: 50, force: true }), + ).toBe(false); + expect( + shouldRunModelRunPrune({ + maintenance: resolveMaintenanceConfigFromInput({ + maxEntries: 50, + modelRunPruneAfter: "24h", + }), + entryCount: 1, + force: true, + }), + ).toBe(true); + expect( + shouldRunModelRunPrune({ + maintenance: resolveMaintenanceConfigFromInput({ + maxEntries: 50, + modelRunPruneAfter: false, + }), + entryCount: 60, + force: true, + }), + ).toBe(false); + }); + it("batches normal entry-count maintenance for production-sized caps", () => { expect(resolveSessionEntryMaintenanceHighWater(2)).toBe(3); expect(resolveSessionEntryMaintenanceHighWater(50)).toBe(75);