fix(sessions): align forced model-run prune with cap eviction

Forced maintenance (`sessions cleanup` / maintenanceOverride) caps immediately to
maxEntries, but the default model-run prune (the one used when
modelRunPruneAfter is unset) was gated on the high-water trigger. When the entry
count fell between maxEntries and the high-water mark, stale model-run probes
survived while the forced cap evicted real sessions — the inverse of #88632.
shouldRunModelRunPrune now takes a force flag: when the caller caps immediately,
the unset default prunes as soon as entryCount > maxEntries. Pass force at the
two forced call sites (applyEnforcedMaintenance, previewStoreCleanup). Make the
SDK runtime config field modelRunPruneAfterConfigured optional (an additive
change). Add a force-gate unit test and a forced-apply regression test.
This commit is contained in:
wanglu241
2026-06-24 13:00:11 +08:00
committed by Josh Lehman
parent fc198d862a
commit 2fe50f69db
5 changed files with 123 additions and 1 deletions

View File

@@ -370,6 +370,10 @@ async function previewStoreCleanup(params: {
const modelRunPruned = shouldRunModelRunPrune({
maintenance: params.maintenance,
entryCount: Object.keys(previewStore).length,
// `sessions cleanup` applies the cap immediately (apply path forces maintenance and the
// preview caps unconditionally below), so mirror that here: prune stale probes before the
// forced cap can evict real sessions in their place.
force: true,
})
? pruneStaleModelRunEntries(previewStore, params.maintenance.modelRunPruneAfterMs, {
log: false,

View File

@@ -26,7 +26,7 @@ export type ResolvedSessionMaintenanceConfigRuntime = {
pruneAfterMs: number;
maxEntries: number;
modelRunPruneAfterMs: number | null;
modelRunPruneAfterConfigured: boolean;
modelRunPruneAfterConfigured?: boolean;
resetArchiveRetentionMs: number | null;
maxDiskBytes: number | null;
highWaterBytes: number | null;

View File

@@ -202,6 +202,7 @@ async function applyEnforcedMaintenance(params: {
const modelRunPruned = shouldRunModelRunPrune({
maintenance: params.maintenance,
entryCount: params.beforeCount,
force: params.forceMaintenance,
})
? pruneStaleModelRunEntries(params.operation.store, params.maintenance.modelRunPruneAfterMs, {
onPruned: ({ entry }) => {

View File

@@ -200,6 +200,11 @@ export function shouldRunModelRunPrune(params: {
"maxEntries" | "modelRunPruneAfterConfigured" | "modelRunPruneAfterMs"
>;
entryCount: number;
/**
* True when the caller caps immediately to `maxEntries` in the same pass (forced
* maintenance / `sessions cleanup`) rather than using the batched high-water trigger.
*/
force?: boolean;
}): boolean {
if (params.maintenance.modelRunPruneAfterMs == null) {
return false;
@@ -207,6 +212,13 @@ export function shouldRunModelRunPrune(params: {
if (params.maintenance.modelRunPruneAfterConfigured) {
return true;
}
// Unset default is pressure-gated, and must align with whichever cap step runs alongside it.
// Forced maintenance caps immediately down to `maxEntries`, so prune stale probes first whenever
// that cap would actually evict; otherwise stale probes would survive while real sessions get
// capped (the inverse of #88632). Batched runtime writes instead use the high-water trigger.
if (params.force) {
return params.entryCount > params.maintenance.maxEntries;
}
return shouldRunSessionEntryMaintenance({
entryCount: params.entryCount,
maxEntries: params.maintenance.maxEntries,

View File

@@ -222,6 +222,56 @@ describe("applyFileBackedSessionStoreMaintenance", () => {
]);
expect(trajectoryCleanupReferencedIds).toEqual(new Set(["shared-session", "active-session"]));
});
// Regression guard: one stale model-run probe sits on top of exactly `maxEntries` real sessions.
// A pass run with `maintenanceOverride: { mode: "enforce" }` must drop the probe rather than cap
// away a real session.
it("forced cleanup prunes stale model-run probes before the cap evicts real sessions", async () => {
  const realSessionCount = 50;
  const startedAt = Date.now();
  const probeKey = "agent:main:explicit:model-run-123e4567-e89b-12d3-a456-426614174099";
  const realKeys = Array.from(
    { length: realSessionCount },
    (_, index) => `agent:main:explicit:real-${index}`,
  );

  // The probe (2 days old) is older than the 1-day model-run retention configured below; the real
  // sessions (3 days old) are still inside the 7-day `pruneAfterMs` window.
  // Insertion order is probe first, then real-0..real-49.
  const store: Record<string, SessionEntry> = {
    [probeKey]: makeEntry(startedAt - 2 * DAY_MS),
  };
  for (const key of realKeys) {
    store[key] = makeEntry(startedAt - 3 * DAY_MS);
  }

  // Filled in by the `onMaintenanceApplied` callback.
  let report: { modelRunPruned: number; pruned: number; capped: number } | undefined;

  const result = await applyFileBackedSessionStoreMaintenance({
    storePath: "/tmp/openclaw-sessions/sessions.json",
    store,
    maintenanceConfig: {
      mode: "enforce",
      pruneAfterMs: 7 * DAY_MS,
      maxEntries: realSessionCount,
      modelRunPruneAfterMs: DAY_MS,
      // `false` exercises the unset-default (pressure-gated) branch of the prune decision.
      modelRunPruneAfterConfigured: false,
      resetArchiveRetentionMs: null,
      maxDiskBytes: null,
      highWaterBytes: null,
    },
    maintenanceOverride: { mode: "enforce" },
    onMaintenanceApplied: ({ modelRunPruned, pruned, capped }) => {
      report = { modelRunPruned, pruned, capped };
    },
    log: { warn: () => {}, info: () => {} },
    artifacts: {
      archiveRemovedSessionTranscripts: async () => new Set(),
      removeRemovedSessionTrajectoryArtifacts: async () => {},
      cleanupArchivedSessionTranscripts: async () => {},
    },
  });

  expect(result.changedStore).toBe(true);
  // Exactly the probe was pruned, and the cap had nothing left to evict.
  expect(report?.modelRunPruned).toBe(1);
  expect(report?.capped).toBe(0);
  expect(store[probeKey]).toBeUndefined();
  expect(Object.keys(store)).toHaveLength(realSessionCount);
  for (const key of realKeys) {
    expect(store).toHaveProperty(key);
  }
});
});
describe("pruneStaleModelRunEntries", () => {
@@ -520,6 +570,61 @@ describe("resolveMaintenanceConfigFromInput", () => {
expect(maintenance.mode).toBe("enforce");
});
// Pins how `modelRunPruneAfter` input maps onto the resolved retention (`modelRunPruneAfterMs`)
// and the "was it explicitly configured" flag (`modelRunPruneAfterConfigured`).
it("defaults gateway model-run probes to 24h retention with override and disable support", () => {
  // Resolve each input shape once; the assertions below read both fields from each result.
  const unset = resolveMaintenanceConfigFromInput();
  const overridden = resolveMaintenanceConfigFromInput({ modelRunPruneAfter: "48h" });
  const disabled = resolveMaintenanceConfigFromInput({ modelRunPruneAfter: false });
  const invalid = resolveMaintenanceConfigFromInput({ modelRunPruneAfter: "bad" });

  // Retention: one day by default, the override when given, null when disabled.
  expect(unset.modelRunPruneAfterMs).toBe(DAY_MS);
  expect(overridden.modelRunPruneAfterMs).toBe(2 * DAY_MS);
  expect(disabled.modelRunPruneAfterMs).toBe(null);

  // Only a missing value counts as "not configured".
  expect(unset.modelRunPruneAfterConfigured).toBe(false);
  expect(overridden.modelRunPruneAfterConfigured).toBe(true);
  expect(disabled.modelRunPruneAfterConfigured).toBe(true);

  // An unparseable value resolves to null retention but still counts as configured.
  expect(invalid.modelRunPruneAfterMs).toBe(null);
  expect(invalid.modelRunPruneAfterConfigured).toBe(true);
});
// Covers the `force` flag of `shouldRunModelRunPrune` across the three retention states:
// unset default, explicitly configured, and explicitly disabled.
it("force-gates the unset model-run prune default to the cap-eviction threshold", () => {
  const maxEntries = 50;
  const unsetDefault = resolveMaintenanceConfigFromInput({ maxEntries });
  const explicitRetention = resolveMaintenanceConfigFromInput({
    maxEntries,
    modelRunPruneAfter: "24h",
  });
  const pruneDisabled = resolveMaintenanceConfigFromInput({
    maxEntries,
    modelRunPruneAfter: false,
  });

  // 60 entries is above the cap (50) but below the high-water mark (75).
  expect(resolveSessionEntryMaintenanceHighWater(maxEntries)).toBe(75);

  // Unset default: not run in that window without force, run with force.
  const belowHighWater = shouldRunModelRunPrune({ maintenance: unsetDefault, entryCount: 60 });
  expect(belowHighWater).toBe(false);
  const forcedAboveCap = shouldRunModelRunPrune({
    maintenance: unsetDefault,
    entryCount: 60,
    force: true,
  });
  expect(forcedAboveCap).toBe(true);

  // With force, a store sitting exactly at the cap is not pruned.
  const forcedAtCap = shouldRunModelRunPrune({
    maintenance: unsetDefault,
    entryCount: maxEntries,
    force: true,
  });
  expect(forcedAtCap).toBe(false);

  // An explicitly configured retention always prunes, regardless of entry count.
  const forcedExplicit = shouldRunModelRunPrune({
    maintenance: explicitRetention,
    entryCount: 1,
    force: true,
  });
  expect(forcedExplicit).toBe(true);

  // Explicitly disabled retention never prunes, even when forced above the cap.
  const forcedDisabled = shouldRunModelRunPrune({
    maintenance: pruneDisabled,
    entryCount: 60,
    force: true,
  });
  expect(forcedDisabled).toBe(false);
});
it("batches normal entry-count maintenance for production-sized caps", () => {
expect(resolveSessionEntryMaintenanceHighWater(2)).toBe(3);
expect(resolveSessionEntryMaintenanceHighWater(50)).toBe(75);