fix: ignore compaction checkpoints in session usage

This commit is contained in:
Peter Steinberger
2026-04-26 00:11:14 +01:00
parent 956cb1c7db
commit 1c6911c01f
11 changed files with 346 additions and 15 deletions

View File

@@ -1,9 +1,11 @@
import { describe, expect, it } from "vitest";
import {
formatSessionArchiveTimestamp,
isCompactionCheckpointTranscriptFileName,
isPrimarySessionTranscriptFileName,
isSessionArchiveArtifactName,
isUsageCountedSessionTranscriptFileName,
parseCompactionCheckpointTranscriptFileName,
parseUsageCountedSessionIdFromFileName,
parseSessionArchiveTimestamp,
} from "./artifacts.js";
@@ -21,6 +23,11 @@ describe("session artifact helpers", () => {
it("classifies primary transcript files", () => {
expect(isPrimarySessionTranscriptFileName("abc.jsonl")).toBe(true);
expect(isPrimarySessionTranscriptFileName("keep.deleted.keep.jsonl")).toBe(true);
expect(
isPrimarySessionTranscriptFileName(
"abc.checkpoint.11111111-1111-4111-8111-111111111111.jsonl",
),
).toBe(false);
expect(isPrimarySessionTranscriptFileName("abc.jsonl.deleted.2026-01-01T00-00-00.000Z")).toBe(
false,
);
@@ -38,6 +45,11 @@ describe("session artifact helpers", () => {
expect(isUsageCountedSessionTranscriptFileName("abc.jsonl.bak.2026-01-01T00-00-00.000Z")).toBe(
false,
);
expect(
isUsageCountedSessionTranscriptFileName(
"abc.checkpoint.11111111-1111-4111-8111-111111111111.jsonl",
),
).toBe(false);
});
it("parses usage-counted session ids from file names", () => {
@@ -51,6 +63,28 @@ describe("session artifact helpers", () => {
expect(parseUsageCountedSessionIdFromFileName("abc.jsonl.bak.2026-01-01T00-00-00.000Z")).toBe(
null,
);
expect(
parseUsageCountedSessionIdFromFileName(
"abc.checkpoint.11111111-1111-4111-8111-111111111111.jsonl",
),
).toBeNull();
});
// Verifies that only names of the exact form `<sessionId>.checkpoint.<uuid>.jsonl`
// are recognised as compaction checkpoint transcripts.
it("parses exact compaction checkpoint transcript file names", () => {
// A well-formed name yields both the session id and the checkpoint UUID.
expect(
parseCompactionCheckpointTranscriptFileName(
"abc.checkpoint.11111111-1111-4111-8111-111111111111.jsonl",
),
).toEqual({
sessionId: "abc",
checkpointId: "11111111-1111-4111-8111-111111111111",
});
// The checkpoint segment must be a UUID; arbitrary text is rejected.
expect(isCompactionCheckpointTranscriptFileName("abc.checkpoint.not-a-uuid.jsonl")).toBe(false);
// Anything after `.jsonl` (here an archive suffix) disqualifies the name.
expect(
isCompactionCheckpointTranscriptFileName(
"abc.checkpoint.11111111-1111-4111-8111-111111111111.jsonl.deleted.2026-01-01T00-00-00.000Z",
),
).toBe(false);
});
it("formats and parses archive timestamps", () => {

View File

@@ -2,6 +2,8 @@ export type SessionArchiveReason = "bak" | "reset" | "deleted";
const ARCHIVE_TIMESTAMP_RE = /^\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}(?:\.\d{3})?Z$/;
const LEGACY_STORE_BACKUP_RE = /^sessions\.json\.bak\.\d+$/;
// Matches `<sessionId>.checkpoint.<uuid>.jsonl`, case-insensitively.
// Group 1 is everything before the last `.checkpoint.` that is followed by a UUID
// (greedy, at least one character); group 2 is the UUID, whose version nibble must
// be 1-5 and whose variant nibble must be 8, 9, a or b.
// The pattern is anchored at both ends, so names with anything after `.jsonl`
// (for example an archive suffix) do not match.
const COMPACTION_CHECKPOINT_TRANSCRIPT_RE =
/^(.+)\.checkpoint\.([0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12})\.jsonl$/i;
function hasArchiveSuffix(fileName: string, reason: SessionArchiveReason): boolean {
const marker = `.${reason}.`;
@@ -24,6 +26,20 @@ export function isSessionArchiveArtifactName(fileName: string): boolean {
);
}
/**
 * Splits a compaction checkpoint transcript file name into its components.
 *
 * @param fileName - Bare file name (no directory part) to inspect.
 * @returns The session id and checkpoint id captured by
 *   `COMPACTION_CHECKPOINT_TRANSCRIPT_RE`, or `null` when the name does not
 *   match the pattern or either capture is empty.
 */
export function parseCompactionCheckpointTranscriptFileName(fileName: string): {
  sessionId: string;
  checkpointId: string;
} | null {
  const captured = COMPACTION_CHECKPOINT_TRANSCRIPT_RE.exec(fileName);
  if (!captured) {
    return null;
  }
  const [, sessionId, checkpointId] = captured;
  if (!sessionId || !checkpointId) {
    return null;
  }
  return { sessionId, checkpointId };
}
/**
 * Reports whether a file name is a compaction checkpoint transcript.
 *
 * @param fileName - Bare file name to classify.
 * @returns `true` exactly when `parseCompactionCheckpointTranscriptFileName`
 *   yields a result for the name.
 */
export function isCompactionCheckpointTranscriptFileName(fileName: string): boolean {
  const parsed = parseCompactionCheckpointTranscriptFileName(fileName);
  return parsed !== null;
}
export function isPrimarySessionTranscriptFileName(fileName: string): boolean {
if (fileName === "sessions.json") {
return false;
@@ -31,6 +47,9 @@ export function isPrimarySessionTranscriptFileName(fileName: string): boolean {
if (!fileName.endsWith(".jsonl")) {
return false;
}
if (isCompactionCheckpointTranscriptFileName(fileName)) {
return false;
}
return !isSessionArchiveArtifactName(fileName);
}

View File

@@ -81,4 +81,65 @@ describe("enforceSessionDiskBudget", () => {
);
});
});
// Verifies that disk-budget enforcement deletes a checkpoint snapshot that no
// store entry references, while keeping the live transcript and a checkpoint
// that is still referenced from `compactionCheckpoints` metadata.
it("removes unreferenced compaction checkpoint artifacts under pressure", async () => {
await withTempDir({ prefix: "openclaw-disk-budget-" }, async (dir) => {
const storePath = path.join(dir, "sessions.json");
const sessionId = "keep";
const transcriptPath = path.join(dir, `${sessionId}.jsonl`);
// Not mentioned anywhere in the store below: this is the removal candidate.
const checkpointPath = path.join(
dir,
"keep.checkpoint.11111111-1111-4111-8111-111111111111.jsonl",
);
// Referenced through `preCompaction.sessionFile` in the store entry below.
const referencedCheckpointPath = path.join(
dir,
"keep.checkpoint.22222222-2222-4222-8222-222222222222.jsonl",
);
const store: Record<string, SessionEntry> = {
"agent:main:main": {
sessionId,
updatedAt: Date.now(),
compactionCheckpoints: [
{
checkpointId: "referenced",
sessionKey: "agent:main:main",
sessionId,
createdAt: Date.now(),
reason: "manual",
preCompaction: {
sessionId,
sessionFile: referencedCheckpointPath,
leafId: "leaf",
},
postCompaction: { sessionId },
},
],
},
};
await fs.writeFile(storePath, JSON.stringify(store, null, 2), "utf-8");
await fs.writeFile(transcriptPath, "k".repeat(80), "utf-8");
// The unreferenced checkpoint alone (5000 bytes) exceeds maxDiskBytes (4000).
await fs.writeFile(checkpointPath, "c".repeat(5000), "utf-8");
await fs.writeFile(referencedCheckpointPath, "r".repeat(260), "utf-8");
const result = await enforceSessionDiskBudget({
store,
storePath,
maintenance: {
maxDiskBytes: 4000,
highWaterBytes: 3000,
},
warnOnly: false,
});
// Only the unreferenced checkpoint file may disappear.
await expect(fs.stat(transcriptPath)).resolves.toBeDefined();
await expect(fs.stat(checkpointPath)).rejects.toThrow();
await expect(fs.stat(referencedCheckpointPath)).resolves.toBeDefined();
// One file removed, and no store entries dropped.
expect(result).toEqual(
expect.objectContaining({
removedFiles: 1,
removedEntries: 0,
}),
);
});
});
});

View File

@@ -4,7 +4,11 @@ import {
normalizeLowercaseStringOrEmpty,
normalizeOptionalLowercaseString,
} from "../../shared/string-coerce.js";
import { isPrimarySessionTranscriptFileName, isSessionArchiveArtifactName } from "./artifacts.js";
import {
isCompactionCheckpointTranscriptFileName,
isPrimarySessionTranscriptFileName,
isSessionArchiveArtifactName,
} from "./artifacts.js";
import { resolveSessionFilePath } from "./paths.js";
import type { SessionEntry } from "./types.js";
@@ -120,6 +124,7 @@ function resolveReferencedSessionTranscriptPaths(params: {
store: Record<string, SessionEntry>;
}): Set<string> {
const referenced = new Set<string>();
const resolvedSessionsDir = canonicalizePathForComparison(params.sessionsDir);
for (const entry of Object.values(params.store)) {
const resolved = resolveSessionTranscriptPathForEntry({
sessionsDir: params.sessionsDir,
@@ -128,6 +133,17 @@ function resolveReferencedSessionTranscriptPaths(params: {
if (resolved) {
referenced.add(canonicalizePathForComparison(resolved));
}
for (const checkpoint of entry.compactionCheckpoints ?? []) {
const checkpointFile = checkpoint.preCompaction.sessionFile?.trim();
if (!checkpointFile) {
continue;
}
const resolvedCheckpointPath = canonicalizePathForComparison(checkpointFile);
const relative = path.relative(resolvedSessionsDir, resolvedCheckpointPath);
if (relative && !relative.startsWith("..") && !path.isAbsolute(relative)) {
referenced.add(resolvedCheckpointPath);
}
}
}
return referenced;
}
@@ -259,6 +275,8 @@ export async function enforceSessionDiskBudget(params: {
.filter(
(file) =>
isSessionArchiveArtifactName(file.name) ||
(isCompactionCheckpointTranscriptFileName(file.name) &&
!referencedPaths.has(file.canonicalPath)) ||
(isPrimarySessionTranscriptFileName(file.name) && !referencedPaths.has(file.canonicalPath)),
)
.toSorted((a, b) => a.mtimeMs - b.mtimeMs);

View File

@@ -5,6 +5,7 @@ import { expandHomePrefix, resolveRequiredHomeDir } from "../../infra/home-dir.j
import { DEFAULT_AGENT_ID, normalizeAgentId } from "../../routing/session-key.js";
import { normalizeLowercaseStringOrEmpty } from "../../shared/string-coerce.js";
import { resolveStateDir } from "../paths.js";
import { isCompactionCheckpointTranscriptFileName } from "./artifacts.js";
function resolveAgentSessionsDir(
agentId?: string,
@@ -62,7 +63,10 @@ export const SAFE_SESSION_ID_RE = /^[a-z0-9][a-z0-9._-]{0,127}$/i;
export function validateSessionId(sessionId: string): string {
const trimmed = sessionId.trim();
if (!SAFE_SESSION_ID_RE.test(trimmed)) {
if (
!SAFE_SESSION_ID_RE.test(trimmed) ||
isCompactionCheckpointTranscriptFileName(`${trimmed}.jsonl`)
) {
throw new Error(`Invalid session ID: ${sessionId}`);
}
return trimmed;

View File

@@ -21,7 +21,13 @@ import { mergeSessionEntry, type SessionEntry } from "./types.js";
describe("session path safety", () => {
it("rejects unsafe session IDs", () => {
const unsafeSessionIds = ["../etc/passwd", "a/b", "a\\b", "/abs"];
const unsafeSessionIds = [
"../etc/passwd",
"a/b",
"a\\b",
"/abs",
"sess.checkpoint.11111111-1111-4111-8111-111111111111",
];
for (const sessionId of unsafeSessionIds) {
expect(() => validateSessionId(sessionId), sessionId).toThrow(/Invalid session ID/);
}

View File

@@ -5,9 +5,11 @@ import path from "node:path";
import type { AssistantMessage, UserMessage } from "@mariozechner/pi-ai";
import { SessionManager } from "@mariozechner/pi-coding-agent";
import { afterEach, describe, expect, test } from "vitest";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import {
captureCompactionCheckpointSnapshot,
cleanupCompactionCheckpointSnapshot,
persistSessionCompactionCheckpoint,
} from "./session-compaction-checkpoints.js";
const tempDirs: string[] = [];
@@ -81,4 +83,83 @@ describe("session-compaction-checkpoints", () => {
expect(fsSync.existsSync(snapshot!.sessionFile)).toBe(false);
expect(fsSync.existsSync(sessionFile!)).toBe(true);
});
// Verifies that persisting a new checkpoint trims the per-session checkpoint
// list and unlinks the snapshot files of the entries that were trimmed.
test("persist trims old checkpoint metadata and removes trimmed snapshot files", async () => {
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-checkpoint-trim-"));
tempDirs.push(dir);
const storePath = path.join(dir, "sessions.json");
const sessionId = "sess";
const sessionKey = "agent:main:main";
const now = Date.now();
// Seed 26 existing checkpoints, oldest first, each backed by a real snapshot file
// whose name is a valid `<sessionId>.checkpoint.<uuid>.jsonl`.
const existingCheckpoints = Array.from({ length: 26 }, (_, index) => {
const uuid = `${String(index + 1).padStart(8, "0")}-1111-4111-8111-111111111111`;
const sessionFile = path.join(dir, `sess.checkpoint.${uuid}.jsonl`);
fsSync.writeFileSync(sessionFile, `checkpoint ${index}`, "utf-8");
return {
checkpointId: `old-${index}`,
sessionKey,
sessionId,
createdAt: now + index,
reason: "manual" as const,
preCompaction: {
sessionId,
sessionFile,
leafId: `old-leaf-${index}`,
},
postCompaction: { sessionId },
};
});
await fs.writeFile(
storePath,
JSON.stringify(
{
[sessionKey]: {
sessionId,
updatedAt: now,
compactionCheckpoints: existingCheckpoints,
},
},
null,
2,
),
"utf-8",
);
// Snapshot file for the checkpoint being persisted; it must survive cleanup.
const currentSnapshotFile = path.join(
dir,
"sess.checkpoint.99999999-9999-4999-8999-999999999999.jsonl",
);
await fs.writeFile(currentSnapshotFile, "current", "utf-8");
const stored = await persistSessionCompactionCheckpoint({
cfg: {
session: { store: storePath },
agents: { list: [{ id: "main", default: true }] },
} as OpenClawConfig,
sessionKey: "main",
sessionId,
reason: "manual",
snapshot: {
sessionId,
sessionFile: currentSnapshotFile,
leafId: "current-leaf",
},
createdAt: now + 100,
});
expect(stored).not.toBeNull();
// 26 existing + 1 new = 27 entries; 25 are expected to remain (asserted below),
// so the two oldest (index 0 and 1) are trimmed and their files deleted.
expect(fsSync.existsSync(existingCheckpoints[0].preCompaction.sessionFile)).toBe(false);
expect(fsSync.existsSync(existingCheckpoints[1].preCompaction.sessionFile)).toBe(false);
expect(fsSync.existsSync(existingCheckpoints[2].preCompaction.sessionFile)).toBe(true);
expect(fsSync.existsSync(currentSnapshotFile)).toBe(true);
// Re-read the store from disk to check the persisted metadata length.
const nextStore = JSON.parse(await fs.readFile(storePath, "utf-8")) as Record<
string,
{ compactionCheckpoints?: unknown[] }
>;
expect(
Object.values(nextStore).find((entry) => entry.compactionCheckpoints)?.compactionCheckpoints,
).toHaveLength(25);
});
});

View File

@@ -9,6 +9,7 @@ import type {
SessionCompactionCheckpointReason,
SessionEntry,
} from "../config/sessions.js";
import { isCompactionCheckpointTranscriptFileName } from "../config/sessions/artifacts.js";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import { createSubsystemLogger } from "../logging/subsystem.js";
import { resolveGatewaySessionStoreTarget } from "./session-utils.js";
@@ -22,13 +23,18 @@ export type CapturedCompactionCheckpointSnapshot = {
leafId: string;
};
function trimSessionCheckpoints(
checkpoints: SessionCompactionCheckpoint[] | undefined,
): SessionCompactionCheckpoint[] | undefined {
function trimSessionCheckpoints(checkpoints: SessionCompactionCheckpoint[] | undefined): {
kept: SessionCompactionCheckpoint[] | undefined;
removed: SessionCompactionCheckpoint[];
} {
if (!Array.isArray(checkpoints) || checkpoints.length === 0) {
return undefined;
return { kept: undefined, removed: [] };
}
return checkpoints.slice(-MAX_COMPACTION_CHECKPOINTS_PER_SESSION);
const kept = checkpoints.slice(-MAX_COMPACTION_CHECKPOINTS_PER_SESSION);
return {
kept,
removed: checkpoints.slice(0, Math.max(0, checkpoints.length - kept.length)),
};
}
function sessionStoreCheckpoints(
@@ -117,6 +123,40 @@ export async function cleanupCompactionCheckpointSnapshot(
}
}
/**
 * Best-effort removal of snapshot files that belong to checkpoints trimmed from
 * a session's checkpoint metadata.
 *
 * A file is unlinked only when all of the following hold:
 * - it is not the current snapshot file and is not referenced by any retained
 *   checkpoint (compared by resolved path, so different spellings of the same
 *   path are treated as equal);
 * - it lives directly in the same directory as the current snapshot file;
 * - its base name is a compaction checkpoint transcript name.
 *
 * @param params.removed - Checkpoints dropped from the metadata.
 * @param params.retained - Checkpoints still present in the metadata.
 * @param params.currentSnapshotFile - Snapshot file of the checkpoint being
 *   persisted; its directory bounds where deletions may happen.
 * @returns Resolves once every candidate has been attempted; unlink failures
 *   are swallowed.
 */
async function cleanupTrimmedCompactionCheckpointFiles(params: {
  removed: SessionCompactionCheckpoint[];
  retained: SessionCompactionCheckpoint[] | undefined;
  currentSnapshotFile: string;
}): Promise<void> {
  if (params.removed.length === 0) {
    return;
  }

  // Protected files are tracked by resolved path: deletion below also operates on
  // resolved paths, so comparing raw strings could delete a file that a retained
  // checkpoint references under a different spelling (relative vs absolute, "./").
  const protectedPaths = new Set<string>([path.resolve(params.currentSnapshotFile)]);
  for (const checkpoint of params.retained ?? []) {
    const retainedFile = checkpoint.preCompaction.sessionFile?.trim();
    if (retainedFile) {
      protectedPaths.add(path.resolve(retainedFile));
    }
  }

  const snapshotDir = path.resolve(path.dirname(params.currentSnapshotFile));

  // Collect unique deletion targets first so a file shared by several removed
  // checkpoints is unlinked only once.
  const targets = new Set<string>();
  for (const checkpoint of params.removed) {
    const sessionFile = checkpoint.preCompaction.sessionFile?.trim();
    if (!sessionFile) {
      continue;
    }
    const resolvedSessionFile = path.resolve(sessionFile);
    if (protectedPaths.has(resolvedSessionFile)) {
      continue;
    }
    // Never reach outside the snapshot directory or touch non-checkpoint files.
    if (
      path.dirname(resolvedSessionFile) !== snapshotDir ||
      !isCompactionCheckpointTranscriptFileName(path.basename(resolvedSessionFile))
    ) {
      continue;
    }
    targets.add(resolvedSessionFile);
  }

  for (const target of targets) {
    try {
      await fs.unlink(target);
    } catch {
      // Best-effort cleanup; disk budget can still collect old checkpoint artifacts.
    }
  }
}
export async function persistSessionCompactionCheckpoint(params: {
cfg: OpenClawConfig;
sessionKey: string;
@@ -163,6 +203,12 @@ export async function persistSessionCompactionCheckpoint(params: {
};
let stored = false;
let trimmedCheckpoints:
| {
kept: SessionCompactionCheckpoint[] | undefined;
removed: SessionCompactionCheckpoint[];
}
| undefined;
await updateSessionStore(target.storePath, (store) => {
const existing = store[target.canonicalKey];
if (!existing?.sessionId) {
@@ -170,10 +216,11 @@ export async function persistSessionCompactionCheckpoint(params: {
}
const checkpoints = sessionStoreCheckpoints(existing);
checkpoints.push(checkpoint);
trimmedCheckpoints = trimSessionCheckpoints(checkpoints);
store[target.canonicalKey] = {
...existing,
updatedAt: Math.max(existing.updatedAt ?? 0, createdAt),
compactionCheckpoints: trimSessionCheckpoints(checkpoints),
compactionCheckpoints: trimmedCheckpoints.kept,
};
stored = true;
});
@@ -184,6 +231,11 @@ export async function persistSessionCompactionCheckpoint(params: {
});
return null;
}
await cleanupTrimmedCompactionCheckpointFiles({
removed: trimmedCheckpoints?.removed ?? [],
retained: trimmedCheckpoints?.kept,
currentSnapshotFile: params.snapshot.sessionFile,
});
return checkpoint;
}

View File

@@ -18,6 +18,12 @@ describe("session cost usage", () => {
await withEnvAsync({ OPENCLAW_STATE_DIR: stateDir }, fn);
// Obtains a root path for one test case from the suite-level tracker.
const makeSessionCostRoot = async (prefix: string): Promise<string> => {
  const root = await suiteRootTracker.make(prefix);
  return root;
};
// Builds a two-record JSONL transcript: a session header line followed by the
// given entry, terminated by a trailing newline.
const transcriptText = (sessionId: string, entry: unknown): string => {
  const headerLine = JSON.stringify({ type: "session", version: 1, id: sessionId });
  const entryLine = JSON.stringify(entry);
  // The empty last element makes `join` emit the final "\n".
  const lines = [headerLine, entryLine, ""];
  return lines.join("\n");
};
beforeAll(async () => {
await suiteRootTracker.setup();
@@ -121,6 +127,54 @@ describe("session cost usage", () => {
});
});
// Verifies that a checkpoint snapshot sitting next to a session transcript is
// neither counted in usage totals nor reported as a separate session.
it("ignores compaction checkpoint transcript snapshots in daily totals and discovery", async () => {
const root = await makeSessionCostRoot("cost-checkpoint");
const sessionsDir = path.join(root, "agents", "main", "sessions");
await fs.mkdir(sessionsDir, { recursive: true });
const now = new Date();
// A single assistant message carrying 30 tokens and a total cost of 0.03.
const assistantEntry = {
type: "message",
timestamp: now.toISOString(),
message: {
role: "assistant",
provider: "openai",
model: "gpt-5.4",
usage: {
input: 10,
output: 20,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 30,
cost: { total: 0.03 },
},
},
};
// Primary transcript for the session.
await fs.writeFile(
path.join(sessionsDir, "sess-1.jsonl"),
transcriptText("sess-1", assistantEntry),
"utf-8",
);
// Checkpoint snapshot with identical content; counting it would double the totals.
await fs.writeFile(
path.join(sessionsDir, "sess-1.checkpoint.11111111-1111-4111-8111-111111111111.jsonl"),
transcriptText("sess-1", assistantEntry),
"utf-8",
);
await withStateDir(root, async () => {
// Totals equal one copy of the entry, so the checkpoint file was not counted.
const summary = await loadCostUsageSummary({ days: 30 });
expect(summary.daily.length).toBe(1);
expect(summary.totals.totalTokens).toBe(30);
expect(summary.totals.totalCost).toBeCloseTo(0.03, 5);
// Discovery reports only the primary transcript.
const sessions = await discoverAllSessions();
expect(sessions).toHaveLength(1);
expect(sessions[0]?.sessionId).toBe("sess-1");
expect(sessions[0]?.sessionFile.endsWith("sess-1.jsonl")).toBe(true);
});
});
it("summarizes a single session file", async () => {
const root = await makeSessionCostRoot("cost-session");
const sessionFile = path.join(root, "session.jsonl");

View File

@@ -643,7 +643,10 @@ describe("buildSessionEntry", () => {
it("skips deleted and checkpoint transcripts for dreaming ingestion", async () => {
const deletedPath = path.join(tmpDir, "ordinary.jsonl.deleted.2026-02-16T22-27-33.000Z");
const checkpointPath = path.join(tmpDir, "ordinary.checkpoint.abc123.jsonl");
const checkpointPath = path.join(
tmpDir,
"ordinary.checkpoint.11111111-1111-4111-8111-111111111111.jsonl",
);
const content = JSON.stringify({
type: "message",
message: { role: "user", content: "This should never reach the dreaming corpus." },

View File

@@ -7,6 +7,7 @@ import { HEARTBEAT_PROMPT } from "../../auto-reply/heartbeat.js";
import { stripInboundMetadata } from "../../auto-reply/reply/strip-inbound-meta.js";
import { HEARTBEAT_TOKEN, isSilentReplyPayloadText } from "../../auto-reply/tokens.js";
import {
isCompactionCheckpointTranscriptFileName,
isSessionArchiveArtifactName,
isUsageCountedSessionTranscriptFileName,
} from "../../config/sessions/artifacts.js";
@@ -58,13 +59,11 @@ type SessionTranscriptStoreEntry = {
sessionId?: unknown;
};
function isCheckpointTranscriptFileName(fileName: string): boolean {
return fileName.endsWith(".jsonl") && fileName.includes(".checkpoint.");
}
function shouldSkipTranscriptFileForDreaming(absPath: string): boolean {
const fileName = path.basename(absPath);
return isSessionArchiveArtifactName(fileName) || isCheckpointTranscriptFileName(fileName);
return (
isSessionArchiveArtifactName(fileName) || isCompactionCheckpointTranscriptFileName(fileName)
);
}
function isDreamingNarrativeBootstrapRecord(record: unknown): boolean {