diff --git a/CHANGELOG.md b/CHANGELOG.md index f5d93e00d71..aa9198687f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai - CLI/models: keep route-first `models status --json` stdout reserved for the JSON payload by routing auth-profile and startup diagnostics to stderr. Fixes #72962. Thanks @vishutdhar. - Sessions: ignore future-dated session activity timestamps during reset freshness checks and cap future `updatedAt` values at the merge boundary so clock-skewed messages cannot keep stale sessions alive forever. Fixes #72989. Thanks @martingarramon. - Sessions: apply search, activity filters, and limits before gateway row enrichment so bounded session lists avoid scanning discarded transcripts. Carries forward #72978. Thanks @yeager. +- Sessions: remove trajectory runtime and pointer sidecars when session maintenance prunes, caps, or disk-evicts their owning session, while preserving sidecars still referenced by live rows. Fixes #73000. Thanks @jared-rebel. - Plugins/CLI: allow managed plugin installs when the active extensions root is a symlink to a real state directory, while keeping nested target symlinks blocked and suppressing misleading hook-pack fallback errors for install-boundary failures. Fixes #72946. Thanks @mayank6136. - Providers/Ollama: mark discovered Ollama catalog models as supporting streaming usage metadata so token accounting stays enabled for local models. (#72976) Thanks @sdeyang. - Gateway/startup: keep hot Gateway boot paths on leaf config imports and add max-RSS reporting to the gateway startup bench so low-memory startup regressions are visible before release. Thanks @vincentkoc. diff --git a/docs/cli/sessions.md b/docs/cli/sessions.md index 719ba782e58..721024fc1b3 100644 --- a/docs/cli/sessions.md +++ b/docs/cli/sessions.md @@ -68,7 +68,7 @@ openclaw sessions cleanup --json `openclaw sessions cleanup` uses `session.maintenance` settings from config: -- Scope note: `openclaw sessions cleanup` maintains session stores/transcripts only. It does not prune cron run logs (`cron/runs/.jsonl`), which are managed by `cron.runLog.maxBytes` and `cron.runLog.keepLines` in [Cron configuration](/automation/cron-jobs#configuration) and explained in [Cron maintenance](/automation/cron-jobs#maintenance). +- Scope note: `openclaw sessions cleanup` maintains session stores, transcripts, and trajectory sidecars. It does not prune cron run logs (`cron/runs/.jsonl`), which are managed by `cron.runLog.maxBytes` and `cron.runLog.keepLines` in [Cron configuration](/automation/cron-jobs#configuration) and explained in [Cron maintenance](/automation/cron-jobs#maintenance). - `--dry-run`: preview how many entries would be pruned/capped without writing. - In text mode, dry-run prints a per-session action table (`Action`, `Key`, `Age`, `Model`, `Flags`) so you can see what would be kept vs removed. diff --git a/docs/reference/session-management-compaction.md b/docs/reference/session-management-compaction.md index 26f9ed61590..e6efb47b29b 100644 --- a/docs/reference/session-management-compaction.md +++ b/docs/reference/session-management-compaction.md @@ -70,7 +70,7 @@ OpenClaw resolves these via `src/config/sessions.ts`. ## Store maintenance and disk controls -Session persistence has automatic maintenance controls (`session.maintenance`) for `sessions.json` and transcript artifacts: +Session persistence has automatic maintenance controls (`session.maintenance`) for `sessions.json`, transcript artifacts, and trajectory sidecars: - `mode`: `warn` (default) or `enforce` - `pruneAfter`: stale-entry age cutoff (default `30d`) @@ -84,8 +84,8 @@ Normal Gateway writes batch `maxEntries` cleanup for production-sized caps, so a Enforcement order for disk budget cleanup (`mode: "enforce"`): -1. Remove oldest archived or orphan transcript artifacts first. -2. If still above the target, evict oldest session entries and their transcript files. +1. Remove oldest archived, orphan transcript, or orphan trajectory artifacts first. +2. If still above the target, evict oldest session entries and their transcript/trajectory files. 3. Keep going until usage is at or below `highWaterBytes`. In `mode: "warn"`, OpenClaw reports potential evictions but does not mutate the store/files. diff --git a/docs/tools/trajectory.md b/docs/tools/trajectory.md index 8e22dd5bebc..c6678ae2c6a 100644 --- a/docs/tools/trajectory.md +++ b/docs/tools/trajectory.md @@ -129,6 +129,11 @@ export OPENCLAW_TRAJECTORY_DIR=/var/lib/openclaw/trajectories When this variable is set, OpenClaw writes one JSONL file per session id in that directory. +Session maintenance removes trajectory sidecars when their owning session entry +is pruned, capped, or evicted by the sessions disk budget. Runtime files outside +the sessions directory are removed only when the pointer target still proves it +belongs to that session. + ## Disable capture Set `OPENCLAW_TRAJECTORY=0` before starting OpenClaw: diff --git a/src/config/sessions/artifacts.test.ts b/src/config/sessions/artifacts.test.ts index 8b7d27bdf2f..e0d00e2a945 100644 --- a/src/config/sessions/artifacts.test.ts +++ b/src/config/sessions/artifacts.test.ts @@ -4,6 +4,9 @@ import { isCompactionCheckpointTranscriptFileName, isPrimarySessionTranscriptFileName, isSessionArchiveArtifactName, + isTrajectoryPointerArtifactName, + isTrajectoryRuntimeArtifactName, + isTrajectorySessionArtifactName, isUsageCountedSessionTranscriptFileName, parseCompactionCheckpointTranscriptFileName, parseUsageCountedSessionIdFromFileName, @@ -31,9 +34,18 @@ describe("session artifact helpers", () => { expect(isPrimarySessionTranscriptFileName("abc.jsonl.deleted.2026-01-01T00-00-00.000Z")).toBe( false, ); + expect(isPrimarySessionTranscriptFileName("abc.trajectory.jsonl")).toBe(false); expect(isPrimarySessionTranscriptFileName("sessions.json")).toBe(false); }); + it("classifies trajectory sidecar artifacts", () => { + expect(isTrajectoryRuntimeArtifactName("abc.trajectory.jsonl")).toBe(true); + expect(isTrajectoryPointerArtifactName("abc.trajectory-path.json")).toBe(true); + expect(isTrajectorySessionArtifactName("abc.trajectory.jsonl")).toBe(true); + expect(isTrajectorySessionArtifactName("abc.trajectory-path.json")).toBe(true); + expect(isTrajectorySessionArtifactName("abc.jsonl")).toBe(false); + }); + it("classifies usage-counted transcript files", () => { expect(isUsageCountedSessionTranscriptFileName("abc.jsonl")).toBe(true); expect( @@ -50,6 +62,7 @@ describe("session artifact helpers", () => { "abc.checkpoint.11111111-1111-4111-8111-111111111111.jsonl", ), ).toBe(false); + expect(isUsageCountedSessionTranscriptFileName("abc.trajectory.jsonl")).toBe(false); }); it("parses usage-counted session ids from file names", () => { @@ -68,6 +81,7 @@ describe("session artifact helpers", () => { "abc.checkpoint.11111111-1111-4111-8111-111111111111.jsonl", ), ).toBeNull(); + expect(parseUsageCountedSessionIdFromFileName("abc.trajectory.jsonl")).toBeNull(); }); it("parses exact compaction checkpoint transcript file names", () => { diff --git a/src/config/sessions/artifacts.ts b/src/config/sessions/artifacts.ts index 73d47874afc..305316b0346 100644 --- a/src/config/sessions/artifacts.ts +++ b/src/config/sessions/artifacts.ts @@ -40,6 +40,18 @@ export function isCompactionCheckpointTranscriptFileName(fileName: string): bool return parseCompactionCheckpointTranscriptFileName(fileName) !== null; } +export function isTrajectoryRuntimeArtifactName(fileName: string): boolean { + return fileName.endsWith(".trajectory.jsonl"); +} + +export function isTrajectoryPointerArtifactName(fileName: string): boolean { + return fileName.endsWith(".trajectory-path.json"); +} + +export function isTrajectorySessionArtifactName(fileName: string): boolean { + return isTrajectoryRuntimeArtifactName(fileName) || isTrajectoryPointerArtifactName(fileName); +} + export function isPrimarySessionTranscriptFileName(fileName: string): boolean { if (fileName === "sessions.json") { return false; @@ -47,6 +59,9 @@ export function isPrimarySessionTranscriptFileName(fileName: string): boolean { if (!fileName.endsWith(".jsonl")) { return false; } + if (isTrajectoryRuntimeArtifactName(fileName)) { + return false; + } if (isCompactionCheckpointTranscriptFileName(fileName)) { return false; } diff --git a/src/config/sessions/disk-budget.test.ts b/src/config/sessions/disk-budget.test.ts index 6c214625c0b..175e9facb59 100644 --- a/src/config/sessions/disk-budget.test.ts +++ b/src/config/sessions/disk-budget.test.ts @@ -2,6 +2,10 @@ import fs from "node:fs/promises"; import path from "node:path"; import { describe, expect, it } from "vitest"; import { withTempDir } from "../../test-helpers/temp-dir.js"; +import { + resolveTrajectoryFilePath, + resolveTrajectoryPointerFilePath, +} from "../../trajectory/paths.js"; import { formatSessionArchiveTimestamp } from "./artifacts.js"; import { enforceSessionDiskBudget } from "./disk-budget.js"; import type { SessionEntry } from "./types.js"; @@ -142,4 +146,54 @@ describe("enforceSessionDiskBudget", () => { ); }); }); + + it("removes unreferenced trajectory sidecars while preserving referenced ones", async () => { + await withTempDir({ prefix: "openclaw-disk-budget-" }, async (dir) => { + const storePath = path.join(dir, "sessions.json"); + const sessionId = "keep"; + const transcriptPath = path.join(dir, `${sessionId}.jsonl`); + const referencedRuntime = resolveTrajectoryFilePath({ + env: {}, + sessionFile: transcriptPath, + sessionId, + }); + const referencedPointer = resolveTrajectoryPointerFilePath(transcriptPath); + const orphanRuntime = path.join(dir, "old.trajectory.jsonl"); + const orphanPointer = path.join(dir, "old.trajectory-path.json"); + const store: Record = { + "agent:main:main": { + sessionId, + updatedAt: Date.now(), + }, + }; + await fs.writeFile(storePath, JSON.stringify(store, null, 2), "utf-8"); + await fs.writeFile(transcriptPath, "k".repeat(80), "utf-8"); + await fs.writeFile(referencedRuntime, "r".repeat(80), "utf-8"); + await fs.writeFile(referencedPointer, "p".repeat(80), "utf-8"); + await fs.writeFile(orphanRuntime, "o".repeat(5000), "utf-8"); + await fs.writeFile(orphanPointer, "q".repeat(5000), "utf-8"); + + const result = await enforceSessionDiskBudget({ + store, + storePath, + maintenance: { + maxDiskBytes: 7000, + highWaterBytes: 2000, + }, + warnOnly: false, + }); + + await expect(fs.stat(transcriptPath)).resolves.toBeDefined(); + await expect(fs.stat(referencedRuntime)).resolves.toBeDefined(); + await expect(fs.stat(referencedPointer)).resolves.toBeDefined(); + await expect(fs.stat(orphanRuntime)).rejects.toThrow(); + await expect(fs.stat(orphanPointer)).rejects.toThrow(); + expect(result).toEqual( + expect.objectContaining({ + removedFiles: 2, + removedEntries: 0, + }), + ); + }); + }); }); diff --git a/src/config/sessions/disk-budget.ts b/src/config/sessions/disk-budget.ts index d17155aa16b..eacfb515d5a 100644 --- a/src/config/sessions/disk-budget.ts +++ b/src/config/sessions/disk-budget.ts @@ -4,10 +4,15 @@ import { normalizeLowercaseStringOrEmpty, normalizeOptionalLowercaseString, } from "../../shared/string-coerce.js"; +import { + resolveTrajectoryFilePath, + resolveTrajectoryPointerFilePath, +} from "../../trajectory/paths.js"; import { isCompactionCheckpointTranscriptFileName, isPrimarySessionTranscriptFileName, isSessionArchiveArtifactName, + isTrajectorySessionArtifactName, } from "./artifacts.js"; import { resolveSessionFilePath } from "./paths.js"; import type { SessionEntry } from "./types.js"; @@ -119,18 +124,39 @@ function resolveSessionTranscriptPathForEntry(params: { } } -function resolveReferencedSessionTranscriptPaths(params: { +function resolveSessionArtifactPathsForEntry(params: { + sessionsDir: string; + entry: SessionEntry; +}): string[] { + const transcriptPath = resolveSessionTranscriptPathForEntry(params); + if (!transcriptPath) { + return []; + } + const paths = [transcriptPath]; + if (params.entry.sessionId) { + paths.push(resolveTrajectoryPointerFilePath(transcriptPath)); + paths.push( + resolveTrajectoryFilePath({ + env: {}, + sessionFile: transcriptPath, + sessionId: params.entry.sessionId, + }), + ); + } + return paths; +} + +function resolveReferencedSessionArtifactPaths(params: { sessionsDir: string; store: Record; }): Set { const referenced = new Set(); const resolvedSessionsDir = canonicalizePathForComparison(params.sessionsDir); for (const entry of Object.values(params.store)) { - const resolved = resolveSessionTranscriptPathForEntry({ + for (const resolved of resolveSessionArtifactPathsForEntry({ sessionsDir: params.sessionsDir, entry, - }); - if (resolved) { + })) { referenced.add(canonicalizePathForComparison(resolved)); } for (const checkpoint of entry.compactionCheckpoints ?? []) { @@ -267,7 +293,7 @@ export async function enforceSessionDiskBudget(params: { let removedEntries = 0; let freedBytes = 0; - const referencedPaths = resolveReferencedSessionTranscriptPaths({ + const referencedPaths = resolveReferencedSessionArtifactPaths({ sessionsDir, store: params.store, }); @@ -277,6 +303,7 @@ export async function enforceSessionDiskBudget(params: { isSessionArchiveArtifactName(file.name) || (isCompactionCheckpointTranscriptFileName(file.name) && !referencedPaths.has(file.canonicalPath)) || + (isTrajectorySessionArtifactName(file.name) && !referencedPaths.has(file.canonicalPath)) || (isPrimarySessionTranscriptFileName(file.name) && !referencedPaths.has(file.canonicalPath)), ) .toSorted((a, b) => a.mtimeMs - b.mtimeMs); @@ -342,22 +369,20 @@ export async function enforceSessionDiskBudget(params: { continue; } sessionIdRefCounts.delete(sessionId); - const transcriptPath = resolveSessionTranscriptPathForEntry({ sessionsDir, entry }); - if (!transcriptPath) { - continue; + for (const artifactPath of resolveSessionArtifactPathsForEntry({ sessionsDir, entry })) { + const deletedBytes = await removeFileForBudget({ + filePath: artifactPath, + dryRun, + fileSizesByPath, + simulatedRemovedPaths, + }); + if (deletedBytes <= 0) { + continue; + } + total -= deletedBytes; + freedBytes += deletedBytes; + removedFiles += 1; } - const deletedBytes = await removeFileForBudget({ - filePath: transcriptPath, - dryRun, - fileSizesByPath, - simulatedRemovedPaths, - }); - if (deletedBytes <= 0) { - continue; - } - total -= deletedBytes; - freedBytes += deletedBytes; - removedFiles += 1; } } diff --git a/src/config/sessions/store.pruning.integration.test.ts b/src/config/sessions/store.pruning.integration.test.ts index 70a8209a87a..0ec493367d7 100644 --- a/src/config/sessions/store.pruning.integration.test.ts +++ b/src/config/sessions/store.pruning.integration.test.ts @@ -3,6 +3,10 @@ import fs from "node:fs/promises"; import path from "node:path"; import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest"; import { createSuiteTempRootTracker } from "../../test-helpers/temp-dir.js"; +import { + resolveTrajectoryFilePath, + resolveTrajectoryPointerFilePath, +} from "../../trajectory/paths.js"; import type { SessionEntry } from "./types.js"; // Keep integration tests deterministic: never read a real openclaw.json. @@ -153,6 +157,63 @@ describe("Integration: saveSessionStore with pruning", () => { expect(archived).toHaveLength(1); }); + it("removes trajectory sidecars for stale sessions pruned on write", async () => { + applyEnforcedMaintenanceConfig(mockLoadConfig); + + const now = Date.now(); + const staleSessionId = "stale-trajectory-session"; + const freshSessionId = "fresh-trajectory-session"; + const store: Record = { + stale: { sessionId: staleSessionId, updatedAt: now - 30 * DAY_MS }, + fresh: { sessionId: freshSessionId, updatedAt: now }, + }; + const staleTranscript = path.join(testDir, `${staleSessionId}.jsonl`); + const freshTranscript = path.join(testDir, `${freshSessionId}.jsonl`); + const staleRuntime = resolveTrajectoryFilePath({ + env: {}, + sessionFile: staleTranscript, + sessionId: staleSessionId, + }); + const freshRuntime = resolveTrajectoryFilePath({ + env: {}, + sessionFile: freshTranscript, + sessionId: freshSessionId, + }); + const stalePointer = resolveTrajectoryPointerFilePath(staleTranscript); + const freshPointer = resolveTrajectoryPointerFilePath(freshTranscript); + await fs.writeFile(staleTranscript, '{"type":"session"}\n', "utf-8"); + await fs.writeFile(freshTranscript, '{"type":"session"}\n', "utf-8"); + await fs.writeFile(staleRuntime, '{"traceSchema":"openclaw-trajectory"}\n', "utf-8"); + await fs.writeFile(freshRuntime, '{"traceSchema":"openclaw-trajectory"}\n', "utf-8"); + await fs.writeFile( + stalePointer, + JSON.stringify({ + traceSchema: "openclaw-trajectory-pointer", + schemaVersion: 1, + sessionId: staleSessionId, + runtimeFile: staleRuntime, + }), + "utf-8", + ); + await fs.writeFile( + freshPointer, + JSON.stringify({ + traceSchema: "openclaw-trajectory-pointer", + schemaVersion: 1, + sessionId: freshSessionId, + runtimeFile: freshRuntime, + }), + "utf-8", + ); + + await saveSessionStore(storePath, store); + + await expect(fs.stat(staleRuntime)).rejects.toThrow(); + await expect(fs.stat(stalePointer)).rejects.toThrow(); + await expect(fs.stat(freshRuntime)).resolves.toBeDefined(); + await expect(fs.stat(freshPointer)).resolves.toBeDefined(); + }); + it("cleans up archived transcripts older than the prune window", async () => { applyEnforcedMaintenanceConfig(mockLoadConfig); diff --git a/src/config/sessions/store.ts b/src/config/sessions/store.ts index da26c8e8b11..4da81b18da1 100644 --- a/src/config/sessions/store.ts +++ b/src/config/sessions/store.ts @@ -62,6 +62,8 @@ const log = createSubsystemLogger("sessions/store"); let sessionArchiveRuntimePromise: Promise< typeof import("../../gateway/session-archive.runtime.js") > | null = null; +let trajectoryCleanupRuntimePromise: Promise | null = + null; let sessionWriteLockAcquirerForTests: typeof acquireSessionWriteLock | null = null; function loadSessionArchiveRuntime() { @@ -69,6 +71,11 @@ function loadSessionArchiveRuntime() { return sessionArchiveRuntimePromise; } +function loadTrajectoryCleanupRuntime() { + trajectoryCleanupRuntimePromise ??= import("../../trajectory/cleanup.js"); + return trajectoryCleanupRuntimePromise; +} + function removeThreadFromDeliveryContext(context?: DeliveryContext): DeliveryContext | undefined { if (!context || context.threadId == null) { return context; @@ -327,6 +334,15 @@ async function saveSessionStoreUnlocked( reason: "deleted", restrictToStoreDir: true, }); + if (removedSessionFiles.size > 0) { + const { removeRemovedSessionTrajectoryArtifacts } = await loadTrajectoryCleanupRuntime(); + await removeRemovedSessionTrajectoryArtifacts({ + removedSessionFiles, + referencedSessionIds, + storePath, + restrictToStoreDir: true, + }); + } for (const archivedDir of archivedForDeletedSessions) { archivedDirs.add(archivedDir); } diff --git a/src/trajectory/cleanup.test.ts b/src/trajectory/cleanup.test.ts new file mode 100644 index 00000000000..19b933314a1 --- /dev/null +++ b/src/trajectory/cleanup.test.ts @@ -0,0 +1,118 @@ +import fs from "node:fs/promises"; +import path from "node:path"; +import { describe, expect, it } from "vitest"; +import { withTempDir } from "../test-helpers/temp-dir.js"; +import { + removeRemovedSessionTrajectoryArtifacts, + removeSessionTrajectoryArtifacts, +} from "./cleanup.js"; +import { resolveTrajectoryFilePath, resolveTrajectoryPointerFilePath } from "./paths.js"; + +function runtimeEvent(sessionId: string): string { + return `${JSON.stringify({ + traceSchema: "openclaw-trajectory", + schemaVersion: 1, + traceId: sessionId, + source: "runtime", + type: "session.started", + ts: "2026-04-22T08:00:00.000Z", + seq: 1, + sourceSeq: 1, + sessionId, + })}\n`; +} + +function pointerFile(sessionId: string, runtimeFile: string): string { + return `${JSON.stringify({ + traceSchema: "openclaw-trajectory-pointer", + schemaVersion: 1, + sessionId, + runtimeFile, + })}\n`; +} + +describe("trajectory cleanup", () => { + it("removes adjacent trajectory sidecars for a deleted session", async () => { + await withTempDir({ prefix: "openclaw-trajectory-cleanup-" }, async (dir) => { + const sessionId = "session-1"; + const storePath = path.join(dir, "sessions.json"); + const sessionFile = path.join(dir, `${sessionId}.jsonl`); + const runtimeFile = resolveTrajectoryFilePath({ env: {}, sessionFile, sessionId }); + const pointerPath = resolveTrajectoryPointerFilePath(sessionFile); + await fs.writeFile(runtimeFile, runtimeEvent(sessionId), "utf8"); + await fs.writeFile(pointerPath, pointerFile(sessionId, runtimeFile), "utf8"); + + const removed = await removeSessionTrajectoryArtifacts({ + sessionId, + sessionFile, + storePath, + restrictToStoreDir: true, + }); + + expect(removed.map((entry) => entry.kind).toSorted()).toEqual(["pointer", "runtime"]); + await expect(fs.stat(runtimeFile)).rejects.toThrow(); + await expect(fs.stat(pointerPath)).rejects.toThrow(); + }); + }); + + it("skips removed sessions still referenced by surviving store rows", async () => { + await withTempDir({ prefix: "openclaw-trajectory-cleanup-" }, async (dir) => { + const sessionId = "shared-session"; + const storePath = path.join(dir, "sessions.json"); + const sessionFile = path.join(dir, `${sessionId}.jsonl`); + const runtimeFile = resolveTrajectoryFilePath({ env: {}, sessionFile, sessionId }); + const pointerPath = resolveTrajectoryPointerFilePath(sessionFile); + await fs.writeFile(runtimeFile, runtimeEvent(sessionId), "utf8"); + await fs.writeFile(pointerPath, pointerFile(sessionId, runtimeFile), "utf8"); + + const removed = await removeRemovedSessionTrajectoryArtifacts({ + removedSessionFiles: [[sessionId, sessionFile]], + referencedSessionIds: new Set([sessionId]), + storePath, + restrictToStoreDir: true, + }); + + expect(removed).toEqual([]); + await expect(fs.stat(runtimeFile)).resolves.toBeDefined(); + await expect(fs.stat(pointerPath)).resolves.toBeDefined(); + }); + }); + + it("only removes external pointer targets that prove they belong to the session", async () => { + await withTempDir({ prefix: "openclaw-trajectory-cleanup-" }, async (dir) => { + const sessionId = "session-2"; + const sessionsDir = path.join(dir, "sessions"); + const storePath = path.join(sessionsDir, "sessions.json"); + const sessionFile = path.join(sessionsDir, `${sessionId}.jsonl`); + const externalDir = path.join(dir, "external"); + await fs.mkdir(sessionsDir); + await fs.mkdir(externalDir); + const safeExternalRuntime = path.join(externalDir, `${sessionId}.jsonl`); + const unsafeExternalRuntime = path.join(externalDir, "unsafe.jsonl"); + await fs.writeFile(safeExternalRuntime, runtimeEvent(sessionId), "utf8"); + await fs.writeFile(unsafeExternalRuntime, runtimeEvent(sessionId), "utf8"); + + const pointerPath = resolveTrajectoryPointerFilePath(sessionFile); + await fs.writeFile(pointerPath, pointerFile(sessionId, safeExternalRuntime), "utf8"); + await removeSessionTrajectoryArtifacts({ + sessionId, + sessionFile, + storePath, + restrictToStoreDir: true, + }); + + await expect(fs.stat(safeExternalRuntime)).rejects.toThrow(); + await expect(fs.stat(pointerPath)).rejects.toThrow(); + + await fs.writeFile(pointerPath, pointerFile(sessionId, unsafeExternalRuntime), "utf8"); + await removeSessionTrajectoryArtifacts({ + sessionId, + sessionFile, + storePath, + restrictToStoreDir: true, + }); + + await expect(fs.stat(unsafeExternalRuntime)).resolves.toBeDefined(); + }); + }); +}); diff --git a/src/trajectory/cleanup.ts b/src/trajectory/cleanup.ts new file mode 100644 index 00000000000..43e0ea66836 --- /dev/null +++ b/src/trajectory/cleanup.ts @@ -0,0 +1,252 @@ +import fs from "node:fs"; +import path from "node:path"; +import { resolveSessionFilePath } from "../config/sessions/paths.js"; +import { + resolveTrajectoryFilePath, + resolveTrajectoryPointerFilePath, + safeTrajectorySessionFileName, +} from "./paths.js"; + +export type RemovedTrajectoryArtifact = { + kind: "pointer" | "runtime"; + path: string; +}; + +type TrajectoryPointer = { + runtimeFile: string; +}; + +function isRecord(value: unknown): value is Record { + return !!value && typeof value === "object" && !Array.isArray(value); +} + +function canonicalizePathForComparison(filePath: string): string { + const resolved = path.resolve(filePath); + try { + return fs.realpathSync(resolved); + } catch { + return resolved; + } +} + +function isPathWithinDir(parentDir: string, filePath: string): boolean { + const resolvedParent = canonicalizePathForComparison(parentDir); + const resolvedFile = canonicalizePathForComparison(filePath); + const relative = path.relative(resolvedParent, resolvedFile); + return Boolean(relative) && !relative.startsWith("..") && !path.isAbsolute(relative); +} + +function isRegularNonSymlinkFile(filePath: string): boolean { + try { + const lst = fs.lstatSync(filePath); + if (!lst.isFile() || lst.isSymbolicLink()) { + return false; + } + return fs.statSync(filePath).isFile(); + } catch { + return false; + } +} + +function readTrajectoryPointerFile( + pointerPath: string, + sessionId: string, +): TrajectoryPointer | null { + if (!isRegularNonSymlinkFile(pointerPath)) { + return null; + } + try { + const parsed: unknown = JSON.parse(fs.readFileSync(pointerPath, "utf8")); + if (!isRecord(parsed)) { + return null; + } + if ( + parsed.traceSchema !== "openclaw-trajectory-pointer" || + parsed.schemaVersion !== 1 || + parsed.sessionId !== sessionId || + typeof parsed.runtimeFile !== "string" || + !parsed.runtimeFile.trim() + ) { + return null; + } + return { runtimeFile: path.resolve(parsed.runtimeFile) }; + } catch { + return null; + } +} + +function readFirstNonEmptyLine(filePath: string): string | null { + let fd: number | null = null; + try { + fd = fs.openSync(filePath, "r"); + const buffer = Buffer.alloc(64 * 1024); + const bytesRead = fs.readSync(fd, buffer, 0, buffer.length, 0); + if (bytesRead <= 0) { + return null; + } + for (const line of buffer.subarray(0, bytesRead).toString("utf8").split(/\r?\n/u)) { + const trimmed = line.trim(); + if (trimmed) { + return trimmed; + } + } + return null; + } catch { + return null; + } finally { + if (fd !== null) { + try { + fs.closeSync(fd); + } catch { + // Ignore best-effort cleanup close failures. + } + } + } +} + +function runtimeFileStartsWithSessionEvent(filePath: string, sessionId: string): boolean { + if (!isRegularNonSymlinkFile(filePath)) { + return false; + } + const firstLine = readFirstNonEmptyLine(filePath); + if (!firstLine) { + return false; + } + try { + const parsed: unknown = JSON.parse(firstLine); + return ( + isRecord(parsed) && + parsed.traceSchema === "openclaw-trajectory" && + parsed.schemaVersion === 1 && + parsed.source === "runtime" && + parsed.sessionId === sessionId + ); + } catch { + return false; + } +} + +async function removeRegularFile( + filePath: string, + kind: RemovedTrajectoryArtifact["kind"], +): Promise { + if (!isRegularNonSymlinkFile(filePath)) { + return null; + } + await fs.promises.rm(filePath, { force: true }); + return { kind, path: path.resolve(filePath) }; +} + +function resolveRemovedSessionFile(params: { + sessionId: string; + sessionFile?: string; + storePath: string; +}): string | null { + try { + return resolveSessionFilePath( + params.sessionId, + params.sessionFile ? { sessionFile: params.sessionFile } : undefined, + { sessionsDir: path.dirname(params.storePath) }, + ); + } catch { + return null; + } +} + +function mayRemoveRuntimeTarget(params: { + defaultRuntimePath: string; + filePath: string; + sessionId: string; + storeDir: string; + restrictToStoreDir: boolean; +}): boolean { + const resolved = canonicalizePathForComparison(params.filePath); + const withinStoreDir = isPathWithinDir(params.storeDir, resolved); + if (canonicalizePathForComparison(params.defaultRuntimePath) === resolved) { + return !params.restrictToStoreDir || withinStoreDir; + } + if (params.restrictToStoreDir && withinStoreDir) { + return true; + } + const expectedName = `${safeTrajectorySessionFileName(params.sessionId)}.jsonl`; + if (path.basename(resolved) !== expectedName) { + return false; + } + return runtimeFileStartsWithSessionEvent(resolved, params.sessionId); +} + +export async function removeSessionTrajectoryArtifacts(params: { + sessionId: string; + sessionFile?: string; + storePath: string; + restrictToStoreDir?: boolean; +}): Promise { + const sessionFile = resolveRemovedSessionFile(params); + if (!sessionFile) { + return []; + } + const storeDir = path.dirname(path.resolve(params.storePath)); + const restrictToStoreDir = params.restrictToStoreDir === true; + const removed: RemovedTrajectoryArtifact[] = []; + const pointerPath = resolveTrajectoryPointerFilePath(sessionFile); + const pointer = readTrajectoryPointerFile(pointerPath, params.sessionId); + const defaultRuntimePath = resolveTrajectoryFilePath({ + env: {}, + sessionFile, + sessionId: params.sessionId, + }); + const runtimeCandidates = new Set([defaultRuntimePath]); + if (pointer?.runtimeFile) { + runtimeCandidates.add(pointer.runtimeFile); + } + + for (const runtimePath of runtimeCandidates) { + if ( + !mayRemoveRuntimeTarget({ + defaultRuntimePath, + filePath: runtimePath, + sessionId: params.sessionId, + storeDir, + restrictToStoreDir, + }) + ) { + continue; + } + const deleted = await removeRegularFile(runtimePath, "runtime"); + if (deleted) { + removed.push(deleted); + } + } + + if (!restrictToStoreDir || isPathWithinDir(storeDir, pointerPath)) { + const deletedPointer = await removeRegularFile(pointerPath, "pointer"); + if (deletedPointer) { + removed.push(deletedPointer); + } + } + + return removed; +} + +export async function removeRemovedSessionTrajectoryArtifacts(params: { + removedSessionFiles: Iterable<[string, string | undefined]>; + referencedSessionIds: ReadonlySet; + storePath: string; + restrictToStoreDir?: boolean; +}): Promise { + const removed: RemovedTrajectoryArtifact[] = []; + for (const [sessionId, sessionFile] of params.removedSessionFiles) { + if (params.referencedSessionIds.has(sessionId)) { + continue; + } + removed.push( + ...(await removeSessionTrajectoryArtifacts({ + sessionId, + sessionFile, + storePath: params.storePath, + restrictToStoreDir: params.restrictToStoreDir, + })), + ); + } + return removed; +}