mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 19:40:42 +00:00
fix(memory): keep archive transcript visibility safe
Keep reset/deleted session archives searchable while preserving visibility filtering, and keep internal cron-run archives opaque when live ownership metadata is gone.

Refs #56131.
Thanks @buyitsydney.
This commit is contained in:
@@ -2,7 +2,11 @@ import fsSync from "node:fs";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it } from "vitest";
|
||||
import { buildSessionEntry, listSessionFilesForAgent } from "./session-files.js";
|
||||
import {
|
||||
buildSessionEntry,
|
||||
listSessionFilesForAgent,
|
||||
sessionPathForFile,
|
||||
} from "./session-files.js";
|
||||
|
||||
let fixtureRoot: string;
|
||||
let tmpDir: string;
|
||||
@@ -61,6 +65,28 @@ describe("listSessionFilesForAgent", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("sessionPathForFile", () => {
|
||||
it("includes the owning agent id when the transcript lives under an agent sessions dir", () => {
|
||||
const absPath = path.join(
|
||||
tmpDir,
|
||||
"agents",
|
||||
"main",
|
||||
"sessions",
|
||||
"deleted-session.jsonl.deleted.2026-02-16T22-27-33.000Z",
|
||||
);
|
||||
|
||||
expect(sessionPathForFile(absPath)).toBe(
|
||||
"sessions/main/deleted-session.jsonl.deleted.2026-02-16T22-27-33.000Z",
|
||||
);
|
||||
});
|
||||
|
||||
it("keeps the legacy basename-only path when the agent owner cannot be derived", () => {
|
||||
expect(sessionPathForFile(path.join(tmpDir, "loose-session.jsonl"))).toBe(
|
||||
"sessions/loose-session.jsonl",
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildSessionEntry", () => {
|
||||
it("returns lineMap tracking original JSONL line numbers", async () => {
|
||||
// Simulate a real session JSONL file with metadata records interspersed
|
||||
@@ -116,30 +142,92 @@ describe("buildSessionEntry", () => {
|
||||
expect(entry!.lineMap).toEqual([]);
|
||||
});
|
||||
|
||||
it("skips deleted and checkpoint transcripts for dreaming ingestion", async () => {
|
||||
it("indexes usage-counted reset/deleted archives but still skips bak and checkpoint artifacts", async () => {
|
||||
const resetPath = path.join(tmpDir, "ordinary.jsonl.reset.2026-02-16T22-26-33.000Z");
|
||||
const deletedPath = path.join(tmpDir, "ordinary.jsonl.deleted.2026-02-16T22-27-33.000Z");
|
||||
const bakPath = path.join(tmpDir, "ordinary.jsonl.bak.2026-02-16T22-28-33.000Z");
|
||||
const checkpointPath = path.join(
|
||||
tmpDir,
|
||||
"ordinary.checkpoint.11111111-1111-4111-8111-111111111111.jsonl",
|
||||
);
|
||||
const content = JSON.stringify({
|
||||
type: "message",
|
||||
message: { role: "user", content: "This should never reach the dreaming corpus." },
|
||||
message: { role: "user", content: "Archived hello" },
|
||||
});
|
||||
fsSync.writeFileSync(resetPath, content);
|
||||
fsSync.writeFileSync(deletedPath, content);
|
||||
fsSync.writeFileSync(bakPath, content);
|
||||
fsSync.writeFileSync(checkpointPath, content);
|
||||
|
||||
const resetEntry = await buildSessionEntry(resetPath);
|
||||
const deletedEntry = await buildSessionEntry(deletedPath);
|
||||
const bakEntry = await buildSessionEntry(bakPath);
|
||||
const checkpointEntry = await buildSessionEntry(checkpointPath);
|
||||
|
||||
expect(deletedEntry).not.toBeNull();
|
||||
expect(deletedEntry?.content).toBe("");
|
||||
expect(deletedEntry?.lineMap).toEqual([]);
|
||||
// Usage-counted archives (reset, deleted) must surface real content so
|
||||
// post-reset memory_search can recover prior session history.
|
||||
expect(resetEntry?.content).toContain("User: Archived hello");
|
||||
expect(resetEntry?.lineMap).toEqual([1]);
|
||||
expect(deletedEntry?.content).toContain("User: Archived hello");
|
||||
expect(deletedEntry?.lineMap).toEqual([1]);
|
||||
|
||||
// .bak and compaction checkpoints remain opaque pre-archive / snapshot
|
||||
// artifacts and stay empty so they do not get double-indexed.
|
||||
expect(bakEntry).not.toBeNull();
|
||||
expect(bakEntry?.content).toBe("");
|
||||
expect(bakEntry?.lineMap).toEqual([]);
|
||||
expect(checkpointEntry).not.toBeNull();
|
||||
expect(checkpointEntry?.content).toBe("");
|
||||
expect(checkpointEntry?.lineMap).toEqual([]);
|
||||
});
|
||||
|
||||
it("keeps cron-run deleted archives opaque when the live session store entry is gone", async () => {
|
||||
const archivePath = path.join(tmpDir, "cron-run.jsonl.deleted.2026-02-16T22-27-33.000Z");
|
||||
const jsonlLines = [
|
||||
JSON.stringify({
|
||||
type: "message",
|
||||
message: {
|
||||
role: "user",
|
||||
content: "[cron:job-1 Codex Sessions Sync] Run internal sync.",
|
||||
},
|
||||
}),
|
||||
JSON.stringify({
|
||||
type: "message",
|
||||
message: { role: "assistant", content: "Internal cron output that must stay out." },
|
||||
}),
|
||||
];
|
||||
fsSync.writeFileSync(archivePath, jsonlLines.join("\n"));
|
||||
|
||||
const entry = await buildSessionEntry(archivePath);
|
||||
|
||||
expect(entry).not.toBeNull();
|
||||
expect(entry?.content).toBe("");
|
||||
expect(entry?.lineMap).toEqual([]);
|
||||
expect(entry?.generatedByCronRun).toBe(true);
|
||||
});
|
||||
|
||||
it("keeps cron-run reset archives opaque when session metadata preserves the cron key", async () => {
|
||||
const archivePath = path.join(tmpDir, "cron-run.jsonl.reset.2026-02-16T22-26-33.000Z");
|
||||
const jsonlLines = [
|
||||
JSON.stringify({
|
||||
type: "session-meta",
|
||||
data: { sessionKey: "agent:main:cron:job-1:run:run-1" },
|
||||
}),
|
||||
JSON.stringify({
|
||||
type: "message",
|
||||
message: { role: "assistant", content: "Internal cron output that must stay out." },
|
||||
}),
|
||||
];
|
||||
fsSync.writeFileSync(archivePath, jsonlLines.join("\n"));
|
||||
|
||||
const entry = await buildSessionEntry(archivePath);
|
||||
|
||||
expect(entry).not.toBeNull();
|
||||
expect(entry?.content).toBe("");
|
||||
expect(entry?.lineMap).toEqual([]);
|
||||
expect(entry?.generatedByCronRun).toBe(true);
|
||||
});
|
||||
|
||||
it("skips blank lines and invalid JSON without breaking lineMap", async () => {
|
||||
const jsonlLines = [
|
||||
"",
|
||||
|
||||
@@ -14,6 +14,7 @@ import {
|
||||
isSessionArchiveArtifactName,
|
||||
isSilentReplyPayloadText,
|
||||
isUsageCountedSessionTranscriptFileName,
|
||||
parseUsageCountedSessionIdFromFileName,
|
||||
resolveSessionTranscriptsDirForAgent,
|
||||
stripInboundMetadata,
|
||||
stripInternalRuntimeContext,
|
||||
@@ -62,9 +63,32 @@ type SessionTranscriptStoreEntry = {
|
||||
};
|
||||
|
||||
function shouldSkipTranscriptFileForDreaming(absPath: string): boolean {
|
||||
const fileName = path.basename(absPath);
|
||||
// Compaction checkpoints are always skipped: they are derived snapshots of an
|
||||
// active session and would double-index the same content.
|
||||
if (isCompactionCheckpointTranscriptFileName(fileName)) {
|
||||
return true;
|
||||
}
|
||||
// Legacy backups and `.jsonl.bak.<iso>` rotations are opaque pre-archive
|
||||
// copies, not a user-facing session artifact; skip them too.
|
||||
if (
|
||||
isSessionArchiveArtifactName(fileName) &&
|
||||
!isUsageCountedSessionTranscriptFileName(fileName)
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
// Usage-counted archives (`.jsonl.reset.<iso>` / `.jsonl.deleted.<iso>`) are
|
||||
// the rotated-but-retained copies of real sessions and must stay indexed so
|
||||
// `memory_search` can surface hits on post-reset / post-delete history.
|
||||
return false;
|
||||
}
|
||||
|
||||
function isUsageCountedSessionArchiveTranscriptPath(absPath: string): boolean {
|
||||
const fileName = path.basename(absPath);
|
||||
return (
|
||||
isSessionArchiveArtifactName(fileName) || isCompactionCheckpointTranscriptFileName(fileName)
|
||||
isUsageCountedSessionTranscriptFileName(fileName) &&
|
||||
isSessionArchiveArtifactName(fileName) &&
|
||||
parseUsageCountedSessionIdFromFileName(fileName) !== null
|
||||
);
|
||||
}
|
||||
|
||||
@@ -136,6 +160,30 @@ function isDreamingNarrativeSessionStoreKey(sessionKey: string): boolean {
|
||||
return sessionSegment.startsWith(DREAMING_NARRATIVE_RUN_PREFIX);
|
||||
}
|
||||
|
||||
function hasCronRunSessionKey(value: unknown): boolean {
|
||||
return typeof value === "string" && isCronRunSessionKey(value);
|
||||
}
|
||||
|
||||
function isCronRunGeneratedRecord(record: unknown): boolean {
|
||||
if (!record || typeof record !== "object" || Array.isArray(record)) {
|
||||
return false;
|
||||
}
|
||||
const candidate = record as {
|
||||
sessionKey?: unknown;
|
||||
data?: unknown;
|
||||
};
|
||||
if (hasCronRunSessionKey(candidate.sessionKey)) {
|
||||
return true;
|
||||
}
|
||||
if (!candidate.data || typeof candidate.data !== "object" || Array.isArray(candidate.data)) {
|
||||
return false;
|
||||
}
|
||||
const nested = candidate.data as {
|
||||
sessionKey?: unknown;
|
||||
};
|
||||
return hasCronRunSessionKey(nested.sessionKey);
|
||||
}
|
||||
|
||||
function normalizeComparablePath(pathname: string): string {
|
||||
const resolved = path.resolve(pathname);
|
||||
return process.platform === "win32" ? resolved.toLowerCase() : resolved;
|
||||
@@ -228,11 +276,20 @@ function classifySessionTranscriptFromSessionStore(absPath: string): {
|
||||
} {
|
||||
const sessionsDir = path.dirname(absPath);
|
||||
const normalizedAbsPath = normalizeComparablePath(absPath);
|
||||
const primarySessionId = parseUsageCountedSessionIdFromFileName(path.basename(absPath));
|
||||
const normalizedPrimaryPath =
|
||||
primarySessionId && isSessionArchiveArtifactName(path.basename(absPath))
|
||||
? normalizeComparablePath(path.join(sessionsDir, `${primarySessionId}.jsonl`))
|
||||
: null;
|
||||
const classification = loadSessionTranscriptClassificationForSessionsDir(sessionsDir);
|
||||
const hasClassifiedPath = (paths: ReadonlySet<string>) =>
|
||||
paths.has(normalizedAbsPath) ||
|
||||
(normalizedPrimaryPath !== null && paths.has(normalizedPrimaryPath));
|
||||
return {
|
||||
generatedByDreamingNarrative:
|
||||
classification.dreamingNarrativeTranscriptPaths.has(normalizedAbsPath),
|
||||
generatedByCronRun: classification.cronRunTranscriptPaths.has(normalizedAbsPath),
|
||||
generatedByDreamingNarrative: hasClassifiedPath(
|
||||
classification.dreamingNarrativeTranscriptPaths,
|
||||
),
|
||||
generatedByCronRun: hasClassifiedPath(classification.cronRunTranscriptPaths),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -250,8 +307,20 @@ export async function listSessionFilesForAgent(agentId: string): Promise<string[
|
||||
}
|
||||
}
|
||||
|
||||
function extractAgentIdFromSessionPath(absPath: string): string | null {
|
||||
const parts = path.normalize(path.resolve(absPath)).split(path.sep).filter(Boolean);
|
||||
const sessionsIndex = parts.lastIndexOf("sessions");
|
||||
if (sessionsIndex < 2 || parts[sessionsIndex - 2] !== "agents") {
|
||||
return null;
|
||||
}
|
||||
return parts[sessionsIndex - 1] || null;
|
||||
}
|
||||
|
||||
export function sessionPathForFile(absPath: string): string {
|
||||
return path.join("sessions", path.basename(absPath)).replace(/\\/g, "/");
|
||||
const agentId = extractAgentIdFromSessionPath(absPath);
|
||||
return path
|
||||
.join("sessions", ...(agentId ? [agentId] : []), path.basename(absPath))
|
||||
.replace(/\\/g, "/");
|
||||
}
|
||||
|
||||
async function logSessionFileReadFailure(absPath: string, err: unknown): Promise<void> {
|
||||
@@ -481,8 +550,10 @@ export async function buildSessionEntry(
|
||||
opts.generatedByDreamingNarrative ??
|
||||
sessionStoreClassification?.generatedByDreamingNarrative ??
|
||||
false;
|
||||
const generatedByCronRun =
|
||||
let generatedByCronRun =
|
||||
opts.generatedByCronRun ?? sessionStoreClassification?.generatedByCronRun ?? false;
|
||||
const allowArchiveContentCronClassification =
|
||||
isUsageCountedSessionArchiveTranscriptPath(absPath);
|
||||
for (let jsonlIdx = 0; jsonlIdx < lines.length; jsonlIdx++) {
|
||||
const line = lines[jsonlIdx];
|
||||
if (!line.trim()) {
|
||||
@@ -497,6 +568,16 @@ export async function buildSessionEntry(
|
||||
if (!generatedByDreamingNarrative && isDreamingNarrativeGeneratedRecord(record)) {
|
||||
generatedByDreamingNarrative = true;
|
||||
}
|
||||
if (
|
||||
!generatedByCronRun &&
|
||||
allowArchiveContentCronClassification &&
|
||||
isCronRunGeneratedRecord(record)
|
||||
) {
|
||||
generatedByCronRun = true;
|
||||
collected.length = 0;
|
||||
lineMap.length = 0;
|
||||
messageTimestampsMs.length = 0;
|
||||
}
|
||||
if (
|
||||
!record ||
|
||||
typeof record !== "object" ||
|
||||
@@ -520,6 +601,16 @@ export async function buildSessionEntry(
|
||||
if (rawText === null) {
|
||||
continue;
|
||||
}
|
||||
if (
|
||||
!generatedByCronRun &&
|
||||
allowArchiveContentCronClassification &&
|
||||
isGeneratedCronPromptMessage(normalizeSessionText(rawText), message.role)
|
||||
) {
|
||||
generatedByCronRun = true;
|
||||
collected.length = 0;
|
||||
lineMap.length = 0;
|
||||
messageTimestampsMs.length = 0;
|
||||
}
|
||||
const text = sanitizeSessionText(rawText, message.role);
|
||||
if (!text) {
|
||||
// Assistant-side machinery (silent replies, system wrappers) is already
|
||||
|
||||
Reference in New Issue
Block a user