fix(memory): keep archive transcript visibility safe

Keep reset/deleted session archives searchable while preserving visibility filtering, and keep internal cron-run archives opaque when live ownership metadata is gone.

Refs #56131.
Thanks @buyitsydney.
This commit is contained in:
buyitsydney
2026-05-03 16:17:45 +08:00
committed by GitHub
parent d583662fd9
commit 2ffdb5d248
7 changed files with 361 additions and 25 deletions

View File

@@ -2,7 +2,11 @@ import fsSync from "node:fs";
import os from "node:os";
import path from "node:path";
import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it } from "vitest";
import { buildSessionEntry, listSessionFilesForAgent } from "./session-files.js";
import {
buildSessionEntry,
listSessionFilesForAgent,
sessionPathForFile,
} from "./session-files.js";
let fixtureRoot: string;
let tmpDir: string;
@@ -61,6 +65,28 @@ describe("listSessionFilesForAgent", () => {
});
});
// Covers how an on-disk transcript path is mapped to its logical `sessions/...` path.
describe("sessionPathForFile", () => {
  it("includes the owning agent id when the transcript lives under an agent sessions dir", () => {
    // Transcript placed at <tmpDir>/agents/main/sessions/<archive file>.
    const archiveName = "deleted-session.jsonl.deleted.2026-02-16T22-27-33.000Z";
    const transcriptPath = path.join(tmpDir, "agents", "main", "sessions", archiveName);

    const logicalPath = sessionPathForFile(transcriptPath);

    expect(logicalPath).toBe(
      "sessions/main/deleted-session.jsonl.deleted.2026-02-16T22-27-33.000Z",
    );
  });

  it("keeps the legacy basename-only path when the agent owner cannot be derived", () => {
    // File sits directly in tmpDir rather than under an `agents/<id>/sessions` dir.
    const looseTranscript = path.join(tmpDir, "loose-session.jsonl");

    const logicalPath = sessionPathForFile(looseTranscript);

    expect(logicalPath).toBe("sessions/loose-session.jsonl");
  });
});
describe("buildSessionEntry", () => {
it("returns lineMap tracking original JSONL line numbers", async () => {
// Simulate a real session JSONL file with metadata records interspersed
@@ -116,30 +142,92 @@ describe("buildSessionEntry", () => {
expect(entry!.lineMap).toEqual([]);
});
// Writes the same single user message into four sibling artifacts and checks
// which of them buildSessionEntry exposes as indexable content.
it("indexes usage-counted reset/deleted archives but still skips bak and checkpoint artifacts", async () => {
  const resetPath = path.join(tmpDir, "ordinary.jsonl.reset.2026-02-16T22-26-33.000Z");
  const deletedPath = path.join(tmpDir, "ordinary.jsonl.deleted.2026-02-16T22-27-33.000Z");
  const bakPath = path.join(tmpDir, "ordinary.jsonl.bak.2026-02-16T22-28-33.000Z");
  const checkpointPath = path.join(
    tmpDir,
    "ordinary.checkpoint.11111111-1111-4111-8111-111111111111.jsonl",
  );
  const content = JSON.stringify({
    type: "message",
    message: { role: "user", content: "Archived hello" },
  });
  fsSync.writeFileSync(resetPath, content);
  fsSync.writeFileSync(deletedPath, content);
  fsSync.writeFileSync(bakPath, content);
  fsSync.writeFileSync(checkpointPath, content);

  const resetEntry = await buildSessionEntry(resetPath);
  const deletedEntry = await buildSessionEntry(deletedPath);
  const bakEntry = await buildSessionEntry(bakPath);
  const checkpointEntry = await buildSessionEntry(checkpointPath);

  // Usage-counted archives (reset, deleted) must surface real content so
  // post-reset memory_search can recover prior session history.
  expect(resetEntry?.content).toContain("User: Archived hello");
  expect(resetEntry?.lineMap).toEqual([1]);
  expect(deletedEntry).not.toBeNull();
  expect(deletedEntry?.content).toContain("User: Archived hello");
  expect(deletedEntry?.lineMap).toEqual([1]);

  // .bak and compaction checkpoints remain opaque pre-archive / snapshot
  // artifacts and stay empty so they do not get double-indexed.
  expect(bakEntry).not.toBeNull();
  expect(bakEntry?.content).toBe("");
  expect(bakEntry?.lineMap).toEqual([]);
  expect(checkpointEntry).not.toBeNull();
  expect(checkpointEntry?.content).toBe("");
  expect(checkpointEntry?.lineMap).toEqual([]);
});
// The fixture carries no session metadata record; the only cron marker is the
// `[cron:...]` prefix on the first user message.
it("keeps cron-run deleted archives opaque when the live session store entry is gone", async () => {
  const cronPromptRecord = {
    type: "message",
    message: {
      role: "user",
      content: "[cron:job-1 Codex Sessions Sync] Run internal sync.",
    },
  };
  const cronReplyRecord = {
    type: "message",
    message: { role: "assistant", content: "Internal cron output that must stay out." },
  };
  const transcript = [cronPromptRecord, cronReplyRecord]
    .map((record) => JSON.stringify(record))
    .join("\n");
  const archivePath = path.join(tmpDir, "cron-run.jsonl.deleted.2026-02-16T22-27-33.000Z");
  fsSync.writeFileSync(archivePath, transcript);

  const archived = await buildSessionEntry(archivePath);

  // The entry still exists, but exposes no text and is flagged as cron-generated.
  expect(archived).not.toBeNull();
  expect(archived?.content).toBe("");
  expect(archived?.lineMap).toEqual([]);
  expect(archived?.generatedByCronRun).toBe(true);
});
// Here the cron marker is a `session-meta` record whose nested `data.sessionKey`
// is a cron-run key; the message itself has no `[cron:...]` prefix.
it("keeps cron-run reset archives opaque when session metadata preserves the cron key", async () => {
  const sessionMetaRecord = {
    type: "session-meta",
    data: { sessionKey: "agent:main:cron:job-1:run:run-1" },
  };
  const cronReplyRecord = {
    type: "message",
    message: { role: "assistant", content: "Internal cron output that must stay out." },
  };
  const transcript = [sessionMetaRecord, cronReplyRecord]
    .map((record) => JSON.stringify(record))
    .join("\n");
  const archivePath = path.join(tmpDir, "cron-run.jsonl.reset.2026-02-16T22-26-33.000Z");
  fsSync.writeFileSync(archivePath, transcript);

  const archived = await buildSessionEntry(archivePath);

  // The entry still exists, but exposes no text and is flagged as cron-generated.
  expect(archived).not.toBeNull();
  expect(archived?.content).toBe("");
  expect(archived?.lineMap).toEqual([]);
  expect(archived?.generatedByCronRun).toBe(true);
});
it("skips blank lines and invalid JSON without breaking lineMap", async () => {
const jsonlLines = [
"",

View File

@@ -14,6 +14,7 @@ import {
isSessionArchiveArtifactName,
isSilentReplyPayloadText,
isUsageCountedSessionTranscriptFileName,
parseUsageCountedSessionIdFromFileName,
resolveSessionTranscriptsDirForAgent,
stripInboundMetadata,
stripInternalRuntimeContext,
@@ -62,9 +63,32 @@ type SessionTranscriptStoreEntry = {
};
/**
 * Decides whether a transcript file should be left out of dreaming ingestion.
 *
 * Only the file's base name is inspected. Two kinds of file are skipped:
 * - compaction checkpoints, which are derived snapshots of an active session
 *   and would double-index the same content;
 * - archive artifacts that are not usage-counted (legacy backups and
 *   `.jsonl.bak.<iso>` rotations), which are opaque pre-archive copies.
 *
 * Usage-counted archives (`.jsonl.reset.<iso>` / `.jsonl.deleted.<iso>`) are
 * retained copies of real sessions and stay indexed so `memory_search` can
 * surface post-reset / post-delete history.
 *
 * @param absPath Path to the transcript file.
 * @returns `true` when the file must be skipped, `false` otherwise.
 */
function shouldSkipTranscriptFileForDreaming(absPath: string): boolean {
  const baseName = path.basename(absPath);
  return (
    isCompactionCheckpointTranscriptFileName(baseName) ||
    (isSessionArchiveArtifactName(baseName) && !isUsageCountedSessionTranscriptFileName(baseName))
  );
}
/**
 * Reports whether a path points at a usage-counted session archive.
 *
 * All three checks on the file's base name must hold: it is a usage-counted
 * transcript file name, it is an archive artifact name, and a session id can
 * be parsed out of it.
 *
 * @param absPath Path to the transcript file; only its base name is inspected.
 * @returns `true` only when every check passes.
 */
function isUsageCountedSessionArchiveTranscriptPath(absPath: string): boolean {
  const fileName = path.basename(absPath);
  return (
    isUsageCountedSessionTranscriptFileName(fileName) &&
    isSessionArchiveArtifactName(fileName) &&
    parseUsageCountedSessionIdFromFileName(fileName) !== null
  );
}
@@ -136,6 +160,30 @@ function isDreamingNarrativeSessionStoreKey(sessionKey: string): boolean {
return sessionSegment.startsWith(DREAMING_NARRATIVE_RUN_PREFIX);
}
/**
 * Checks whether an untyped value is a string that `isCronRunSessionKey` accepts.
 *
 * @param value Candidate session key of unknown shape.
 * @returns `false` for any non-string; otherwise the result of `isCronRunSessionKey`.
 */
function hasCronRunSessionKey(value: unknown): boolean {
  if (typeof value !== "string") {
    return false;
  }
  return isCronRunSessionKey(value);
}
/**
 * Checks whether a parsed transcript record carries a cron-run session key.
 *
 * The key is looked up in two places: the record's own `sessionKey`, and
 * `data.sessionKey` when `data` is a non-array object.
 *
 * @param record Parsed record of unknown shape.
 * @returns `true` when either location holds a cron-run session key.
 */
function isCronRunGeneratedRecord(record: unknown): boolean {
  // Non-null, non-array object guard shared by the record and its `data` payload.
  const isPlainObject = (value: unknown): value is Record<string, unknown> =>
    value !== null && typeof value === "object" && !Array.isArray(value);

  if (!isPlainObject(record)) {
    return false;
  }
  if (hasCronRunSessionKey(record.sessionKey)) {
    return true;
  }
  const payload = record.data;
  return isPlainObject(payload) && hasCronRunSessionKey(payload.sessionKey);
}
function normalizeComparablePath(pathname: string): string {
const resolved = path.resolve(pathname);
return process.platform === "win32" ? resolved.toLowerCase() : resolved;
@@ -228,11 +276,20 @@ function classifySessionTranscriptFromSessionStore(absPath: string): {
} {
const sessionsDir = path.dirname(absPath);
const normalizedAbsPath = normalizeComparablePath(absPath);
const primarySessionId = parseUsageCountedSessionIdFromFileName(path.basename(absPath));
const normalizedPrimaryPath =
primarySessionId && isSessionArchiveArtifactName(path.basename(absPath))
? normalizeComparablePath(path.join(sessionsDir, `${primarySessionId}.jsonl`))
: null;
const classification = loadSessionTranscriptClassificationForSessionsDir(sessionsDir);
const hasClassifiedPath = (paths: ReadonlySet<string>) =>
paths.has(normalizedAbsPath) ||
(normalizedPrimaryPath !== null && paths.has(normalizedPrimaryPath));
return {
generatedByDreamingNarrative:
classification.dreamingNarrativeTranscriptPaths.has(normalizedAbsPath),
generatedByCronRun: classification.cronRunTranscriptPaths.has(normalizedAbsPath),
generatedByDreamingNarrative: hasClassifiedPath(
classification.dreamingNarrativeTranscriptPaths,
),
generatedByCronRun: hasClassifiedPath(classification.cronRunTranscriptPaths),
};
}
@@ -250,8 +307,20 @@ export async function listSessionFilesForAgent(agentId: string): Promise<string[
}
}
/**
 * Derives the owning agent id from a transcript path of the form
 * `.../agents/<agentId>/sessions/...`.
 *
 * The last path segment named `sessions` is used; the segment two positions
 * before it must be `agents`.
 *
 * @param absPath Transcript path; resolved to an absolute path before splitting.
 * @returns The agent id segment, or `null` when the layout does not match.
 */
function extractAgentIdFromSessionPath(absPath: string): string | null {
  // path.resolve already returns a normalized path, so no extra normalize step is needed.
  const parts = path.resolve(absPath).split(path.sep).filter(Boolean);
  const sessionsIndex = parts.lastIndexOf("sessions");
  if (sessionsIndex < 2 || parts[sessionsIndex - 2] !== "agents") {
    return null;
  }
  return parts[sessionsIndex - 1] || null;
}

/**
 * Builds the logical, forward-slash path used to identify a session transcript.
 *
 * @param absPath Path to the transcript file on disk.
 * @returns `sessions/<agentId>/<basename>` when the owning agent can be derived
 *   from the path, otherwise the legacy `sessions/<basename>` form.
 */
export function sessionPathForFile(absPath: string): string {
  const agentId = extractAgentIdFromSessionPath(absPath);
  return path
    .join("sessions", ...(agentId ? [agentId] : []), path.basename(absPath))
    .replace(/\\/g, "/"); // keep forward slashes even where path.join emits backslashes
}
async function logSessionFileReadFailure(absPath: string, err: unknown): Promise<void> {
@@ -481,8 +550,10 @@ export async function buildSessionEntry(
opts.generatedByDreamingNarrative ??
sessionStoreClassification?.generatedByDreamingNarrative ??
false;
const generatedByCronRun =
let generatedByCronRun =
opts.generatedByCronRun ?? sessionStoreClassification?.generatedByCronRun ?? false;
const allowArchiveContentCronClassification =
isUsageCountedSessionArchiveTranscriptPath(absPath);
for (let jsonlIdx = 0; jsonlIdx < lines.length; jsonlIdx++) {
const line = lines[jsonlIdx];
if (!line.trim()) {
@@ -497,6 +568,16 @@ export async function buildSessionEntry(
if (!generatedByDreamingNarrative && isDreamingNarrativeGeneratedRecord(record)) {
generatedByDreamingNarrative = true;
}
if (
!generatedByCronRun &&
allowArchiveContentCronClassification &&
isCronRunGeneratedRecord(record)
) {
generatedByCronRun = true;
collected.length = 0;
lineMap.length = 0;
messageTimestampsMs.length = 0;
}
if (
!record ||
typeof record !== "object" ||
@@ -520,6 +601,16 @@ export async function buildSessionEntry(
if (rawText === null) {
continue;
}
if (
!generatedByCronRun &&
allowArchiveContentCronClassification &&
isGeneratedCronPromptMessage(normalizeSessionText(rawText), message.role)
) {
generatedByCronRun = true;
collected.length = 0;
lineMap.length = 0;
messageTimestampsMs.length = 0;
}
const text = sanitizeSessionText(rawText, message.role);
if (!text) {
// Assistant-side machinery (silent replies, system wrappers) is already