Memory/dreaming: harden grounded backfill follow-ups

This commit is contained in:
Mariano Belinky
2026-04-08 23:40:34 +02:00
parent 868d03d6d0
commit a71ad12044
10 changed files with 239 additions and 14 deletions

View File

@@ -122,7 +122,15 @@ const DAILY_MEMORY_FILE_NAME_RE = /^(\d{4}-\d{2}-\d{2})\.md$/;
async function listHistoricalDailyFiles(inputPath: string): Promise<string[]> {
const resolvedPath = path.resolve(inputPath);
const stat = await fs.stat(resolvedPath);
let stat;
try {
stat = await fs.stat(resolvedPath);
} catch (err) {
if ((err as NodeJS.ErrnoException | undefined)?.code === "ENOENT") {
return [];
}
throw err;
}
if (stat.isFile()) {
return DAILY_MEMORY_FILE_NAME_RE.test(path.basename(resolvedPath)) ? [resolvedPath] : [];
}
@@ -1734,6 +1742,11 @@ export async function runMemoryRemBackfill(opts: MemoryRemBackfillOptions) {
workspaceDir: scratchDir,
inputPaths: workspaceSourceFiles,
});
const sourcePathByDay = new Map(
sourceFiles
.map((sourcePath) => [extractIsoDayFromPath(sourcePath), sourcePath] as const)
.filter((entry): entry is [string, string] => Boolean(entry[0])),
);
const entries = grounded.files
.map((file) => {
const isoDay = extractIsoDayFromPath(file.path);
@@ -1742,7 +1755,7 @@ export async function runMemoryRemBackfill(opts: MemoryRemBackfillOptions) {
}
return {
isoDay,
sourcePath: file.path,
sourcePath: sourcePathByDay.get(isoDay) ?? file.path,
bodyLines: groundedMarkdownToDiaryLines(file.renderedMarkdown),
};
})

View File

@@ -1066,6 +1066,7 @@ describe("memory cli", () => {
const dreams = await fs.readFile(path.join(workspaceDir, "DREAMS.md"), "utf-8");
expect(dreams).toContain("openclaw:dreaming:backfill-entry");
expect(dreams).toContain(`source=${historyPath}`);
expect(dreams).toContain("January 1, 2025");
expect(dreams).toContain("What Happened");
expect(dreams).toContain("Possible Lasting Updates");
@@ -1074,6 +1075,26 @@ describe("memory cli", () => {
});
});
// A --path that does not exist must be reported through the normal
// "no daily files" runtime error, and the manager must still be closed.
it("treats a missing historical path as a controlled empty-source error", async () => {
  await withTempWorkspace(async (workspaceDir) => {
    const closeSpy = vi.fn(async () => {});
    mockManager({
      status: () => makeMemoryStatus({ workspaceDir }),
      close: closeSpy,
    });
    const errorSpy = spyRuntimeErrors(defaultRuntime);
    const missingPath = path.join(workspaceDir, "missing-history");

    await runMemoryCli(["rem-backfill", "--path", missingPath]);

    const sawEmptySourceError = errorSpy.mock.calls.some((args) => {
      const message = String(args[0]);
      return message.includes("found no YYYY-MM-DD.md files");
    });
    expect(sawEmptySourceError).toBe(true);
    expect(closeSpy).toHaveBeenCalled();
  });
});
it("stages grounded durable candidates into the live short-term store", async () => {
await withTempWorkspace(async (workspaceDir) => {
const historyDir = path.join(workspaceDir, "history");
@@ -1301,6 +1322,44 @@ describe("memory cli", () => {
});
});
// Hyphenated words ("long-term", "self-aware") must survive grounded rendering
// without being broken apart at the hyphen ("long- term").
it("does not split hyphenated words into malformed grounded candidates", async () => {
  await withTempWorkspace(async (workspaceDir) => {
    const historyDir = path.join(workspaceDir, "history");
    await fs.mkdir(historyDir, { recursive: true });
    const historyPath = path.join(historyDir, "2025-02-20.md");
    const noteLines = [
      "## Preferences Learned",
      "- Use long-term plans, avoid reactive task switching.",
      "- A self-aware workflow note should stay intact.",
    ];
    await fs.writeFile(historyPath, `${noteLines.join("\n")}\n`, "utf-8");

    const closeSpy = vi.fn(async () => {});
    mockManager({
      status: () => makeMemoryStatus({ workspaceDir }),
      close: closeSpy,
    });
    const jsonSpy = spyRuntimeJson(defaultRuntime);

    await runMemoryCli(["rem-harness", "--json", "--grounded", "--path", historyPath]);

    // Only the first grounded file's rendered markdown matters for this check.
    const payload = firstWrittenJsonArg<{
      grounded?: {
        files?: Array<{
          renderedMarkdown?: string;
        }>;
      } | null;
    }>(jsonSpy);
    const firstFile = payload?.grounded?.files?.[0];
    const rendered = firstFile?.renderedMarkdown ?? "";

    expect(rendered).not.toContain("Use long- term plans");
    expect(rendered).not.toContain("A self- aware workflow note");
    expect(closeSpy).toHaveBeenCalled();
  });
});
it("rolls back grounded rem backfill entries from DREAMS.md", async () => {
await withTempWorkspace(async (workspaceDir) => {
const dreamsPath = path.join(workspaceDir, "DREAMS.md");

View File

@@ -126,6 +126,10 @@ describe("backfill diary entries", () => {
expect(formatBackfillDiaryDate("2026-01-01", "UTC")).toBe("January 1, 2026");
});
// The label must name the source ISO day even when a timezone far ahead of UTC is passed.
it("preserves the iso day label in high-positive-offset timezones", () => {
  const label = formatBackfillDiaryDate("2026-01-01", "Pacific/Kiritimati");
  expect(label).toBe("January 1, 2026");
});
it("builds a marked backfill diary entry", () => {
const entry = buildBackfillDiaryEntry({
isoDay: "2026-01-01",
@@ -201,6 +205,29 @@ describe("backfill diary entries", () => {
expect(content).toContain("Keep this real dream.");
expect(content).not.toContain("Remove this backfill.");
});
// DREAMS.md is a symlink to another file: the backfill write must reject,
// and the link target must be left exactly as it was.
it("refuses to overwrite a symlinked DREAMS.md during backfill writes", async () => {
  const workspaceDir = await createTempWorkspace("openclaw-dreaming-backfill-");
  const targetPath = path.join(workspaceDir, "outside.txt");
  const dreamsPath = path.join(workspaceDir, "DREAMS.md");
  await fs.writeFile(targetPath, "outside\n", "utf-8");
  await fs.symlink(targetPath, dreamsPath);

  const backfillEntry = {
    isoDay: "2026-01-01",
    sourcePath: "memory/2026-01-01.md",
    bodyLines: ["What Happened", "1. First pass."],
  };
  const writeAttempt = writeBackfillDiaryEntries({
    workspaceDir,
    timezone: "UTC",
    entries: [backfillEntry],
  });

  await expect(writeAttempt).rejects.toThrow("Refusing to write symlinked DREAMS.md");
  await expect(fs.readFile(targetPath, "utf-8")).resolves.toBe("outside\n");
});
});
describe("appendNarrativeEntry", () => {

View File

@@ -240,17 +240,53 @@ function stripBackfillDiaryBlocks(existing: string): { updated: string; removed:
};
}
/**
 * Formats an ISO calendar day (`YYYY-MM-DD`) as an en-US long date label,
 * e.g. `"2026-01-01"` -> `"January 1, 2026"`.
 *
 * The label always names the same calendar day as `isoDay`: formatting is
 * pinned to UTC at noon so no timezone can shift it to a neighbouring day.
 *
 * @param isoDay - Day string expected in `YYYY-MM-DD` form.
 * @param _timezone - Accepted for call-site compatibility and intentionally
 *   ignored; backfill labels must not drift by timezone.
 * @returns The formatted label, or `isoDay` unchanged when it is not in
 *   `YYYY-MM-DD` form or does not round-trip to the same calendar day
 *   (for example `2026-02-31`, month `13`, or a two-digit year).
 */
export function formatBackfillDiaryDate(isoDay: string, _timezone?: string): string {
  const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(isoDay);
  if (!match) {
    return isoDay;
  }
  const year = Number(match[1]);
  const month = Number(match[2]);
  const day = Number(match[3]);
  const noonUtc = new Date(Date.UTC(year, month - 1, day, 12));
  // Date.UTC silently normalizes out-of-range fields (Feb 31 -> Mar 3) and maps
  // years 0-99 to 1900-1999. If the date does not round-trip, the label would
  // name a different day than the source, so hand the input back untouched.
  const roundTrips =
    noonUtc.getUTCFullYear() === year &&
    noonUtc.getUTCMonth() === month - 1 &&
    noonUtc.getUTCDate() === day;
  if (!roundTrips) {
    return isoDay;
  }
  const opts: Intl.DateTimeFormatOptions = {
    // Preserve the source iso day exactly; backfill labels should not drift by timezone.
    timeZone: "UTC",
    year: "numeric",
    month: "long",
    day: "numeric",
  };
  return new Intl.DateTimeFormat("en-US", opts).format(noonUtc);
}
/**
 * Checks that `dreamsPath` is safe to overwrite.
 *
 * Uses `lstat`, so a symlink is inspected as the link itself rather than as
 * whatever it points to.
 *
 * @param dreamsPath - Path of the DREAMS.md file about to be written.
 * @throws Error when the path exists and is a symbolic link.
 * @throws Error when the path exists and is not a regular file.
 * @throws Any `lstat` failure other than `ENOENT` is rethrown unchanged.
 */
async function assertSafeDreamsPath(dreamsPath: string): Promise<void> {
  let info;
  try {
    info = await fs.lstat(dreamsPath);
  } catch (err) {
    // Nothing at the path yet: there is nothing to protect, so the write may proceed.
    if ((err as NodeJS.ErrnoException).code === "ENOENT") {
      return;
    }
    throw err;
  }
  if (info.isSymbolicLink()) {
    throw new Error("Refusing to write symlinked DREAMS.md");
  }
  if (!info.isFile()) {
    throw new Error("Refusing to write non-file DREAMS.md");
  }
}
/**
 * Replaces DREAMS.md by writing the new content to a sibling temp file and
 * then renaming that temp file over `dreamsPath`.
 *
 * The target is first checked with `assertSafeDreamsPath`. The temp file is
 * removed again if either the write or the rename fails, so a failed update
 * does not leave `*.tmp` residue next to DREAMS.md.
 *
 * @param dreamsPath - Destination path of DREAMS.md.
 * @param content - Full file content to write, encoded as UTF-8.
 * @throws Whatever `assertSafeDreamsPath`, `fs.writeFile` or `fs.rename`
 *   throws; cleanup failures are swallowed so the original error surfaces.
 */
async function writeDreamsFileAtomic(dreamsPath: string, content: string): Promise<void> {
  await assertSafeDreamsPath(dreamsPath);
  const tempPath = `${dreamsPath}.${process.pid}.${Date.now()}.tmp`;
  try {
    // "wx" makes the write fail instead of clobbering an existing temp file.
    await fs.writeFile(tempPath, content, { encoding: "utf-8", flag: "wx" });
  } catch (err) {
    // EEXIST means the temp path belongs to someone else; leave it alone.
    // Any other failure may have left a partially written file of ours behind.
    if ((err as NodeJS.ErrnoException | undefined)?.code !== "EEXIST") {
      await fs.rm(tempPath, { force: true }).catch(() => {});
    }
    throw err;
  }
  try {
    await fs.rename(tempPath, dreamsPath);
  } catch (err) {
    // Best-effort cleanup; the rename error is the one callers need to see.
    await fs.rm(tempPath, { force: true }).catch(() => {});
    throw err;
  }
}
export function buildBackfillDiaryEntry(params: {
isoDay: string;
bodyLines: string[];
@@ -295,7 +331,7 @@ export async function writeBackfillDiaryEntries(params: {
),
];
const updated = replaceDiaryContent(stripped.updated, joinDiaryBlocks(nextBlocks));
await fs.writeFile(dreamsPath, updated, "utf-8");
await writeDreamsFileAtomic(dreamsPath, updated);
return {
dreamsPath,
written: params.entries.length,
@@ -311,7 +347,7 @@ export async function removeBackfillDiaryEntries(params: {
const stripped = stripBackfillDiaryBlocks(existing);
if (stripped.removed > 0 || existing.length > 0) {
await fs.mkdir(path.dirname(dreamsPath), { recursive: true });
await fs.writeFile(dreamsPath, stripped.updated, "utf-8");
await writeDreamsFileAtomic(dreamsPath, stripped.updated);
}
return {
dreamsPath,

View File

@@ -56,6 +56,9 @@ const REM_TIME_PREFIX_RE = /^\d{1,2}:\d{2}\s*-\s*/;
const REM_CODE_FENCE_RE = /^\s*```/;
const REM_TABLE_RE = /^\s*\|.*\|\s*$/;
const REM_TABLE_DIVIDER_RE = /^\s*\|?[\s:-]+\|[\s|:-]*$/;
// Cap on how many markdown files a grounded REM scan collects; the directory
// walker stops adding files once this many have been found.
const MAX_GROUNDED_REM_FILES = 512;
// Markdown files whose size exceeds this many bytes are not collected.
const MAX_GROUNDED_REM_FILE_BYTES = 1_000_000;
// Directory names the walker skips instead of descending into them.
const GROUNDED_REM_SKIPPED_DIRS = new Set([".git", "node_modules"]);
const REM_SUMMARY_FACT_LIMIT = 4;
const REM_SUMMARY_REFLECTION_LIMIT = 4;
const REM_SUMMARY_MEMORY_LIMIT = 3;
@@ -605,7 +608,7 @@ function splitTopLevelClauses(text: string, delimiter: string): string[] {
}
function splitSubjectLeadClaim(text: string): string[] {
const match = /^(?<subject>.+?(?:||-))\s*(?<rest>.+)$/u.exec(text);
const match = /^(?<subject>.+?(?:||\s-\s))\s*(?<rest>.+)$/u.exec(text);
if (!match?.groups) {
return [text];
}
@@ -1018,16 +1021,29 @@ function previewGroundedRemForFile(params: {
async function collectMarkdownFiles(inputPaths: string[]): Promise<string[]> {
const found = new Set<string>();
async function walk(targetPath: string): Promise<void> {
if (found.size >= MAX_GROUNDED_REM_FILES) {
return;
}
const resolved = path.resolve(targetPath);
const stat = await fs.stat(resolved);
const stat = await fs.lstat(resolved);
if (stat.isSymbolicLink()) {
return;
}
if (stat.isDirectory()) {
const entries = await fs.readdir(resolved, { withFileTypes: true });
for (const entry of entries) {
if (entry.isDirectory() && GROUNDED_REM_SKIPPED_DIRS.has(entry.name)) {
continue;
}
await walk(path.join(resolved, entry.name));
}
return;
}
if (stat.isFile() && resolved.toLowerCase().endsWith(".md")) {
if (
stat.isFile() &&
stat.size <= MAX_GROUNDED_REM_FILE_BYTES &&
resolved.toLowerCase().endsWith(".md")
) {
found.add(resolved);
}
}