Memory/dreaming: harden grounded backfill follow-ups

This commit is contained in:
Mariano Belinky
2026-04-08 23:40:34 +02:00
parent 868d03d6d0
commit a71ad12044
10 changed files with 239 additions and 14 deletions

View File

@@ -33,6 +33,7 @@ Docs: https://docs.openclaw.ai
- Agents/timeouts: make the LLM idle timeout inherit `agents.defaults.timeoutSeconds` when configured, disable the unconfigured idle watchdog for cron runs, and point idle-timeout errors at `agents.defaults.llm.idleTimeoutSeconds`. Thanks @drvoss.
- Agents/failover: classify Z.ai vendor code `1311` as billing and `1113` as auth, including long wrapped `1311` payloads, so these errors stop falling through to generic failover handling. (#49552) Thanks @1bcMax.
- QQBot/media-tags: support HTML entity-encoded angle brackets (`&lt;`/`&gt;`), URL slashes in attributes, and self-closing media tags so upstream `<qqimg>` payloads are correctly parsed and normalized. (#60493) Thanks @ylc0919.
- Memory/dreaming: harden grounded backfill inputs and diary writes by preserving source-day labels, rejecting missing or symlinked targets cleanly, normalizing diary headings in gateway backfills, and tightening claim splitting plus diary source metadata. Thanks @mbelinky.
- Memory/dreaming: accept embedded heartbeat trigger tokens so light and REM dreaming still run when runtime wrappers include extra heartbeat text.
- QA/live auth: fail fast when live QA scenarios hit classified auth or runtime failure replies, including raw scenario wait paths, and sanitize missing-key guidance so gateway auth problems surface as actionable errors instead of timeouts. (#63333) Thanks @shakkernerd.
- Plugins/onboarding auth choices: prevent untrusted workspace plugins from colliding with bundled provider auth-choice ids during non-interactive onboarding, so bundled provider setup keeps operator secrets out of untrusted workspace plugin handlers unless those plugins are explicitly trusted. (#62368) Thanks @pgondhi987.

View File

@@ -167,4 +167,8 @@ Notes:
- If effectively active memory remote API key fields are configured as SecretRefs, the command resolves those values from the active gateway snapshot. If gateway is unavailable, the command fails fast.
- Gateway version skew note: this command path requires a gateway that supports `secrets.resolve`; older gateways return an unknown-method error.
- Tune scheduled sweep cadence with `dreaming.frequency`. Deep promotion policy is otherwise internal; use CLI flags on `memory promote` when you need one-off manual overrides.
- `memory rem-harness --path <file-or-dir> --grounded` previews grounded `What Happened`, `Reflections`, and `Possible Lasting Updates` from historical daily notes without writing anything.
- `memory rem-backfill --path <file-or-dir>` writes reversible grounded diary entries into `DREAMS.md` for UI review.
- `memory rem-backfill --path <file-or-dir> --stage-short-term` also seeds grounded durable candidates into the live short-term promotion store so the normal deep phase can rank them.
- `memory rem-backfill --rollback` removes previously written grounded diary entries, and `memory rem-backfill --rollback-short-term` removes previously staged grounded short-term candidates.
- See [Dreaming](/concepts/dreaming) for full phase descriptions and configuration reference.

View File

@@ -81,6 +81,15 @@ subagent turn (using the default runtime model) and appends a short diary entry.
This diary is for human reading in the Dreams UI, not a promotion source.
There is also a grounded historical backfill lane for review and recovery work:
- `memory rem-harness --path ... --grounded` previews grounded diary output from historical `YYYY-MM-DD.md` notes.
- `memory rem-backfill --path ...` writes reversible grounded diary entries into `DREAMS.md`.
- `memory rem-backfill --path ... --stage-short-term` stages grounded durable candidates into the same short-term evidence store the normal deep phase already uses.
- `memory rem-backfill --rollback` and `--rollback-short-term` remove those staged backfill artifacts without touching ordinary diary entries or live short-term recall.
The Control UI exposes the same diary backfill/reset flow so you can inspect results in the Dreams scene before deciding whether the grounded candidates deserve promotion.
## Deep ranking signals
Deep ranking uses six weighted base signals plus phase reinforcement:

View File

@@ -122,7 +122,15 @@ const DAILY_MEMORY_FILE_NAME_RE = /^(\d{4}-\d{2}-\d{2})\.md$/;
async function listHistoricalDailyFiles(inputPath: string): Promise<string[]> {
const resolvedPath = path.resolve(inputPath);
const stat = await fs.stat(resolvedPath);
let stat;
try {
stat = await fs.stat(resolvedPath);
} catch (err) {
if ((err as NodeJS.ErrnoException | undefined)?.code === "ENOENT") {
return [];
}
throw err;
}
if (stat.isFile()) {
return DAILY_MEMORY_FILE_NAME_RE.test(path.basename(resolvedPath)) ? [resolvedPath] : [];
}
@@ -1734,6 +1742,11 @@ export async function runMemoryRemBackfill(opts: MemoryRemBackfillOptions) {
workspaceDir: scratchDir,
inputPaths: workspaceSourceFiles,
});
const sourcePathByDay = new Map(
sourceFiles
.map((sourcePath) => [extractIsoDayFromPath(sourcePath), sourcePath] as const)
.filter((entry): entry is [string, string] => Boolean(entry[0])),
);
const entries = grounded.files
.map((file) => {
const isoDay = extractIsoDayFromPath(file.path);
@@ -1742,7 +1755,7 @@ export async function runMemoryRemBackfill(opts: MemoryRemBackfillOptions) {
}
return {
isoDay,
sourcePath: file.path,
sourcePath: sourcePathByDay.get(isoDay) ?? file.path,
bodyLines: groundedMarkdownToDiaryLines(file.renderedMarkdown),
};
})

View File

@@ -1066,6 +1066,7 @@ describe("memory cli", () => {
const dreams = await fs.readFile(path.join(workspaceDir, "DREAMS.md"), "utf-8");
expect(dreams).toContain("openclaw:dreaming:backfill-entry");
expect(dreams).toContain(`source=${historyPath}`);
expect(dreams).toContain("January 1, 2025");
expect(dreams).toContain("What Happened");
expect(dreams).toContain("Possible Lasting Updates");
@@ -1074,6 +1075,26 @@ describe("memory cli", () => {
});
});
// Runs `rem-backfill --path` against a location that is never created inside the
// temp workspace. The test expects a reported runtime error mentioning
// "found no YYYY-MM-DD.md files" and expects the mocked manager to be closed.
it("treats a missing historical path as a controlled empty-source error", async () => {
await withTempWorkspace(async (workspaceDir) => {
const close = vi.fn(async () => {});
mockManager({
status: () => makeMemoryStatus({ workspaceDir }),
close,
});
const errors = spyRuntimeErrors(defaultRuntime);
// "missing-history" does not exist: nothing in this test creates it.
await runMemoryCli(["rem-backfill", "--path", path.join(workspaceDir, "missing-history")]);
// At least one recorded error call must carry the empty-source message.
expect(
errors.mock.calls.some((call) =>
String(call[0]).includes("found no YYYY-MM-DD.md files"),
),
).toBe(true);
// The manager must be released on this error path as well.
expect(close).toHaveBeenCalled();
});
});
it("stages grounded durable candidates into the live short-term store", async () => {
await withTempWorkspace(async (workspaceDir) => {
const historyDir = path.join(workspaceDir, "history");
@@ -1301,6 +1322,44 @@ describe("memory cli", () => {
});
});
// Feeds a daily note whose bullets contain hyphenated words ("long-term",
// "self-aware") through `rem-harness --json --grounded` and checks that the
// rendered markdown of the first grounded file does not contain the words
// broken apart after the hyphen.
it("does not split hyphenated words into malformed grounded candidates", async () => {
await withTempWorkspace(async (workspaceDir) => {
const historyDir = path.join(workspaceDir, "history");
await fs.mkdir(historyDir, { recursive: true });
// The file name follows the YYYY-MM-DD.md daily-note pattern.
const historyPath = path.join(historyDir, "2025-02-20.md");
await fs.writeFile(
historyPath,
[
"## Preferences Learned",
"- Use long-term plans, avoid reactive task switching.",
"- A self-aware workflow note should stay intact.",
].join("\n") + "\n",
"utf-8",
);
const close = vi.fn(async () => {});
mockManager({
status: () => makeMemoryStatus({ workspaceDir }),
close,
});
const writeJson = spyRuntimeJson(defaultRuntime);
await runMemoryCli(["rem-harness", "--json", "--grounded", "--path", historyPath]);
// Only the fields read below are described in the payload type.
const payload = firstWrittenJsonArg<{
grounded?: {
files?: Array<{
renderedMarkdown?: string;
}>;
} | null;
}>(writeJson);
// Falls back to an empty string when no grounded file was rendered.
const rendered = payload?.grounded?.files?.[0]?.renderedMarkdown ?? "";
// "long- term" / "self- aware" would indicate a split at the in-word hyphen.
expect(rendered).not.toContain("Use long- term plans");
expect(rendered).not.toContain("A self- aware workflow note");
expect(close).toHaveBeenCalled();
});
});
it("rolls back grounded rem backfill entries from DREAMS.md", async () => {
await withTempWorkspace(async (workspaceDir) => {
const dreamsPath = path.join(workspaceDir, "DREAMS.md");

View File

@@ -126,6 +126,10 @@ describe("backfill diary entries", () => {
expect(formatBackfillDiaryDate("2026-01-01", "UTC")).toBe("January 1, 2026");
});
// The formatted label must match the iso day that was passed in, even when the
// timezone argument names a zone far ahead of UTC.
it("preserves the iso day label in high-positive-offset timezones", () => {
expect(formatBackfillDiaryDate("2026-01-01", "Pacific/Kiritimati")).toBe("January 1, 2026");
});
it("builds a marked backfill diary entry", () => {
const entry = buildBackfillDiaryEntry({
isoDay: "2026-01-01",
@@ -201,6 +205,29 @@ describe("backfill diary entries", () => {
expect(content).toContain("Keep this real dream.");
expect(content).not.toContain("Remove this backfill.");
});
// Makes DREAMS.md a symlink to another file in the workspace and expects
// writeBackfillDiaryEntries to reject with the symlink refusal message while
// the link target keeps its original content.
it("refuses to overwrite a symlinked DREAMS.md during backfill writes", async () => {
const workspaceDir = await createTempWorkspace("openclaw-dreaming-backfill-");
const targetPath = path.join(workspaceDir, "outside.txt");
const dreamsPath = path.join(workspaceDir, "DREAMS.md");
await fs.writeFile(targetPath, "outside\n", "utf-8");
// DREAMS.md -> outside.txt
await fs.symlink(targetPath, dreamsPath);
await expect(
writeBackfillDiaryEntries({
workspaceDir,
timezone: "UTC",
entries: [
{
isoDay: "2026-01-01",
sourcePath: "memory/2026-01-01.md",
bodyLines: ["What Happened", "1. First pass."],
},
],
}),
).rejects.toThrow("Refusing to write symlinked DREAMS.md");
// The file behind the symlink must be untouched by the rejected write.
await expect(fs.readFile(targetPath, "utf-8")).resolves.toBe("outside\n");
});
});
describe("appendNarrativeEntry", () => {

View File

@@ -240,17 +240,53 @@ function stripBackfillDiaryBlocks(existing: string): { updated: string; removed:
};
}
export function formatBackfillDiaryDate(isoDay: string, timezone?: string): string {
export function formatBackfillDiaryDate(isoDay: string, _timezone?: string): string {
const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(isoDay);
if (!match) {
return isoDay;
}
const [, year, month, day] = match;
const opts: Intl.DateTimeFormatOptions = {
timeZone: timezone ?? "UTC",
// Preserve the source iso day exactly; backfill labels should not drift by timezone.
timeZone: "UTC",
year: "numeric",
month: "long",
day: "numeric",
};
const epochMs = Date.parse(`${isoDay}T12:00:00Z`);
const epochMs = Date.UTC(Number(year), Number(month) - 1, Number(day), 12);
return new Intl.DateTimeFormat("en-US", opts).format(new Date(epochMs));
}
/**
 * Checks that `dreamsPath` is something this module is willing to replace.
 *
 * Uses `lstat`, so a symbolic link is inspected itself rather than followed.
 * A path that does not exist yet is accepted.
 *
 * @param dreamsPath - Location of the DREAMS.md file about to be written.
 * @throws Error when the path is a symbolic link, or exists but is not a regular file.
 * @throws Any `lstat` failure other than `ENOENT` is rethrown unchanged.
 */
async function assertSafeDreamsPath(dreamsPath: string): Promise<void> {
  let info;
  try {
    info = await fs.lstat(dreamsPath);
  } catch (err) {
    // Nothing on disk yet: there is nothing to protect.
    if ((err as NodeJS.ErrnoException).code === "ENOENT") {
      return;
    }
    throw err;
  }
  if (info.isSymbolicLink()) {
    throw new Error("Refusing to write symlinked DREAMS.md");
  }
  if (info.isFile()) {
    return;
  }
  // Directories, sockets, devices and the like end up here.
  throw new Error("Refusing to write non-file DREAMS.md");
}
/**
 * Replaces `dreamsPath` with `content` by writing a sibling temp file and
 * renaming it over the destination.
 *
 * The destination is checked with `assertSafeDreamsPath` first. The temp file
 * is created with the exclusive `wx` flag, so an existing file of the same
 * name is never overwritten.
 *
 * @param dreamsPath - Destination file to replace.
 * @param content - Full new file content, written as UTF-8.
 * @throws Whatever `assertSafeDreamsPath`, `writeFile` or `rename` throws; the
 *   temp file created by this call is removed (best effort) before rethrowing.
 */
async function writeDreamsFileAtomic(dreamsPath: string, content: string): Promise<void> {
  await assertSafeDreamsPath(dreamsPath);
  // pid + timestamp alone can repeat for two writes issued within the same
  // millisecond; the random suffix keeps the exclusive create from failing
  // spuriously with EEXIST in that case.
  const nonce = Math.random().toString(36).slice(2, 10);
  const tempPath = `${dreamsPath}.${process.pid}.${Date.now()}.${nonce}.tmp`;
  try {
    await fs.writeFile(tempPath, content, { encoding: "utf-8", flag: "wx" });
  } catch (err) {
    // A failed write can leave a partial temp file behind. EEXIST means the
    // file was not created by this call, so it is left alone.
    if ((err as NodeJS.ErrnoException | undefined)?.code !== "EEXIST") {
      await fs.rm(tempPath, { force: true }).catch(() => {});
    }
    throw err;
  }
  try {
    await fs.rename(tempPath, dreamsPath);
  } catch (err) {
    // Best-effort cleanup; the rename error is the one worth reporting.
    await fs.rm(tempPath, { force: true }).catch(() => {});
    throw err;
  }
}
export function buildBackfillDiaryEntry(params: {
isoDay: string;
bodyLines: string[];
@@ -295,7 +331,7 @@ export async function writeBackfillDiaryEntries(params: {
),
];
const updated = replaceDiaryContent(stripped.updated, joinDiaryBlocks(nextBlocks));
await fs.writeFile(dreamsPath, updated, "utf-8");
await writeDreamsFileAtomic(dreamsPath, updated);
return {
dreamsPath,
written: params.entries.length,
@@ -311,7 +347,7 @@ export async function removeBackfillDiaryEntries(params: {
const stripped = stripBackfillDiaryBlocks(existing);
if (stripped.removed > 0 || existing.length > 0) {
await fs.mkdir(path.dirname(dreamsPath), { recursive: true });
await fs.writeFile(dreamsPath, stripped.updated, "utf-8");
await writeDreamsFileAtomic(dreamsPath, stripped.updated);
}
return {
dreamsPath,

View File

@@ -56,6 +56,9 @@ const REM_TIME_PREFIX_RE = /^\d{1,2}:\d{2}\s*-\s*/;
const REM_CODE_FENCE_RE = /^\s*```/;
const REM_TABLE_RE = /^\s*\|.*\|\s*$/;
const REM_TABLE_DIVIDER_RE = /^\s*\|?[\s:-]+\|[\s|:-]*$/;
const MAX_GROUNDED_REM_FILES = 512;
const MAX_GROUNDED_REM_FILE_BYTES = 1_000_000;
const GROUNDED_REM_SKIPPED_DIRS = new Set([".git", "node_modules"]);
const REM_SUMMARY_FACT_LIMIT = 4;
const REM_SUMMARY_REFLECTION_LIMIT = 4;
const REM_SUMMARY_MEMORY_LIMIT = 3;
@@ -605,7 +608,7 @@ function splitTopLevelClauses(text: string, delimiter: string): string[] {
}
function splitSubjectLeadClaim(text: string): string[] {
const match = /^(?<subject>.+?(?:||-))\s*(?<rest>.+)$/u.exec(text);
const match = /^(?<subject>.+?(?:||\s-\s))\s*(?<rest>.+)$/u.exec(text);
if (!match?.groups) {
return [text];
}
@@ -1018,16 +1021,29 @@ function previewGroundedRemForFile(params: {
async function collectMarkdownFiles(inputPaths: string[]): Promise<string[]> {
const found = new Set<string>();
async function walk(targetPath: string): Promise<void> {
if (found.size >= MAX_GROUNDED_REM_FILES) {
return;
}
const resolved = path.resolve(targetPath);
const stat = await fs.stat(resolved);
const stat = await fs.lstat(resolved);
if (stat.isSymbolicLink()) {
return;
}
if (stat.isDirectory()) {
const entries = await fs.readdir(resolved, { withFileTypes: true });
for (const entry of entries) {
if (entry.isDirectory() && GROUNDED_REM_SKIPPED_DIRS.has(entry.name)) {
continue;
}
await walk(path.join(resolved, entry.name));
}
return;
}
if (stat.isFile() && resolved.toLowerCase().endsWith(".md")) {
if (
stat.isFile() &&
stat.size <= MAX_GROUNDED_REM_FILE_BYTES &&
resolved.toLowerCase().endsWith(".md")
) {
found.add(resolved);
}
}

View File

@@ -810,7 +810,15 @@ describe("doctor.memory.dreamDiary", () => {
workspaceDir,
inputPaths: [path.join(workspaceDir, "memory", "2026-02-19.md")],
});
expect(writeBackfillDiaryEntries).toHaveBeenCalled();
expect(writeBackfillDiaryEntries).toHaveBeenCalledWith(
expect.objectContaining({
entries: [
expect.objectContaining({
bodyLines: expect.arrayContaining(["What Happened", "1. Bunji — partner"]),
}),
],
}),
);
expect(respond).toHaveBeenCalledWith(
true,
expect.objectContaining({
@@ -827,6 +835,31 @@ describe("doctor.memory.dreamDiary", () => {
}
});
// Points the handler at a freshly created temp directory in which this test
// writes nothing, and expects a successful response with zero counts while the
// grounded preview and the diary writer are never invoked.
it("no-ops backfill when the workspace has no daily memory files", async () => {
const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "doctor-dream-diary-empty-"));
resolveAgentWorkspaceDir.mockReturnValue(workspaceDir);
const respond = vi.fn();
try {
await invokeDoctorMemoryBackfillDreamDiary(respond);
// Neither the preview step nor the write step may run.
expect(previewGroundedRemMarkdown).not.toHaveBeenCalled();
expect(writeBackfillDiaryEntries).not.toHaveBeenCalled();
// The response is still a success, reporting that nothing was scanned or written.
expect(respond).toHaveBeenCalledWith(
true,
expect.objectContaining({
agentId: "main",
action: "backfill",
scannedFiles: 0,
written: 0,
replaced: 0,
}),
undefined,
);
} finally {
// Always remove the temp workspace, even when an expectation fails.
await fs.rm(workspaceDir, { recursive: true, force: true });
}
});
it("resets only backfilled dream diary entries", async () => {
const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "doctor-dream-diary-reset-"));
await fs.writeFile(path.join(workspaceDir, "DREAMS.md"), "# Dream Diary\n", "utf-8");

View File

@@ -141,7 +141,7 @@ function extractIsoDayFromPath(filePath: string): string | null {
function groundedMarkdownToDiaryLines(markdown: string): string[] {
return markdown
.split("\n")
.map((line) => line.trimEnd())
.map((line) => line.replace(/^##\s+/, "").trimEnd())
.filter((line, index, lines) => line.length > 0 || (index > 0 && lines[index - 1]?.length > 0));
}
@@ -241,6 +241,18 @@ function normalizeMemoryPath(rawPath: string): string {
return rawPath.replaceAll("\\", "/").replace(/^\.\//, "");
}
/**
 * Normalizes `rawPath` with `normalizeMemoryPath` and, when it is an absolute
 * path located under `workspaceDir`, returns it relative to the workspace.
 *
 * @param workspaceDir - Workspace root; trailing separators are ignored.
 * @param rawPath - Path to normalize; relative paths are only normalized.
 * @returns The workspace-relative path when `rawPath` is absolute and inside
 *   the workspace, otherwise the normalized path unchanged.
 */
function normalizeMemoryPathForWorkspace(workspaceDir: string, rawPath: string): string {
  const normalized = normalizeMemoryPath(rawPath);
  const workspaceNormalized = normalizeMemoryPath(workspaceDir);
  // With an empty workspace the prefix would collapse to "/" and strip the
  // leading slash from every absolute path, so leave the path untouched.
  if (workspaceNormalized.length === 0 || !path.isAbsolute(rawPath)) {
    return normalized;
  }
  // Drop trailing separators so "/ws/" and "/ws" build the same "/ws/" prefix;
  // otherwise the prefix becomes "/ws//" and never matches.
  const workspacePrefix = `${workspaceNormalized.replace(/\/+$/, "")}/`;
  if (normalized.startsWith(workspacePrefix)) {
    return normalized.slice(workspacePrefix.length);
  }
  return normalized;
}
function isShortTermMemoryPath(filePath: string): boolean {
const normalized = normalizeMemoryPath(filePath);
if (/(?:^|\/)memory\/(\d{4})-(\d{2})-(\d{2})\.md$/.test(normalized)) {
@@ -403,14 +415,15 @@ async function loadDreamingStoreStats(
const dailyCount = toNonNegativeInt(entry.dailyCount);
const groundedCount = toNonNegativeInt(entry.groundedCount);
const totalEntrySignalCount = recallCount + dailyCount + groundedCount;
const normalizedEntryPath = normalizeMemoryPathForWorkspace(workspaceDir, entryPath);
const snippet =
normalizeTrimmedString(entry.snippet) ??
normalizeTrimmedString(entry.summary) ??
normalizeMemoryPath(entryPath);
normalizedEntryPath;
const lastRecalledAt = normalizeTrimmedString(entry.lastRecalledAt);
const detail: DoctorMemoryDreamingEntryPayload = {
key: entryKey,
path: normalizeMemoryPath(entryPath),
path: normalizedEntryPath,
startLine: range.startLine,
endLine: Math.max(range.startLine, range.endLine),
snippet,
@@ -865,6 +878,20 @@ export const doctorHandlers: GatewayRequestHandlers = {
const workspaceDir = resolveAgentWorkspaceDir(cfg, agentId);
const memoryDir = path.join(workspaceDir, "memory");
const sourceFiles = await listWorkspaceDailyFiles(memoryDir);
if (sourceFiles.length === 0) {
const dreamDiary = await readDreamDiary(workspaceDir);
const payload: DoctorMemoryDreamDiaryActionPayload = {
agentId,
path: dreamDiary.path,
action: "backfill",
found: dreamDiary.found,
scannedFiles: 0,
written: 0,
replaced: 0,
};
respond(true, payload, undefined);
return;
}
const grounded = await previewGroundedRemMarkdown({
workspaceDir,
inputPaths: sourceFiles,