From 0c4e0d703023c93bed101fcf62a92dbfd3537bcc Mon Sep 17 00:00:00 2001 From: Gustavo Madeira Santana Date: Tue, 14 Apr 2026 20:29:12 -0400 Subject: [PATCH] memory: block dreaming self-ingestion (#66852) Merged via squash. Prepared head SHA: 4742656a0d03c90902383213ac0608bcc51c0fbd Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com> Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com> Reviewed-by: @gumadeiras --- CHANGELOG.md | 1 + docs/concepts/dreaming.md | 3 + .../memory-core/src/dreaming-phases.test.ts | 92 +++++++- extensions/memory-core/src/dreaming-phases.ts | 5 +- .../src/short-term-promotion.test.ts | 216 ++++++++++++++++++ .../memory-core/src/short-term-promotion.ts | 75 +++++- .../host/session-files.test.ts | 54 +++++ src/memory-host-sdk/host/session-files.ts | 40 +++- 8 files changed, 477 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 99a93ee0b0d..c89b72d7eba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,7 @@ Docs: https://docs.openclaw.ai - Agents/context engines: preserve prompt-only token counts, not full request totals, when deferred maintenance reuses after-turn runtime context so background compaction bookkeeping matches the active prompt window. (#66820) thanks @jalehman. - BlueBubbles/inbound: add a persistent file-backed GUID dedupe so MessagePoller webhook replays after BB Server restart or reconnect no longer cause the agent to re-reply to already-handled messages. (#19176, #12053, #66816) Thanks @omarshahine. - Secrets/plugins/status: align SecretRef inspect-vs-strict handling across plugin preload, read-only status/agents surfaces, and runtime auth paths so unresolved refs no longer crash read-only CLI flows while runtime-required non-env refs stay strict. (#66818) Thanks @joshavant. +- Memory/dreaming: stop ordinary transcripts that merely quote the dream-diary prompt from being classified as internal dreaming runs and silently dropped from session recall ingestion. (#66852) Thanks @gumadeiras. ## 2026.4.14 diff --git a/docs/concepts/dreaming.md b/docs/concepts/dreaming.md index 0a3795f4bb1..49f548e6af0 100644 --- a/docs/concepts/dreaming.md +++ b/docs/concepts/dreaming.md @@ -80,6 +80,9 @@ After each phase has enough material, `memory-core` runs a best-effort backgroun subagent turn (using the default runtime model) and appends a short diary entry. This diary is for human reading in the Dreams UI, not a promotion source. +Dreaming-generated diary/report artifacts are excluded from short-term +promotion. Only grounded memory snippets are eligible to promote into +`MEMORY.md`. There is also a grounded historical backfill lane for review and recovery work: diff --git a/extensions/memory-core/src/dreaming-phases.test.ts b/extensions/memory-core/src/dreaming-phases.test.ts index 827a3f0402d..4260686cb79 100644 --- a/extensions/memory-core/src/dreaming-phases.test.ts +++ b/extensions/memory-core/src/dreaming-phases.test.ts @@ -713,13 +713,101 @@ describe("memory-core dreaming phases", () => { expect(Object.keys(sessionIngestion.files)).toHaveLength(1); expect(Object.values(sessionIngestion.files)).toEqual([ expect.objectContaining({ - lineCount: 2, - lastContentLine: 2, + lineCount: 0, + lastContentLine: 0, contentHash: expect.any(String), }), ]); }); + it("does not reread unchanged dreaming-generated transcripts after checkpointing skip state", async () => { + const workspaceDir = await createDreamingWorkspace(); + vi.stubEnv("OPENCLAW_TEST_FAST", "1"); + vi.stubEnv("OPENCLAW_STATE_DIR", path.join(workspaceDir, ".state")); + const sessionsDir = resolveSessionTranscriptsDirForAgent("main"); + await fs.mkdir(sessionsDir, { recursive: true }); + const transcriptPath = path.join(sessionsDir, "dreaming-narrative.jsonl"); + await fs.writeFile( + transcriptPath, + [ + JSON.stringify({ + type: "custom", + customType: "openclaw:bootstrap-context:full", + data: { + runId: "dreaming-narrative-light-1775894400455", + sessionId: "dream-session-1", + }, + }), + JSON.stringify({ + type: "message", + message: { + role: "user", + timestamp: "2026-04-05T18:01:00.000Z", + content: [ + { type: "text", text: "Write a dream diary entry from these memory fragments." }, + ], + }, + }), + ].join("\n") + "\n", + "utf-8", + ); + const mtime = new Date("2026-04-05T18:05:00.000Z"); + await fs.utimes(transcriptPath, mtime, mtime); + + const { beforeAgentReply } = createHarness( + { + agents: { + defaults: { + workspace: workspaceDir, + }, + list: [{ id: "main", workspace: workspaceDir }], + }, + plugins: { + entries: { + "memory-core": { + config: { + dreaming: { + enabled: true, + phases: { + light: { + enabled: true, + limit: 20, + lookbackDays: 7, + }, + }, + }, + }, + }, + }, + }, + }, + workspaceDir, + ); + + try { + await beforeAgentReply( + { cleanedBody: "__openclaw_memory_core_light_sleep__" }, + { trigger: "heartbeat", workspaceDir }, + ); + + const readFileSpy = vi.spyOn(fs, "readFile"); + await beforeAgentReply( + { cleanedBody: "__openclaw_memory_core_light_sleep__" }, + { trigger: "heartbeat", workspaceDir }, + ); + + expect( + readFileSpy.mock.calls.some( + ([target]) => typeof target === "string" && target === transcriptPath, + ), + ).toBe(false); + readFileSpy.mockRestore(); + } finally { + vi.restoreAllMocks(); + vi.unstubAllEnvs(); + } + }); + it("dedupes reset/deleted session archives instead of double-ingesting", async () => { const workspaceDir = await createDreamingWorkspace(); vi.stubEnv("OPENCLAW_TEST_FAST", "1"); diff --git a/extensions/memory-core/src/dreaming-phases.ts b/extensions/memory-core/src/dreaming-phases.ts index f20f262df54..29f0de9c1fc 100644 --- a/extensions/memory-core/src/dreaming-phases.ts +++ b/extensions/memory-core/src/dreaming-phases.ts @@ -739,10 +739,7 @@ async function collectSessionIngestionBatches(params: { mtimeMs: Math.floor(Math.max(0, stat.mtimeMs)), size: Math.floor(Math.max(0, stat.size)), }; - const cursorAtEnd = - previous !== undefined && - previous.lineCount > 0 && - previous.lastContentLine >= previous.lineCount; + const cursorAtEnd = previous !== undefined && previous.lastContentLine >= previous.lineCount; const unchanged = Boolean(previous) && previous.mtimeMs === fingerprint.mtimeMs && diff --git a/extensions/memory-core/src/short-term-promotion.test.ts b/extensions/memory-core/src/short-term-promotion.test.ts index 85e384277be..3e0821f0968 100644 --- a/extensions/memory-core/src/short-term-promotion.test.ts +++ b/extensions/memory-core/src/short-term-promotion.test.ts @@ -203,6 +203,95 @@ describe("short-term promotion", () => { }); }); + it("ignores contaminated dreaming snippets when recording short-term recalls", async () => { + await withTempWorkspace(async (workspaceDir) => { + await recordShortTermRecalls({ + workspaceDir, + query: "action preference", + results: [ + { + path: "memory/2026-04-03.md", + source: "memory", + startLine: 1, + endLine: 1, + score: 0.92, + snippet: + "Candidate: Default to action. confidence: 0.76 evidence: memory/.dreams/session-corpus/2026-04-08.txt:1-1 recalls: 3 status: staged", + }, + ], + }); + + expect( + JSON.parse(await fs.readFile(resolveShortTermRecallStorePath(workspaceDir), "utf-8")), + ).toMatchObject({ + version: 1, + entries: {}, + }); + }); + }); + + it("ignores bullet-prefixed dreaming snippets when recording short-term recalls", async () => { + await withTempWorkspace(async (workspaceDir) => { + await recordShortTermRecalls({ + workspaceDir, + query: "action preference", + results: [ + { + path: "memory/2026-04-03.md", + source: "memory", + startLine: 1, + endLine: 5, + score: 0.92, + snippet: [ + "- Candidate: Default to action.", + " - confidence: 0.76", + " - evidence: memory/.dreams/session-corpus/2026-04-08.txt:1-1", + " - recalls: 3", + " - status: staged", + ].join("\n"), + }, + ], + }); + + expect( + JSON.parse(await fs.readFile(resolveShortTermRecallStorePath(workspaceDir), "utf-8")), + ).toMatchObject({ + version: 1, + entries: {}, + }); + }); + }); + + it("keeps ordinary snippets that only quote dreaming prompt markers", async () => { + await withTempWorkspace(async (workspaceDir) => { + await recordShortTermRecalls({ + workspaceDir, + query: "debug note", + results: [ + { + path: "memory/2026-04-03.md", + source: "memory", + startLine: 1, + endLine: 1, + score: 0.75, + snippet: + "Debug note: quote Write a dream diary entry from these memory fragments for docs, but do not use dreaming-narrative-like labels in production.", + }, + ], + }); + + const store = JSON.parse( + await fs.readFile(resolveShortTermRecallStorePath(workspaceDir), "utf-8"), + ) as { entries: Record }; + expect(Object.values(store.entries)).toEqual([ + expect.objectContaining({ + snippet: + "Debug note: quote Write a dream diary entry from these memory fragments for docs, but do not use dreaming-narrative-like labels in production.", + }), + ]); + }); + }); + it("records recalls and ranks candidates with weighted scores", async () => { await withTempWorkspace(async (workspaceDir) => { await recordShortTermRecalls({ @@ -940,6 +1029,86 @@ describe("short-term promotion", () => { }); }); + it("does not rank contaminated dreaming snippets from an existing short-term store", async () => { + await withTempWorkspace(async (workspaceDir) => { + const storePath = resolveShortTermRecallStorePath(workspaceDir); + await fs.writeFile( + storePath, + JSON.stringify( + { + version: 1, + updatedAt: "2026-04-04T00:00:00.000Z", + entries: { + contaminated: { + key: "contaminated", + path: "memory/2026-04-03.md", + startLine: 1, + endLine: 1, + source: "memory", + snippet: + "Reflections: Theme: assistant. confidence: 1.00 evidence: memory/.dreams/session-corpus/2026-04-08.txt:2-2 recalls: 4 status: staged", + recallCount: 4, + dailyCount: 0, + groundedCount: 0, + totalScore: 3.6, + maxScore: 0.95, + firstRecalledAt: "2026-04-03T00:00:00.000Z", + lastRecalledAt: "2026-04-04T00:00:00.000Z", + queryHashes: ["a", "b"], + recallDays: ["2026-04-03", "2026-04-04"], + conceptTags: ["assistant"], + }, + }, + }, + null, + 2, + ), + "utf-8", + ); + + const ranked = await rankShortTermPromotionCandidates({ + workspaceDir, + minScore: 0, + minRecallCount: 0, + minUniqueQueries: 0, + }); + + expect(ranked).toEqual([]); + }); + }); + + it("treats diff-prefixed dreaming snippets as contaminated", () => { + expect( + __testing.isContaminatedDreamingSnippet( + "@@ -1,1 - Candidate: Default to action. confidence: 0.76 evidence: memory/.dreams/session-corpus/2026-04-08.txt:1-1 recalls: 3 status: staged", + ), + ).toBe(true); + }); + + it("treats bracket-prefixed dreaming snippets as contaminated", () => { + expect( + __testing.isContaminatedDreamingSnippet( + "([ Candidate: Default to action. confidence: 0.76 evidence: memory/.dreams/session-corpus/2026-04-08.txt:1-1 recalls: 3 status: staged", + ), + ).toBe(true); + }); + + it("does not treat ordinary candidate notes with daily-memory evidence as contaminated", () => { + expect( + __testing.isContaminatedDreamingSnippet( + "Candidate: move backups weekly. confidence: 0.76 evidence: memory/2026-04-08.md:1-1", + ), + ).toBe(false); + }); + + it("treats transcript-style dreaming prompt echoes as contaminated", () => { + expect( + __testing.isContaminatedDreamingSnippet( + "[main/dreaming-narrative-light.jsonl#L1] User: Write a dream diary entry from these memory fragments:", + ), + ).toBe(true); + }); + it("skips direct candidates that exceed maxAgeDays during apply", async () => { await withTempWorkspace(async (workspaceDir) => { const applied = await applyShortTermPromotions({ @@ -987,6 +1156,53 @@ describe("short-term promotion", () => { }); }); + it("does not append contaminated dreaming snippets during direct apply", async () => { + await withTempWorkspace(async (workspaceDir) => { + const applied = await applyShortTermPromotions({ + workspaceDir, + minScore: 0, + minRecallCount: 0, + minUniqueQueries: 0, + candidates: [ + { + key: "memory:memory/2026-04-03.md:1:1", + path: "memory/2026-04-03.md", + startLine: 1, + endLine: 1, + source: "memory", + snippet: + "Candidate: Default to action. confidence: 0.76 evidence: memory/.dreams/session-corpus/2026-04-08.txt:1-1 recalls: 3 status: staged", + recallCount: 4, + avgScore: 0.97, + maxScore: 0.97, + uniqueQueries: 2, + firstRecalledAt: "2026-04-03T00:00:00.000Z", + lastRecalledAt: "2026-04-04T00:00:00.000Z", + ageDays: 0, + score: 0.99, + recallDays: ["2026-04-03", "2026-04-04"], + conceptTags: ["assistant"], + components: { + frequency: 1, + relevance: 1, + diversity: 1, + recency: 1, + consolidation: 1, + conceptual: 1, + }, + }, + ], + }); + + expect(applied.applied).toBe(0); + await expect( + fs.readFile(path.join(workspaceDir, "MEMORY.md"), "utf-8"), + ).rejects.toMatchObject({ + code: "ENOENT", + }); + }); + }); + it("applies promotion candidates to MEMORY.md and marks them promoted", async () => { await withTempWorkspace(async (workspaceDir) => { await writeDailyMemoryNote(workspaceDir, "2026-04-01", [ diff --git a/extensions/memory-core/src/short-term-promotion.ts b/extensions/memory-core/src/short-term-promotion.ts index e73a9b5816a..b20f21bab3d 100644 --- a/extensions/memory-core/src/short-term-promotion.ts +++ b/extensions/memory-core/src/short-term-promotion.ts @@ -37,6 +37,9 @@ const SHORT_TERM_LOCK_RETRY_DELAY_MS = 40; const PHASE_SIGNAL_LIGHT_BOOST_MAX = 0.06; const PHASE_SIGNAL_REM_BOOST_MAX = 0.09; const PHASE_SIGNAL_HALF_LIFE_DAYS = 14; +const DREAMING_TRANSCRIPT_PROMPT_LINE_RE = + /\[[^\]]*dreaming-narrative[^\]]*]\s*(?:User|Assistant):\s*Write a dream diary entry from these memory fragments:?/i; +const DREAMING_DIFF_PREFIX_RE = /@@\s*-\d+(?:,\d+)?\s+[-*+]\s+/iy; const inProcessShortTermLocks = new Map>(); const ensuredShortTermDirs = new Map>(); @@ -235,6 +238,62 @@ function normalizeSnippet(raw: string): string { return trimmed.replace(/\s+/g, " "); } +function consumeDreamingLeadPrefix(snippet: string): string { + let index = 0; + while (index < snippet.length) { + DREAMING_DIFF_PREFIX_RE.lastIndex = index; + const diffMatch = DREAMING_DIFF_PREFIX_RE.exec(snippet); + if (diffMatch) { + index = DREAMING_DIFF_PREFIX_RE.lastIndex; + continue; + } + const char = snippet[index]; + if (char === "[" || char === "(") { + index += 1; + while (snippet[index] === " ") { + index += 1; + } + continue; + } + if ( + (char === "-" || char === "*" || char === "+" || char === ">") && + snippet[index + 1] === " " + ) { + index += 2; + continue; + } + break; + } + return snippet.slice(index); +} + +function hasDreamingNarrativeLead(snippet: string): boolean { + const withoutPrefix = consumeDreamingLeadPrefix(snippet); + return /^Candidate:/i.test(withoutPrefix) || /^Reflections?:/i.test(withoutPrefix); +} + +function isContaminatedDreamingSnippet(raw: string): boolean { + const snippet = normalizeSnippet(raw); + if (!snippet) { + return false; + } + if ( + /