From a1b01f0281d00d6b716d92fc3b7389b765750941 Mon Sep 17 00:00:00 2001 From: Josh Lehman Date: Wed, 15 Apr 2026 13:09:07 -0700 Subject: [PATCH] fix(memory-core): skip dreaming transcript ingestion via session store (#67315) Merged via squash. Prepared head SHA: 87c09b2a7544ba9349d437cca767d6a6be30b35a Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com> Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com> Reviewed-by: @jalehman --- CHANGELOG.md | 10 ++ .../memory-core/src/dreaming-phases.test.ts | 113 ++++++++++++++++++ extensions/memory-core/src/dreaming-phases.ts | 20 +++- src/memory-host-sdk/engine-qmd.ts | 3 + .../host/session-files.test.ts | 44 +++++++ src/memory-host-sdk/host/session-files.ts | 87 +++++++++++++- 6 files changed, 273 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 310436981b9..f23a7f5858f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,16 @@ Docs: https://docs.openclaw.ai - Gateway/tools: anchor trusted local `MEDIA:` tool-result passthrough on the exact raw name of this run's registered built-in tools, and reject client tool definitions whose names normalize-collide with a built-in or with another client tool in the same request (`400 invalid_request_error` on both JSON and SSE paths), so a client-supplied tool named like a built-in can no longer inherit its local-media trust. (#67303) - Agents/replay recovery: classify the provider wording `401 input item ID does not belong to this connection` as replay-invalid, so users get the existing `/new` session reset guidance instead of a raw 401-style failure. (#66475) Thanks @dallylee. +- fix(gateway): enforce localRoots containment on webchat audio embedding path [AI-assisted]. (#67298) Thanks @pgondhi987. +- fix(matrix): block DM pairing-store entries from authorizing room control commands [AI-assisted]. (#67294) Thanks @pgondhi987. +- Docker/build: verify `@matrix-org/matrix-sdk-crypto-nodejs` native bindings with `find` under `node_modules` instead of a hardcoded `.pnpm/...` path so pnpm v10+ virtual-store layouts no longer fail the image build. (#67143) thanks @ly85206559. +- Matrix/E2EE: keep startup bootstrap conservative for passwordless token-auth bots, still attempt the guarded repair pass without requiring `channels.matrix.password`, and document the remaining password-UIA limitation. (#66228) Thanks @SARAMALI15792. +- Cron/announce delivery: suppress mixed-content isolated cron announce replies that end with `NO_REPLY` so trailing silent sentinels no longer leak summary text to the target channel. (#65004) thanks @neo1027144-creator. +- Plugins/bundled channels: partition bundled channel lazy caches by active bundled root so `OPENCLAW_BUNDLED_PLUGINS_DIR` flips stop reusing stale plugin, setup, secrets, and runtime state. (#67200) Thanks @gumadeiras. +- Packaging/plugins: prune common test/spec cargo from bundled plugin runtime dependencies and fail npm release validation if packaged test cargo reappears, keeping published tarballs leaner without plugin-specific special cases. (#67275) thanks @gumadeiras. +- Agents/context + Memory: trim default startup/skills prompt budgets, cap `memory_get` excerpts by default with explicit continuation metadata, and keep QMD reads aligned with the same bounded excerpt contract so long sessions pull less context by default without losing deterministic follow-up reads. +- Matrix/commands: skip DM pairing-store reads on room traffic now that room control-command authorization ignores pairing-store entries, keeping the room path narrower without changing room auth behavior. (#67325) Thanks @gumadeiras. +- Memory-core/dreaming: skip dreaming narrative transcripts from session-store metadata before bootstrap records land so dream diary prompt/prose lines do not pollute session ingestion. (#67315) thanks @jalehman. ## 2026.4.15-beta.1 diff --git a/extensions/memory-core/src/dreaming-phases.test.ts b/extensions/memory-core/src/dreaming-phases.test.ts index 64f14f0777e..ce5f97ce1cc 100644 --- a/extensions/memory-core/src/dreaming-phases.test.ts +++ b/extensions/memory-core/src/dreaming-phases.test.ts @@ -720,6 +720,119 @@ describe("memory-core dreaming phases", () => { ]); }); + it("skips dreaming transcripts when the session store identifies them before bootstrap lands", async () => { + const workspaceDir = await createDreamingWorkspace(); + vi.stubEnv("OPENCLAW_TEST_FAST", "1"); + vi.stubEnv("OPENCLAW_STATE_DIR", path.join(workspaceDir, ".state")); + const sessionsDir = resolveSessionTranscriptsDirForAgent("main"); + await fs.mkdir(sessionsDir, { recursive: true }); + const transcriptPath = path.join(sessionsDir, "dreaming-narrative.jsonl"); + await fs.writeFile( + transcriptPath, + [ + JSON.stringify({ + type: "message", + message: { + role: "user", + timestamp: "2026-04-05T18:01:00.000Z", + content: [ + { type: "text", text: "Write a dream diary entry from these memory fragments." }, + ], + }, + }), + JSON.stringify({ + type: "message", + message: { + role: "assistant", + timestamp: "2026-04-05T18:02:00.000Z", + content: [{ type: "text", text: "I drift through the same archive again." }], + }, + }), + ].join("\n") + "\n", + "utf-8", + ); + await fs.writeFile( + path.join(sessionsDir, "sessions.json"), + JSON.stringify({ + "agent:main:dreaming-narrative-light-1775894400455": { + sessionId: "dreaming-narrative", + sessionFile: transcriptPath, + updatedAt: Date.parse("2026-04-05T18:05:00.000Z"), + }, + }), + "utf-8", + ); + const mtime = new Date("2026-04-05T18:05:00.000Z"); + await fs.utimes(transcriptPath, mtime, mtime); + + const { beforeAgentReply } = createHarness( + { + agents: { + defaults: { + workspace: workspaceDir, + }, + list: [{ id: "main", workspace: workspaceDir }], + }, + plugins: { + entries: { + "memory-core": { + config: { + dreaming: { + enabled: true, + phases: { + light: { + enabled: true, + limit: 20, + lookbackDays: 7, + }, + }, + }, + }, + }, + }, + }, + }, + workspaceDir, + ); + + try { + await beforeAgentReply( + { cleanedBody: "__openclaw_memory_core_light_sleep__" }, + { trigger: "heartbeat", workspaceDir }, + ); + } finally { + vi.unstubAllEnvs(); + } + + await expect( + fs.access(path.join(workspaceDir, "memory", ".dreams", "session-corpus", "2026-04-05.txt")), + ).rejects.toMatchObject({ code: "ENOENT" }); + + const sessionIngestion = JSON.parse( + await fs.readFile( + path.join(workspaceDir, "memory", ".dreams", "session-ingestion.json"), + "utf-8", + ), + ) as { + files: Record< + string, + { + lineCount: number; + lastContentLine: number; + contentHash: string; + } + >; + }; + expect(Object.keys(sessionIngestion.files)).toHaveLength(1); + expect(Object.values(sessionIngestion.files)).toEqual([ + expect.objectContaining({ + lineCount: 0, + lastContentLine: 0, + contentHash: expect.any(String), + }), + ]); + }); + it("does not reread unchanged dreaming-generated transcripts after checkpointing skip state", async () => { const workspaceDir = await createDreamingWorkspace(); vi.stubEnv("OPENCLAW_TEST_FAST", "1"); diff --git a/extensions/memory-core/src/dreaming-phases.ts b/extensions/memory-core/src/dreaming-phases.ts index d8f7f7295fd..993129c7a70 100644 --- a/extensions/memory-core/src/dreaming-phases.ts +++ b/extensions/memory-core/src/dreaming-phases.ts @@ -6,6 +6,8 @@ import type { OpenClawConfig, OpenClawPluginApi } from "openclaw/plugin-sdk/memo import { buildSessionEntry, listSessionFilesForAgent, + loadDreamingNarrativeTranscriptPathSetForAgent, + normalizeSessionTranscriptPathForComparison, parseUsageCountedSessionIdFromFileName, sessionPathForFile, } from "openclaw/plugin-sdk/memory-core-host-engine-qmd"; @@ -688,13 +690,25 @@ async function collectSessionIngestionBatches(params: { const nextSeenMessages: Record = { ...params.state.seenMessages }; let changed = false; - const sessionFiles: Array<{ agentId: string; absolutePath: string; sessionPath: string }> = []; + const sessionFiles: Array<{ + agentId: string; + absolutePath: string; + generatedByDreamingNarrative: boolean; + sessionPath: string; + }> = []; for (const agentId of agentIds) { const files = await listSessionFilesForAgent(agentId); + const dreamingTranscriptPaths = + files.length > 0 + ? loadDreamingNarrativeTranscriptPathSetForAgent(agentId) + : new Set(); for (const absolutePath of files) { sessionFiles.push({ agentId, absolutePath, + generatedByDreamingNarrative: dreamingTranscriptPaths.has( + normalizeSessionTranscriptPathForComparison(absolutePath), + ), sessionPath: sessionPathForFile(absolutePath), }); } @@ -751,7 +765,9 @@ async function collectSessionIngestionBatches(params: { continue; } - const entry = await buildSessionEntry(file.absolutePath); + const entry = await buildSessionEntry(file.absolutePath, { + generatedByDreamingNarrative: file.generatedByDreamingNarrative, + }); if (!entry) { continue; } diff --git a/src/memory-host-sdk/engine-qmd.ts b/src/memory-host-sdk/engine-qmd.ts index 1c4ac361c91..8ef479b3e4d 100644 --- a/src/memory-host-sdk/engine-qmd.ts +++ b/src/memory-host-sdk/engine-qmd.ts @@ -4,7 +4,10 @@ export { extractKeywords, isQueryStopWordToken } from "./host/query-expansion.js export { buildSessionEntry, listSessionFilesForAgent, + loadDreamingNarrativeTranscriptPathSetForAgent, + normalizeSessionTranscriptPathForComparison, sessionPathForFile, + type BuildSessionEntryOptions, type SessionFileEntry, } from "./host/session-files.js"; export { parseUsageCountedSessionIdFromFileName } from "../config/sessions/artifacts.js"; diff --git a/src/memory-host-sdk/host/session-files.test.ts b/src/memory-host-sdk/host/session-files.test.ts index c585698b96c..fd5d9a22efb 100644 --- a/src/memory-host-sdk/host/session-files.test.ts +++ b/src/memory-host-sdk/host/session-files.test.ts @@ -175,6 +175,50 @@ describe("buildSessionEntry", () => { expect(entry?.generatedByDreamingNarrative).toBe(true); }); + it("flags dreaming narrative transcripts from the sibling session store before bootstrap lands", async () => { + const sessionsDir = path.join(tmpDir, "agents", "main", "sessions"); + await fs.mkdir(sessionsDir, { recursive: true }); + const filePath = path.join(sessionsDir, "dreaming-session.jsonl"); + await fs.writeFile( + filePath, + [ + JSON.stringify({ + type: "message", + message: { + role: "user", + content: + "Write a dream diary entry from these memory fragments:\n- Candidate: durable note", + }, + }), + JSON.stringify({ + type: "message", + message: { + role: "assistant", + content: "A drifting archive breathed in moonlight.", + }, + }), + ].join("\n"), + ); + await fs.writeFile( + path.join(sessionsDir, "sessions.json"), + JSON.stringify({ + "agent:main:dreaming-narrative-light-1775894400455": { + sessionId: "dreaming-session", + sessionFile: filePath, + updatedAt: Date.now(), + }, + }), + "utf-8", + ); + + const entry = await buildSessionEntry(filePath); + + expect(entry).not.toBeNull(); + expect(entry?.generatedByDreamingNarrative).toBe(true); + expect(entry?.content).toBe(""); + expect(entry?.lineMap).toEqual([]); + }); + it("does not flag ordinary transcripts that quote the dream-diary prompt", async () => { const jsonlLines = [ JSON.stringify({ diff --git a/src/memory-host-sdk/host/session-files.ts b/src/memory-host-sdk/host/session-files.ts index 221f21f8ca5..5865cf9bbb3 100644 --- a/src/memory-host-sdk/host/session-files.ts +++ b/src/memory-host-sdk/host/session-files.ts @@ -2,6 +2,7 @@ import fs from "node:fs/promises"; import path from "node:path"; import { isUsageCountedSessionTranscriptFileName } from "../../config/sessions/artifacts.js"; import { resolveSessionTranscriptsDirForAgent } from "../../config/sessions/paths.js"; +import { loadSessionStore } from "../../config/sessions/store-load.js"; import { redactSensitiveText } from "../../logging/redact.js"; import { createSubsystemLogger } from "../../logging/subsystem.js"; import { hashText } from "./internal.js"; @@ -24,6 +25,11 @@ export type SessionFileEntry = { generatedByDreamingNarrative?: boolean; }; +export type BuildSessionEntryOptions = { + /** Optional preclassification from a caller-managed dreaming transcript lookup. */ + generatedByDreamingNarrative?: boolean; +}; + function isDreamingNarrativeBootstrapRecord(record: unknown): boolean { if (!record || typeof record !== "object" || Array.isArray(record)) { return false; @@ -78,6 +84,79 @@ function isDreamingNarrativeGeneratedRecord(record: unknown): boolean { return hasDreamingNarrativeRunId(nested.runId) || hasDreamingNarrativeRunId(nested.sessionKey); } +function isDreamingNarrativeSessionStoreKey(sessionKey: string): boolean { + const trimmed = sessionKey.trim(); + if (!trimmed) { + return false; + } + const firstSeparator = trimmed.indexOf(":"); + if (firstSeparator < 0) { + return trimmed.startsWith(DREAMING_NARRATIVE_RUN_PREFIX); + } + const secondSeparator = trimmed.indexOf(":", firstSeparator + 1); + const sessionSegment = secondSeparator < 0 ? trimmed : trimmed.slice(secondSeparator + 1); + return sessionSegment.startsWith(DREAMING_NARRATIVE_RUN_PREFIX); +} + +function normalizeComparablePath(pathname: string): string { + const resolved = path.resolve(pathname); + return process.platform === "win32" ? resolved.toLowerCase() : resolved; +} + +export function normalizeSessionTranscriptPathForComparison(pathname: string): string { + return normalizeComparablePath(pathname); +} + +function resolveSessionStoreTranscriptPath( + sessionsDir: string, + entry: { sessionFile?: unknown; sessionId?: unknown } | undefined, +): string | null { + if (typeof entry?.sessionFile === "string" && entry.sessionFile.trim().length > 0) { + const sessionFile = entry.sessionFile.trim(); + const resolved = path.isAbsolute(sessionFile) + ? sessionFile + : path.resolve(sessionsDir, sessionFile); + return normalizeComparablePath(resolved); + } + if (typeof entry?.sessionId === "string" && entry.sessionId.trim().length > 0) { + return normalizeComparablePath(path.join(sessionsDir, `${entry.sessionId.trim()}.jsonl`)); + } + return null; +} + +export function loadDreamingNarrativeTranscriptPathSetForSessionsDir( + sessionsDir: string, +): ReadonlySet { + const storePath = path.join(sessionsDir, "sessions.json"); + const store = loadSessionStore(storePath); + const dreamingTranscriptPaths = new Set(); + for (const [sessionKey, entry] of Object.entries(store)) { + if (!isDreamingNarrativeSessionStoreKey(sessionKey)) { + continue; + } + const transcriptPath = resolveSessionStoreTranscriptPath(sessionsDir, entry); + if (transcriptPath) { + dreamingTranscriptPaths.add(transcriptPath); + } + } + return dreamingTranscriptPaths; +} + +export function loadDreamingNarrativeTranscriptPathSetForAgent( + agentId: string, +): ReadonlySet { + return loadDreamingNarrativeTranscriptPathSetForSessionsDir( + resolveSessionTranscriptsDirForAgent(agentId), + ); +} + +function isDreamingNarrativeTranscriptFromSessionStore(absPath: string): boolean { + const sessionsDir = path.dirname(absPath); + const normalizedAbsPath = normalizeComparablePath(absPath); + const dreamingTranscriptPaths = loadDreamingNarrativeTranscriptPathSetForSessionsDir(sessionsDir); + return dreamingTranscriptPaths.has(normalizedAbsPath); +} + export async function listSessionFilesForAgent(agentId: string): Promise { const dir = resolveSessionTranscriptsDirForAgent(agentId); try { @@ -153,7 +232,10 @@ function parseSessionTimestampMs( return 0; } -export async function buildSessionEntry(absPath: string): Promise { +export async function buildSessionEntry( + absPath: string, + opts: BuildSessionEntryOptions = {}, +): Promise { try { const stat = await fs.stat(absPath); const raw = await fs.readFile(absPath, "utf-8"); @@ -161,7 +243,8 @@ export async function buildSessionEntry(absPath: string): Promise