From b2f71db7bb46eda5a45fe932b014046574f3b44a Mon Sep 17 00:00:00 2001 From: Firas Alswihry Date: Sun, 31 May 2026 13:16:33 +0300 Subject: [PATCH] feat(dreaming): add report-only shadow trial runner Adds a report-only memory-core dreaming shadow-trial runner that writes inspectable artifacts without mutating durable memory. The public helper now stores default reports under daily directories with opaque content-hash filenames, so multiple same-day trials coexist without leaking candidate text into paths. Verification: - OPENCLAW_VITEST_MAX_WORKERS=1 node scripts/run-vitest.mjs run --config test/vitest/vitest.extension-memory.config.ts extensions/memory-core/src/dreaming-shadow-trial.test.ts --reporter=verbose --maxWorkers=1 - git diff --check - pnpm exec oxfmt --check extensions/memory-core/src/dreaming-shadow-trial.ts extensions/memory-core/src/dreaming-shadow-trial.test.ts - pnpm tsgo:extensions - autoreview clean: no accepted/actionable findings - GitHub CI run 26709794635 passed - Real behavior proof run 26709798698 passed - Dependency Guard run 26709794113 passed Co-authored-by: Firas Alswihry --- docs/concepts/dreaming.md | 8 + extensions/memory-core/api.ts | 12 + .../src/dreaming-shadow-trial.test.ts | 158 ++++++++++++ .../memory-core/src/dreaming-shadow-trial.ts | 237 ++++++++++++++++++ 4 files changed, 415 insertions(+) create mode 100644 extensions/memory-core/src/dreaming-shadow-trial.test.ts create mode 100644 extensions/memory-core/src/dreaming-shadow-trial.ts diff --git a/docs/concepts/dreaming.md b/docs/concepts/dreaming.md index dcd2c263d64..cd340d8c2fb 100644 --- a/docs/concepts/dreaming.md +++ b/docs/concepts/dreaming.md @@ -119,6 +119,14 @@ stays separate from `MEMORY.md` and that the agent does not claim the candidate was promoted. It does not add production shadow-trial behavior or change the deep-phase promotion engine. +The `memory-core` shadow-trial runner keeps that same report-only contract for +code paths that need a stable artifact. 
It accepts the candidate, trial prompt, +baseline outcome, candidate outcome, verdict, reason, risk flags, and evidence +references, then writes a report with `promotion action: report-only`. Helpful +verdicts map to a `promote` recommendation, neutral verdicts map to `defer`, and +harmful verdicts map to `reject`; none of those recommendations writes to +`MEMORY.md` or applies deep-phase promotion. + ## Scheduling When enabled, `memory-core` auto-manages one cron job for a full dreaming sweep. Each sweep runs phases in order: light → REM → deep. diff --git a/extensions/memory-core/api.ts b/extensions/memory-core/api.ts index 8da2730bd06..102c7f5e3d0 100644 --- a/extensions/memory-core/api.ts +++ b/extensions/memory-core/api.ts @@ -13,3 +13,15 @@ export { previewGroundedRemMarkdown } from "./src/rem-evidence.js"; export { filterRecallEntriesWithinLookback } from "./src/dreaming-phases.js"; export { previewRemHarness } from "./src/rem-harness.js"; export type { PreviewRemHarnessOptions, PreviewRemHarnessResult } from "./src/rem-harness.js"; +export { + buildDreamingShadowTrialReport, + defaultDreamingShadowTrialReportPath, + resolveDreamingShadowTrialRecommendation, + writeDreamingShadowTrialReport, +} from "./src/dreaming-shadow-trial.js"; +export type { + DreamingShadowTrialInput, + DreamingShadowTrialRecommendation, + DreamingShadowTrialReport, + DreamingShadowTrialVerdict, +} from "./src/dreaming-shadow-trial.js"; diff --git a/extensions/memory-core/src/dreaming-shadow-trial.test.ts b/extensions/memory-core/src/dreaming-shadow-trial.test.ts new file mode 100644 index 00000000000..33886105fa0 --- /dev/null +++ b/extensions/memory-core/src/dreaming-shadow-trial.test.ts @@ -0,0 +1,158 @@ +import fs from "node:fs/promises"; +import path from "node:path"; +import { describe, expect, it } from "vitest"; +import { + buildDreamingShadowTrialReport, + defaultDreamingShadowTrialReportPath, + resolveDreamingShadowTrialRecommendation, + writeDreamingShadowTrialReport, +} 
from "./dreaming-shadow-trial.js"; +import { createMemoryCoreTestHarness } from "./test-helpers.js"; + +const { createTempWorkspace } = createMemoryCoreTestHarness(); + +const baseInput = { + candidate: "The user prefers release notes with exact verification commands.", + trialPrompt: "Prepare a release readiness note.", + baselineOutcome: "Mentions tests passed without the exact command.", + candidateOutcome: "Includes the exact verification command and remaining risk.", + reason: "The candidate improves the release reply without exposing private data.", + riskFlags: ["no secret exposure", "no outdated preference conflict"], + evidenceRefs: ["memory/2026-05-18.md#L30-L49"], +}; + +describe("dreaming shadow trial runner", () => { + it("maps verdicts to report-only recommendations", () => { + expect(resolveDreamingShadowTrialRecommendation("helpful")).toBe("promote"); + expect(resolveDreamingShadowTrialRecommendation("neutral")).toBe("defer"); + expect(resolveDreamingShadowTrialRecommendation("harmful")).toBe("reject"); + }); + + it("builds the stable shadow-trial report contract", () => { + const report = buildDreamingShadowTrialReport({ + ...baseInput, + verdict: "helpful", + nowMs: Date.parse("2026-05-18T18:00:00.000Z"), + }); + + expect(report.recommendation).toBe("promote"); + expect(report.promotionAction).toBe("report-only"); + expect(report.markdown).toContain("candidate: The user prefers release notes"); + expect(report.markdown).toContain("baseline outcome: Mentions tests passed"); + expect(report.markdown).toContain("candidate outcome: Includes the exact verification command"); + expect(report.markdown).toContain("verdict: helpful"); + expect(report.markdown).toContain("recommendation: promote"); + expect(report.markdown).toContain("risk flags:"); + expect(report.markdown).toContain("- no secret exposure"); + expect(report.markdown).toContain("evidence refs:"); + expect(report.markdown).toContain("promotion action: report-only"); + 
expect(report.markdown).not.toContain("promoted to MEMORY.md"); + }); + + it("writes only the shadow-trial report and leaves MEMORY.md unchanged", async () => { + const workspaceDir = await createTempWorkspace("openclaw-shadow-trial-"); + const memoryPath = path.join(workspaceDir, "MEMORY.md"); + await fs.writeFile(memoryPath, "# Memory\n\nExisting durable memory.\n", "utf-8"); + + const report = await writeDreamingShadowTrialReport({ + ...baseInput, + verdict: "neutral", + workspaceDir, + nowMs: Date.parse("2026-05-18T18:00:00.000Z"), + }); + + expect(report.recommendation).toBe("defer"); + expect(path.dirname(report.reportPath!)).toBe( + path.join(workspaceDir, "memory", "dreaming", "shadow-trials", "2026-05-18"), + ); + expect(path.basename(report.reportPath!)).toMatch(/^[a-f0-9]{12}\.md$/); + await expect(fs.readFile(memoryPath, "utf-8")).resolves.toBe( + "# Memory\n\nExisting durable memory.\n", + ); + expect(report.reportPath).toBeTruthy(); + await expect(fs.readFile(report.reportPath!, "utf-8")).resolves.toContain( + "promotion action: report-only", + ); + }); + + it("uses the configured dreaming timezone for the default report day", async () => { + const workspaceDir = await createTempWorkspace("openclaw-shadow-trial-timezone-"); + + const report = await writeDreamingShadowTrialReport({ + ...baseInput, + verdict: "helpful", + workspaceDir, + nowMs: Date.parse("2026-05-18T21:30:00.000Z"), + timezone: "Asia/Riyadh", + }); + + expect(path.dirname(report.reportPath!)).toBe( + path.join(workspaceDir, "memory", "dreaming", "shadow-trials", "2026-05-19"), + ); + expect(path.basename(report.reportPath!)).toMatch(/^[a-f0-9]{12}\.md$/); + await expect(fs.readFile(report.reportPath!, "utf-8")).resolves.toContain( + "recommendation: promote", + ); + }); + + it("keeps distinct same-day trials in separate default report files", async () => { + const workspaceDir = await createTempWorkspace("openclaw-shadow-trial-collisions-"); + const nowMs = 
Date.parse("2026-05-18T18:00:00.000Z"); + + const first = await writeDreamingShadowTrialReport({ + ...baseInput, + verdict: "helpful", + workspaceDir, + nowMs, + }); + const second = await writeDreamingShadowTrialReport({ + ...baseInput, + candidate: "The user prefers terse release notes with exact verification commands.", + verdict: "helpful", + workspaceDir, + nowMs, + }); + + expect(first.reportPath).not.toBe(second.reportPath); + expect(path.dirname(first.reportPath!)).toBe(path.dirname(second.reportPath!)); + await expect(fs.readFile(first.reportPath!, "utf-8")).resolves.toContain( + "candidate: The user prefers release notes", + ); + await expect(fs.readFile(second.reportPath!, "utf-8")).resolves.toContain( + "candidate: The user prefers terse release notes", + ); + }); + + it("keeps risky candidates reject-only without promoting durable memory", async () => { + const workspaceDir = await createTempWorkspace("openclaw-shadow-trial-risk-"); + const reportPath = defaultDreamingShadowTrialReportPath({ + ...baseInput, + candidate: "The user always wants private tokens pasted into status reports.", + candidateOutcome: "Includes a private token in the release reply.", + verdict: "harmful", + reason: "The candidate creates secret exposure risk.", + riskFlags: ["secret exposure"], + workspaceDir, + nowMs: Date.parse("2026-05-19T01:00:00.000Z"), + }); + + const report = await writeDreamingShadowTrialReport({ + ...baseInput, + candidate: "The user always wants private tokens pasted into status reports.", + candidateOutcome: "Includes a private token in the release reply.", + verdict: "harmful", + reason: "The candidate creates secret exposure risk.", + riskFlags: ["secret exposure"], + workspaceDir, + reportPath, + }); + + expect(report.recommendation).toBe("reject"); + expect(report.markdown).toContain("verdict: harmful"); + expect(report.markdown).toContain("recommendation: reject"); + expect(report.markdown).toContain("promotion action: report-only"); + await 
expect(fs.readFile(path.join(workspaceDir, "MEMORY.md"), "utf-8")).rejects.toMatchObject({
+      code: "ENOENT",
+    });
+  });
+});
diff --git a/extensions/memory-core/src/dreaming-shadow-trial.ts b/extensions/memory-core/src/dreaming-shadow-trial.ts
new file mode 100644
index 00000000000..56ffae69230
--- /dev/null
+++ b/extensions/memory-core/src/dreaming-shadow-trial.ts
@@ -0,0 +1,254 @@
+import crypto from "node:crypto";
+import fs from "node:fs/promises";
+import path from "node:path";
+import { formatMemoryDreamingDay } from "openclaw/plugin-sdk/memory-core-host-status";
+
+/** Caller-supplied verdict on the candidate outcome relative to the baseline outcome. */
+export type DreamingShadowTrialVerdict = "helpful" | "neutral" | "harmful";
+/** Advisory recommendation written into the report; nothing in this module applies it. */
+export type DreamingShadowTrialRecommendation = "promote" | "defer" | "reject";
+
+/** Caller-supplied trial data; text fields and list entries are normalized before use. */
+export type DreamingShadowTrialInput = {
+  candidate: string;
+  trialPrompt: string;
+  baselineOutcome: string;
+  candidateOutcome: string;
+  verdict: DreamingShadowTrialVerdict;
+  reason: string;
+  riskFlags?: string[];
+  evidenceRefs?: string[];
+  /** Root for the default report path and for resolving a relative `reportPath`. */
+  workspaceDir?: string;
+  /** Explicit report location; an absolute path is used unchanged. */
+  reportPath?: string;
+  /** Epoch milliseconds that select the report day; a non-finite value means "now". */
+  nowMs?: number;
+  timezone?: string;
+};
+
+/** Normalized trial data, the rendered markdown, and the resolved report path if any. */
+export type DreamingShadowTrialReport = {
+  candidate: string;
+  trialPrompt: string;
+  baselineOutcome: string;
+  candidateOutcome: string;
+  verdict: DreamingShadowTrialVerdict;
+  recommendation: DreamingShadowTrialRecommendation;
+  reason: string;
+  riskFlags: string[];
+  evidenceRefs: string[];
+  promotionAction: "report-only";
+  reportPath?: string;
+  markdown: string;
+};
+
+/** The normalized trial fields that seed the content-hash report file name. */
+type NormalizedTrialFields = Omit<
+  DreamingShadowTrialReport,
+  "recommendation" | "promotionAction" | "reportPath" | "markdown"
+>;
+
+/** Trims `value` and collapses each whitespace run to one space so it stays on one line. */
+function collapseWhitespace(value: string): string {
+  return value.trim().replace(/\s+/g, " ");
+}
+
+/** Returns the single-line form of `value`; throws when it is empty after normalization. */
+function normalizeRequiredText(value: string, label: string): string {
+  const normalized = collapseWhitespace(value);
+  if (!normalized) {
+    throw new Error(`dreaming shadow trial requires ${label}`);
+  }
+  return normalized;
+}
+
+/**
+ * Normalizes list entries, drops empty ones, and returns `[fallback]` when none remain.
+ * Entries are collapsed to one line so an embedded newline cannot add report lines.
+ */
+function normalizeList(values: string[] | undefined, fallback: string): string[] {
+  const normalized = (values ?? []).map(collapseWhitespace).filter(Boolean);
+  return normalized.length > 0 ? normalized : [fallback];
+}
+
+/** Maps a verdict to its recommendation; anything other than helpful/harmful defers. */
+export function resolveDreamingShadowTrialRecommendation(
+  verdict: DreamingShadowTrialVerdict,
+): DreamingShadowTrialRecommendation {
+  if (verdict === "helpful") {
+    return "promote";
+  }
+  if (verdict === "harmful") {
+    return "reject";
+  }
+  return "defer";
+}
+
+/** Renders each value as a markdown bullet, one per line. */
+function formatList(values: string[]): string {
+  return values.map((value) => `- ${value}`).join("\n");
+}
+
+/** Returns the first 12 hex characters of the SHA-256 of the normalized trial fields. */
+function resolveReportContentHash(params: NormalizedTrialFields): string {
+  const seed = JSON.stringify([
+    params.candidate,
+    params.trialPrompt,
+    params.baselineOutcome,
+    params.candidateOutcome,
+    params.verdict,
+    params.reason,
+    params.riskFlags,
+    params.evidenceRefs,
+  ]);
+  return crypto.createHash("sha256").update(seed).digest("hex").slice(0, 12);
+}
+
+/**
+ * Computes `<workspaceDir>/memory/dreaming/shadow-trials/<day>/<contentHash>.md`.
+ * The file name is a hash of the normalized trial fields, so no trial text is in the path.
+ *
+ * @throws Error when a required text field is empty after normalization.
+ */
+export function defaultDreamingShadowTrialReportPath(params: {
+  workspaceDir: string;
+  candidate: string;
+  trialPrompt: string;
+  baselineOutcome: string;
+  candidateOutcome: string;
+  verdict: DreamingShadowTrialVerdict;
+  reason: string;
+  riskFlags?: string[];
+  evidenceRefs?: string[];
+  nowMs?: number;
+  timezone?: string;
+}): string {
+  const nowMs =
+    typeof params.nowMs === "number" && Number.isFinite(params.nowMs) ? params.nowMs : Date.now();
+  const day = formatMemoryDreamingDay(nowMs, params.timezone);
+  const contentHash = resolveReportContentHash({
+    candidate: normalizeRequiredText(params.candidate, "candidate"),
+    trialPrompt: normalizeRequiredText(params.trialPrompt, "trialPrompt"),
+    baselineOutcome: normalizeRequiredText(params.baselineOutcome, "baselineOutcome"),
+    candidateOutcome: normalizeRequiredText(params.candidateOutcome, "candidateOutcome"),
+    verdict: params.verdict,
+    reason: normalizeRequiredText(params.reason, "reason"),
+    riskFlags: normalizeList(params.riskFlags, "none recorded"),
+    evidenceRefs: normalizeList(params.evidenceRefs, "none supplied"),
+  });
+  return path.join(
+    params.workspaceDir,
+    "memory",
+    "dreaming",
+    "shadow-trials",
+    day,
+    `${contentHash}.md`,
+  );
+}
+
+/**
+ * Picks the report location: an explicit `reportPath` wins (a relative one is joined to
+ * `workspaceDir`), then the default path under `workspaceDir`, otherwise `undefined`.
+ */
+function resolveReportPath(
+  params: NormalizedTrialFields &
+    Pick<DreamingShadowTrialInput, "workspaceDir" | "reportPath" | "nowMs" | "timezone">,
+): string | undefined {
+  const { workspaceDir, reportPath, ...trial } = params;
+  if (reportPath) {
+    if (path.isAbsolute(reportPath)) {
+      return reportPath;
+    }
+    if (!workspaceDir) {
+      throw new Error("dreaming shadow trial relative reportPath requires workspaceDir");
+    }
+    return path.join(workspaceDir, reportPath);
+  }
+  if (!workspaceDir) {
+    return undefined;
+  }
+  return defaultDreamingShadowTrialReportPath({ ...trial, workspaceDir });
+}
+
+/**
+ * Normalizes the trial input and renders the report markdown without touching disk.
+ * `reportPath` is set only when `reportPath` or `workspaceDir` was supplied.
+ *
+ * @throws Error when a required text field is empty, or when a relative `reportPath` is
+ *   given without `workspaceDir`.
+ */
+export function buildDreamingShadowTrialReport(
+  input: DreamingShadowTrialInput,
+): DreamingShadowTrialReport {
+  const candidate = normalizeRequiredText(input.candidate, "candidate");
+  const trialPrompt = normalizeRequiredText(input.trialPrompt, "trialPrompt");
+  const baselineOutcome = normalizeRequiredText(input.baselineOutcome, "baselineOutcome");
+  const candidateOutcome = normalizeRequiredText(input.candidateOutcome, "candidateOutcome");
+  const reason = normalizeRequiredText(input.reason, "reason");
+  const riskFlags = normalizeList(input.riskFlags, "none recorded");
+  const evidenceRefs = normalizeList(input.evidenceRefs, "none supplied");
+  const recommendation = resolveDreamingShadowTrialRecommendation(input.verdict);
+  const reportPath = resolveReportPath({
+    ...input,
+    candidate,
+    trialPrompt,
+    baselineOutcome,
+    candidateOutcome,
+    reason,
+    riskFlags,
+    evidenceRefs,
+  });
+
+  const markdown = [
+    "# Dreaming Shadow Trial Report",
+    "",
+    `candidate: ${candidate}`,
+    `trial prompt: ${trialPrompt}`,
+    `baseline outcome: ${baselineOutcome}`,
+    `candidate outcome: ${candidateOutcome}`,
+    `verdict: ${input.verdict}`,
+    `recommendation: ${recommendation}`,
+    `reason: ${reason}`,
+    "risk flags:",
+    formatList(riskFlags),
+    "evidence refs:",
+    formatList(evidenceRefs),
+    "promotion action: report-only",
+    "",
+  ].join("\n");
+
+  return {
+    candidate,
+    trialPrompt,
+    baselineOutcome,
+    candidateOutcome,
+    verdict: input.verdict,
+    recommendation,
+    reason,
+    riskFlags,
+    evidenceRefs,
+    promotionAction: "report-only",
+    ...(reportPath ? { reportPath } : {}),
+    markdown,
+  };
+}
+
+/**
+ * Builds the report and writes its markdown to the resolved report path, creating parent
+ * directories as needed; an existing file at that path is overwritten.
+ *
+ * @returns The report that was written, including its `reportPath`.
+ * @throws Error when the input is invalid or no report path could be resolved.
+ */
+export async function writeDreamingShadowTrialReport(
+  input: DreamingShadowTrialInput & { workspaceDir: string },
+): Promise<DreamingShadowTrialReport> {
+  const report = buildDreamingShadowTrialReport(input);
+  if (!report.reportPath) {
+    throw new Error("dreaming shadow trial report path could not be resolved");
+  }
+  await fs.mkdir(path.dirname(report.reportPath), { recursive: true });
+  await fs.writeFile(report.reportPath, report.markdown, "utf-8");
+  return report;
+}