fix(memory): keep archive transcript visibility safe

Keep reset/deleted session archives searchable while preserving visibility filtering, and keep internal cron-run archives opaque when live ownership metadata is gone.

Refs #56131.
Thanks @buyitsydney.
This commit is contained in:
buyitsydney
2026-05-03 16:17:45 +08:00
committed by GitHub
parent d583662fd9
commit 2ffdb5d248
7 changed files with 361 additions and 25 deletions

View File

@@ -59,6 +59,7 @@ Docs: https://docs.openclaw.ai
- Cron: preserve manual `cron.run` IDs in `cron.runs` history so manual run acknowledgements can be correlated with finished run records. Fixes #76276.
- CLI/devices: request `operator.admin` for `openclaw devices approve <requestId>` only when the exact pending device request would mint or inherit admin-scoped operator access, while keeping lower-scope approvals on the pairing scope.
- Memory/embedding: broaden the embedding reindex retry classifier to include transient socket-layer errors (`fetch failed`, `ECONNRESET`, `socket hang up`, `UND_ERR_*`, `closed`) so memory reindex survives provider network hiccups instead of aborting mid-run. Related #56815, #44166. (#76311) Thanks @buyitsydney.
- Memory/sessions: keep rotated and deleted session transcripts (`.jsonl.reset.<iso>` / `.jsonl.deleted.<iso>`) searchable end-to-end by indexing their real content in `buildSessionEntry` instead of short-circuiting to empty entries, and by mapping archive hit paths back to their live transcript stem during `memory_search` visibility filtering so hits are no longer dropped at the guard. `.jsonl.bak.<iso>` backups and compaction checkpoints remain opaque. Refs #56131. Thanks @buyitsydney.
- Memory/search: keep sqlite-vec optional in packaged installs and point missing-extension recovery at the valid `agents.defaults.memorySearch.store.vector.extensionPath` setting. Thanks @willemsej and @vincentkoc.
- Gateway: keep directly requested plugin tools invokable under restrictive tool profiles while preserving explicit deny lists and the HTTP safety deny list, preventing catalog/invoke mismatches that surface as "Tool not available". Thanks @BunsDev.
- Gateway/update: allow beta binaries to refresh gateway services when the config was last written by the matching stable release version, avoiding false newer-config downgrade blocks during beta channel updates.

View File

@@ -11,6 +11,7 @@ const crossAgentStore = {
sessionFile: "/tmp/sessions/w1.jsonl",
},
};
let combinedSessionStore: typeof crossAgentStore | Record<string, never> = crossAgentStore;
vi.mock("openclaw/plugin-sdk/session-transcript-hit", async (importOriginal) => {
const actual =
@@ -19,7 +20,7 @@ vi.mock("openclaw/plugin-sdk/session-transcript-hit", async (importOriginal) =>
...actual,
loadCombinedSessionStoreForGateway: vi.fn(() => ({
storePath: "(test)",
store: crossAgentStore,
store: combinedSessionStore,
})),
};
});
@@ -27,6 +28,7 @@ vi.mock("openclaw/plugin-sdk/session-transcript-hit", async (importOriginal) =>
describe("filterMemorySearchHitsBySessionVisibility", () => {
afterEach(() => {
vi.mocked(sessionTranscriptHit.loadCombinedSessionStoreForGateway).mockClear();
combinedSessionStore = crossAgentStore;
});
it("drops sessions-sourced hits when requester key is missing (fail closed)", async () => {
@@ -148,4 +150,57 @@ describe("filterMemorySearchHitsBySessionVisibility", () => {
});
expect(filtered).toEqual([]);
});
it("keeps same-agent deleted archive hits using owner metadata when the live store entry is gone", async () => {
combinedSessionStore = {};
const hit: MemorySearchResult = {
path: "sessions/main/deleted-stem.jsonl.deleted.2026-02-16T22-27-33.000Z",
source: "sessions",
score: 1,
snippet: "x",
startLine: 1,
endLine: 2,
};
const cfg = asOpenClawConfig({
tools: {
sessions: { visibility: "agent" },
},
});
const filtered = await filterMemorySearchHitsBySessionVisibility({
cfg,
requesterSessionKey: "agent:main:main",
sandboxed: false,
hits: [hit],
});
expect(filtered).toEqual([hit]);
});
it("still denies cross-agent deleted archive hits resolved from owner metadata when a2a is disabled", async () => {
combinedSessionStore = {};
const hit: MemorySearchResult = {
path: "sessions/peer/deleted-stem.jsonl.deleted.2026-02-16T22-27-33.000Z",
source: "sessions",
score: 1,
snippet: "x",
startLine: 1,
endLine: 2,
};
const cfg = asOpenClawConfig({
tools: {
sessions: { visibility: "all" },
agentToAgent: { enabled: false },
},
});
const filtered = await filterMemorySearchHitsBySessionVisibility({
cfg,
requesterSessionKey: "agent:main:main",
sandboxed: false,
hits: [hit],
});
expect(filtered).toEqual([]);
});
});

View File

@@ -1,7 +1,7 @@
import type { OpenClawConfig } from "openclaw/plugin-sdk/memory-core-host-runtime-core";
import type { MemorySearchResult } from "openclaw/plugin-sdk/memory-core-host-runtime-files";
import {
extractTranscriptStemFromSessionsMemoryHit,
extractTranscriptIdentityFromSessionsMemoryHit,
loadCombinedSessionStoreForGateway,
resolveTranscriptStemToSessionKeys,
} from "openclaw/plugin-sdk/session-transcript-hit";
@@ -42,13 +42,16 @@ export async function filterMemorySearchHitsBySessionVisibility(params: {
if (!params.requesterSessionKey || !guard) {
continue;
}
const stem = extractTranscriptStemFromSessionsMemoryHit(hit.path);
if (!stem) {
const identity = extractTranscriptIdentityFromSessionsMemoryHit(hit.path);
if (!identity) {
continue;
}
const keys = resolveTranscriptStemToSessionKeys({
store: combinedSessionStore,
stem,
stem: identity.stem,
...(identity.archived && identity.ownerAgentId
? { archivedOwnerAgentId: identity.ownerAgentId }
: {}),
});
if (keys.length === 0) {
continue;

View File

@@ -2,7 +2,11 @@ import fsSync from "node:fs";
import os from "node:os";
import path from "node:path";
import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it } from "vitest";
import { buildSessionEntry, listSessionFilesForAgent } from "./session-files.js";
import {
buildSessionEntry,
listSessionFilesForAgent,
sessionPathForFile,
} from "./session-files.js";
let fixtureRoot: string;
let tmpDir: string;
@@ -61,6 +65,28 @@ describe("listSessionFilesForAgent", () => {
});
});
describe("sessionPathForFile", () => {
  it("includes the owning agent id when the transcript lives under an agent sessions dir", () => {
    // Archive file placed under <tmp>/agents/main/sessions/ so the owner is derivable.
    const agentScopedArchive = path.join(
      tmpDir,
      "agents",
      "main",
      "sessions",
      "deleted-session.jsonl.deleted.2026-02-16T22-27-33.000Z",
    );
    const indexedPath = sessionPathForFile(agentScopedArchive);
    expect(indexedPath).toBe(
      "sessions/main/deleted-session.jsonl.deleted.2026-02-16T22-27-33.000Z",
    );
  });

  it("keeps the legacy basename-only path when the agent owner cannot be derived", () => {
    // File sits directly in the temp dir, outside any agents/<id>/sessions layout.
    const looseTranscript = path.join(tmpDir, "loose-session.jsonl");
    const indexedPath = sessionPathForFile(looseTranscript);
    expect(indexedPath).toBe("sessions/loose-session.jsonl");
  });
});
describe("buildSessionEntry", () => {
it("returns lineMap tracking original JSONL line numbers", async () => {
// Simulate a real session JSONL file with metadata records interspersed
@@ -116,30 +142,92 @@ describe("buildSessionEntry", () => {
expect(entry!.lineMap).toEqual([]);
});
it("skips deleted and checkpoint transcripts for dreaming ingestion", async () => {
it("indexes usage-counted reset/deleted archives but still skips bak and checkpoint artifacts", async () => {
const resetPath = path.join(tmpDir, "ordinary.jsonl.reset.2026-02-16T22-26-33.000Z");
const deletedPath = path.join(tmpDir, "ordinary.jsonl.deleted.2026-02-16T22-27-33.000Z");
const bakPath = path.join(tmpDir, "ordinary.jsonl.bak.2026-02-16T22-28-33.000Z");
const checkpointPath = path.join(
tmpDir,
"ordinary.checkpoint.11111111-1111-4111-8111-111111111111.jsonl",
);
const content = JSON.stringify({
type: "message",
message: { role: "user", content: "This should never reach the dreaming corpus." },
message: { role: "user", content: "Archived hello" },
});
fsSync.writeFileSync(resetPath, content);
fsSync.writeFileSync(deletedPath, content);
fsSync.writeFileSync(bakPath, content);
fsSync.writeFileSync(checkpointPath, content);
const resetEntry = await buildSessionEntry(resetPath);
const deletedEntry = await buildSessionEntry(deletedPath);
const bakEntry = await buildSessionEntry(bakPath);
const checkpointEntry = await buildSessionEntry(checkpointPath);
expect(deletedEntry).not.toBeNull();
expect(deletedEntry?.content).toBe("");
expect(deletedEntry?.lineMap).toEqual([]);
// Usage-counted archives (reset, deleted) must surface real content so
// post-reset memory_search can recover prior session history.
expect(resetEntry?.content).toContain("User: Archived hello");
expect(resetEntry?.lineMap).toEqual([1]);
expect(deletedEntry?.content).toContain("User: Archived hello");
expect(deletedEntry?.lineMap).toEqual([1]);
// .bak and compaction checkpoints remain opaque pre-archive / snapshot
// artifacts and stay empty so they do not get double-indexed.
expect(bakEntry).not.toBeNull();
expect(bakEntry?.content).toBe("");
expect(bakEntry?.lineMap).toEqual([]);
expect(checkpointEntry).not.toBeNull();
expect(checkpointEntry?.content).toBe("");
expect(checkpointEntry?.lineMap).toEqual([]);
});
it("keeps cron-run deleted archives opaque when the live session store entry is gone", async () => {
const archivePath = path.join(tmpDir, "cron-run.jsonl.deleted.2026-02-16T22-27-33.000Z");
const jsonlLines = [
JSON.stringify({
type: "message",
message: {
role: "user",
content: "[cron:job-1 Codex Sessions Sync] Run internal sync.",
},
}),
JSON.stringify({
type: "message",
message: { role: "assistant", content: "Internal cron output that must stay out." },
}),
];
fsSync.writeFileSync(archivePath, jsonlLines.join("\n"));
const entry = await buildSessionEntry(archivePath);
expect(entry).not.toBeNull();
expect(entry?.content).toBe("");
expect(entry?.lineMap).toEqual([]);
expect(entry?.generatedByCronRun).toBe(true);
});
it("keeps cron-run reset archives opaque when session metadata preserves the cron key", async () => {
const archivePath = path.join(tmpDir, "cron-run.jsonl.reset.2026-02-16T22-26-33.000Z");
const jsonlLines = [
JSON.stringify({
type: "session-meta",
data: { sessionKey: "agent:main:cron:job-1:run:run-1" },
}),
JSON.stringify({
type: "message",
message: { role: "assistant", content: "Internal cron output that must stay out." },
}),
];
fsSync.writeFileSync(archivePath, jsonlLines.join("\n"));
const entry = await buildSessionEntry(archivePath);
expect(entry).not.toBeNull();
expect(entry?.content).toBe("");
expect(entry?.lineMap).toEqual([]);
expect(entry?.generatedByCronRun).toBe(true);
});
it("skips blank lines and invalid JSON without breaking lineMap", async () => {
const jsonlLines = [
"",

View File

@@ -14,6 +14,7 @@ import {
isSessionArchiveArtifactName,
isSilentReplyPayloadText,
isUsageCountedSessionTranscriptFileName,
parseUsageCountedSessionIdFromFileName,
resolveSessionTranscriptsDirForAgent,
stripInboundMetadata,
stripInternalRuntimeContext,
@@ -62,9 +63,32 @@ type SessionTranscriptStoreEntry = {
};
/**
 * Decide whether a transcript file is excluded from dreaming ingestion.
 *
 * Skipped:
 * - compaction checkpoints (derived snapshots of an active session that would
 *   double-index the same content);
 * - archive artifacts that are not usage-counted, i.e. legacy backups and
 *   `.jsonl.bak.<iso>` rotations, which are opaque pre-archive copies.
 *
 * Kept: usage-counted archives (`.jsonl.reset.<iso>` / `.jsonl.deleted.<iso>`),
 * so `memory_search` can surface post-reset / post-delete history.
 *
 * @param absPath Path of the transcript file; only its basename is inspected.
 * @returns `true` when the file must be skipped.
 */
function shouldSkipTranscriptFileForDreaming(absPath: string): boolean {
  const baseName = path.basename(absPath);
  return (
    isCompactionCheckpointTranscriptFileName(baseName) ||
    (isSessionArchiveArtifactName(baseName) && !isUsageCountedSessionTranscriptFileName(baseName))
  );
}
/**
 * Report whether `absPath` names a usage-counted session archive: a file whose
 * basename is simultaneously a usage-counted transcript name, a session
 * archive artifact name, and yields a non-null session id when parsed.
 *
 * The previous text carried a stale `isSessionArchiveArtifactName(...) ||
 * isCompactionCheckpointTranscriptFileName(...)` operand inside the `return`,
 * which left the expression malformed; only the three-way conjunction remains.
 *
 * @param absPath Path of the transcript file; only its basename is inspected.
 * @returns `true` when all three file-name checks pass.
 */
function isUsageCountedSessionArchiveTranscriptPath(absPath: string): boolean {
  const fileName = path.basename(absPath);
  return (
    isUsageCountedSessionTranscriptFileName(fileName) &&
    isSessionArchiveArtifactName(fileName) &&
    parseUsageCountedSessionIdFromFileName(fileName) !== null
  );
}
@@ -136,6 +160,30 @@ function isDreamingNarrativeSessionStoreKey(sessionKey: string): boolean {
return sessionSegment.startsWith(DREAMING_NARRATIVE_RUN_PREFIX);
}
/**
 * Check whether an untyped value is a string that `isCronRunSessionKey`
 * accepts. Non-string values are rejected without calling the predicate.
 */
function hasCronRunSessionKey(value: unknown): boolean {
  if (typeof value !== "string") {
    return false;
  }
  return isCronRunSessionKey(value);
}
/**
 * Detect a parsed transcript record that carries a cron-run session key,
 * either directly on `record.sessionKey` or nested under
 * `record.data.sessionKey`.
 *
 * @param record Parsed JSONL record of unknown shape.
 * @returns `true` when either location holds a cron-run session key; `false`
 *   for non-objects, arrays, and records without such a key.
 */
function isCronRunGeneratedRecord(record: unknown): boolean {
  // Shared shape guard: non-null, non-array object.
  const isPlainObject = (value: unknown): value is { sessionKey?: unknown; data?: unknown } =>
    typeof value === "object" && value !== null && !Array.isArray(value);

  if (!isPlainObject(record)) {
    return false;
  }
  // The top-level key wins; fall through to the nested payload otherwise.
  if (hasCronRunSessionKey(record.sessionKey)) {
    return true;
  }
  const payload = record.data;
  return isPlainObject(payload) && hasCronRunSessionKey(payload.sessionKey);
}
function normalizeComparablePath(pathname: string): string {
const resolved = path.resolve(pathname);
return process.platform === "win32" ? resolved.toLowerCase() : resolved;
@@ -228,11 +276,20 @@ function classifySessionTranscriptFromSessionStore(absPath: string): {
} {
const sessionsDir = path.dirname(absPath);
const normalizedAbsPath = normalizeComparablePath(absPath);
const primarySessionId = parseUsageCountedSessionIdFromFileName(path.basename(absPath));
const normalizedPrimaryPath =
primarySessionId && isSessionArchiveArtifactName(path.basename(absPath))
? normalizeComparablePath(path.join(sessionsDir, `${primarySessionId}.jsonl`))
: null;
const classification = loadSessionTranscriptClassificationForSessionsDir(sessionsDir);
const hasClassifiedPath = (paths: ReadonlySet<string>) =>
paths.has(normalizedAbsPath) ||
(normalizedPrimaryPath !== null && paths.has(normalizedPrimaryPath));
return {
generatedByDreamingNarrative:
classification.dreamingNarrativeTranscriptPaths.has(normalizedAbsPath),
generatedByCronRun: classification.cronRunTranscriptPaths.has(normalizedAbsPath),
generatedByDreamingNarrative: hasClassifiedPath(
classification.dreamingNarrativeTranscriptPaths,
),
generatedByCronRun: hasClassifiedPath(classification.cronRunTranscriptPaths),
};
}
@@ -250,8 +307,20 @@ export async function listSessionFilesForAgent(agentId: string): Promise<string[
}
}
/**
 * Derive the owning agent id from a transcript path laid out as
 * `.../agents/<agentId>/sessions/...`.
 *
 * The path is resolved to an absolute path first, then the LAST `sessions`
 * segment is located and the segment two positions before it must be `agents`.
 *
 * @param absPath Transcript path (resolved against the cwd when relative).
 * @returns The `<agentId>` segment, or `null` when the layout does not match.
 */
function extractAgentIdFromSessionPath(absPath: string): string | null {
  const parts = path.normalize(path.resolve(absPath)).split(path.sep).filter(Boolean);
  const sessionsIndex = parts.lastIndexOf("sessions");
  if (sessionsIndex < 2 || parts[sessionsIndex - 2] !== "agents") {
    return null;
  }
  return parts[sessionsIndex - 1] || null;
}

/**
 * Build the index-relative path recorded for a session transcript file.
 *
 * Returns `sessions/<agentId>/<basename>` when the owning agent can be derived
 * from the file location, and falls back to the legacy `sessions/<basename>`
 * form otherwise. Separators are always forward slashes.
 *
 * A stale basename-only `return` used to precede this logic, which made the
 * agent-scoped form unreachable; it has been removed.
 *
 * @param absPath Path of the transcript file on disk.
 * @returns Forward-slash path rooted at `sessions/`.
 */
export function sessionPathForFile(absPath: string): string {
  const agentId = extractAgentIdFromSessionPath(absPath);
  return path
    .join("sessions", ...(agentId ? [agentId] : []), path.basename(absPath))
    .replace(/\\/g, "/");
}
async function logSessionFileReadFailure(absPath: string, err: unknown): Promise<void> {
@@ -481,8 +550,10 @@ export async function buildSessionEntry(
opts.generatedByDreamingNarrative ??
sessionStoreClassification?.generatedByDreamingNarrative ??
false;
const generatedByCronRun =
let generatedByCronRun =
opts.generatedByCronRun ?? sessionStoreClassification?.generatedByCronRun ?? false;
const allowArchiveContentCronClassification =
isUsageCountedSessionArchiveTranscriptPath(absPath);
for (let jsonlIdx = 0; jsonlIdx < lines.length; jsonlIdx++) {
const line = lines[jsonlIdx];
if (!line.trim()) {
@@ -497,6 +568,16 @@ export async function buildSessionEntry(
if (!generatedByDreamingNarrative && isDreamingNarrativeGeneratedRecord(record)) {
generatedByDreamingNarrative = true;
}
if (
!generatedByCronRun &&
allowArchiveContentCronClassification &&
isCronRunGeneratedRecord(record)
) {
generatedByCronRun = true;
collected.length = 0;
lineMap.length = 0;
messageTimestampsMs.length = 0;
}
if (
!record ||
typeof record !== "object" ||
@@ -520,6 +601,16 @@ export async function buildSessionEntry(
if (rawText === null) {
continue;
}
if (
!generatedByCronRun &&
allowArchiveContentCronClassification &&
isGeneratedCronPromptMessage(normalizeSessionText(rawText), message.role)
) {
generatedByCronRun = true;
collected.length = 0;
lineMap.length = 0;
messageTimestampsMs.length = 0;
}
const text = sanitizeSessionText(rawText, message.role);
if (!text) {
// Assistant-side machinery (silent replies, system wrappers) is already

View File

@@ -1,6 +1,7 @@
import { describe, expect, it } from "vitest";
import type { SessionEntry } from "../config/sessions/types.js";
import {
extractTranscriptIdentityFromSessionsMemoryHit,
extractTranscriptStemFromSessionsMemoryHit,
resolveTranscriptStemToSessionKeys,
} from "./session-transcript-hit.js";
@@ -19,6 +20,56 @@ describe("extractTranscriptStemFromSessionsMemoryHit", () => {
it("uses .md basename for QMD exports", () => {
expect(extractTranscriptStemFromSessionsMemoryHit("qmd/sessions/x/y/z.md")).toBe("z");
});
it("strips .jsonl.reset.<iso> archive suffix so rotated transcripts resolve to the live stem", () => {
expect(
extractTranscriptStemFromSessionsMemoryHit(
"sessions/abc-uuid.jsonl.reset.2026-02-16T22-26-33.000Z",
),
).toBe("abc-uuid");
});
it("strips .jsonl.deleted.<iso> archive suffix the same way", () => {
expect(
extractTranscriptStemFromSessionsMemoryHit(
"sessions/def-uuid.jsonl.deleted.2026-02-16T22-27-33.000Z",
),
).toBe("def-uuid");
});
it("handles archive suffix on bare basenames without the sessions/ prefix", () => {
expect(
extractTranscriptStemFromSessionsMemoryHit("ghi-thread.jsonl.reset.2026-02-16T22-28-33.000Z"),
).toBe("ghi-thread");
});
it("does not mistake arbitrary suffixes containing .jsonl. for archives", () => {
// Not a real archive pattern: suffix after .jsonl. must be `reset` or `deleted`.
expect(
extractTranscriptStemFromSessionsMemoryHit("sessions/weird.jsonl.backup.2026-01-01.zst"),
).toBeNull();
});
});
describe("extractTranscriptIdentityFromSessionsMemoryHit", () => {
  it("extracts owner metadata from agent-scoped session archive paths", () => {
    // sessions/<agentId>/<archive> form: owner and archived flag are both reported.
    const identity = extractTranscriptIdentityFromSessionsMemoryHit(
      "sessions/main/deleted-uuid.jsonl.deleted.2026-02-16T22-27-33.000Z",
    );
    expect(identity).toEqual({
      stem: "deleted-uuid",
      ownerAgentId: "main",
      archived: true,
    });
  });

  it("does not invent owner metadata for legacy basename-only paths", () => {
    // sessions/<file> form: there is no agent segment to read an owner from.
    const identity = extractTranscriptIdentityFromSessionsMemoryHit("sessions/abc-uuid.jsonl");
    expect(identity).toEqual({
      stem: "abc-uuid",
      archived: false,
    });
  });
});
describe("resolveTranscriptStemToSessionKeys", () => {
@@ -40,4 +91,14 @@ describe("resolveTranscriptStemToSessionKeys", () => {
const keys = resolveTranscriptStemToSessionKeys({ store, stem: "stem-a" }).toSorted();
expect(keys).toEqual(["agent:main:s1", "agent:peer:s2"]);
});
it("falls back to archived owner metadata when deleted archives are gone from the live store", () => {
const keys = resolveTranscriptStemToSessionKeys({
store: {},
stem: "deleted-stem",
archivedOwnerAgentId: "main",
});
expect(keys).toEqual(["agent:main:deleted-stem"]);
});
});

View File

@@ -1,27 +1,56 @@
import path from "node:path";
import { parseUsageCountedSessionIdFromFileName } from "../config/sessions/artifacts.js";
import type { SessionEntry } from "../config/sessions/types.js";
import { normalizeAgentId } from "../routing/session-key.js";
import { normalizeOptionalString } from "../shared/string-coerce.js";
export { loadCombinedSessionStoreForGateway } from "../config/sessions/combined-store-gateway.js";
/** Identity recovered from the path of a sessions-sourced memory search hit. */
export type SessionTranscriptHitIdentity = {
// Transcript stem: the basename without `.jsonl` / `.md`, or the session id
// parsed out of an archive file name.
stem: string;
// Agent id taken from a `sessions/<agentId>/<file>` hit path; absent for
// basename-only paths and never set for `.md` hits.
ownerAgentId?: string;
// True when the basename parsed as a usage-counted transcript name that is
// not the live `<stem>.jsonl` file.
archived: boolean;
};
/**
 * Split a memory-hit path into its file basename and, when present, the owning
 * agent id.
 *
 * Backslashes are converted to forward slashes first. The owner is reported
 * only for paths shaped exactly `sessions/<agentId>/<file>` (two non-empty
 * segments after the `sessions/` prefix); it is passed through
 * `normalizeAgentId`.
 *
 * @param hitPath Hit path as stored in the memory index.
 * @returns `base` (POSIX basename) and `ownerAgentId` (possibly `undefined`).
 */
function parseSessionsPath(hitPath: string): { base: string; ownerAgentId?: string } {
  const sessionsPrefix = "sessions/";
  const forwardSlashed = hitPath.replace(/\\/g, "/");
  const underSessionsRoot = forwardSlashed.startsWith(sessionsPrefix);
  const relative = underSessionsRoot
    ? forwardSlashed.slice(sessionsPrefix.length)
    : forwardSlashed;
  const base = path.posix.basename(relative);

  let ownerAgentId: string | undefined;
  if (underSessionsRoot) {
    const segments = relative.split("/").filter(Boolean);
    if (segments.length === 2) {
      ownerAgentId = normalizeAgentId(segments[0]);
    }
  }
  return { base, ownerAgentId };
}
/**
 * Derive transcript stem `S` from a memory search hit path for `source === "sessions"`.
 * Builtin index uses `sessions/<basename>.jsonl`; QMD exports use `<stem>.md`.
 * Archived transcripts (`.jsonl.reset.<iso>` / `.jsonl.deleted.<iso>`) resolve
 * to the same stem as the live `.jsonl` they were rotated from.
 *
 * Thin wrapper over `extractTranscriptIdentityFromSessionsMemoryHit` that keeps
 * only the stem; the leftover path-splitting locals that were no longer read
 * have been dropped.
 *
 * @param hitPath Hit path as stored in the memory index.
 * @returns The transcript stem, or `null` when the path is not a recognised
 *   transcript file.
 */
export function extractTranscriptStemFromSessionsMemoryHit(hitPath: string): string | null {
  return extractTranscriptIdentityFromSessionsMemoryHit(hitPath)?.stem ?? null;
}
/**
 * Recover the transcript identity (stem, optional owning agent, archived flag)
 * from a sessions-sourced memory search hit path.
 *
 * Resolution order on the file basename:
 * 1. a usage-counted name that is not the live `<stem>.jsonl` → archived hit;
 * 2. `<stem>.jsonl` → live transcript;
 * 3. `<stem>.md` → export hit (owner metadata is intentionally not attached).
 *
 * Stale `return stem || null;` statements previously preceded the object
 * returns in branches 2 and 3, so a bare string escaped instead of an identity
 * object; they have been removed.
 *
 * @param hitPath Hit path as stored in the memory index.
 * @returns The identity, or `null` for unrecognised extensions or empty stems.
 */
export function extractTranscriptIdentityFromSessionsMemoryHit(
  hitPath: string,
): SessionTranscriptHitIdentity | null {
  const { base, ownerAgentId } = parseSessionsPath(hitPath);
  const archivedStem = parseUsageCountedSessionIdFromFileName(base);
  // The parser may also accept the live file name; only treat the hit as an
  // archive when the basename differs from `<stem>.jsonl`.
  if (archivedStem && base !== `${archivedStem}.jsonl`) {
    return { stem: archivedStem, ownerAgentId, archived: true };
  }
  if (base.endsWith(".jsonl")) {
    const stem = base.slice(0, -".jsonl".length);
    return stem ? { stem, ownerAgentId, archived: false } : null;
  }
  if (base.endsWith(".md")) {
    const stem = base.slice(0, -".md".length);
    return stem ? { stem, archived: false } : null;
  }
  return null;
}
@@ -34,6 +63,7 @@ export function extractTranscriptStemFromSessionsMemoryHit(hitPath: string): str
export function resolveTranscriptStemToSessionKeys(params: {
store: Record<string, SessionEntry>;
stem: string;
archivedOwnerAgentId?: string;
}): string[] {
const { store } = params;
const matches: string[] = [];
@@ -54,5 +84,12 @@ export function resolveTranscriptStemToSessionKeys(params: {
matches.push(sessionKey);
}
}
return [...new Set(matches)];
const deduped = [...new Set(matches)];
if (deduped.length > 0) {
return deduped;
}
const archivedOwnerAgentId = normalizeOptionalString(params.archivedOwnerAgentId);
return archivedOwnerAgentId
? [`agent:${normalizeAgentId(archivedOwnerAgentId)}:${params.stem}`]
: [];
}